/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira Networks.
 * Distributed under the terms of the GNU GPL version 2.
 *
 * Significant portions of this file may be copied from parts of the Linux
 * kernel, by Linus Torvalds and others.
 */

/* Functions for managing the dp interface/device. */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/system.h>
#include <asm/div64.h>
#include <asm/bug.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/inet_ecn.h>
#include <net/genetlink.h>

#include "openvswitch/datapath-protocol.h"
#include "checksum.h"
#include "datapath.h"
#include "actions.h"
#include "flow.h"
#include "loop_counter.h"
#include "table.h"
#include "vport-internal_dev.h"

int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters such as drop frags, etc.) are protected by genl_mutex.  The RTNL
 * lock nests inside genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of above and don't interact with
 * each other.
 */

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

static struct vport *new_vport(const struct vport_parms *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
struct datapath *get_dp(int dp_ifindex)
{
	struct datapath *dp = NULL;
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
	if (dev) {
		struct vport *vport = internal_dev_get_vport(dev);
		if (vport)
			dp = vport->dp;
	}
	rcu_read_unlock();

	return dp;
}
EXPORT_SYMBOL_GPL(get_dp);

/* Must be called with genl_mutex. */
static struct tbl *get_table_protected(struct datapath *dp)
{
	return rcu_dereference_protected(dp->table, lockdep_genl_is_held());
}

/* Must be called with rcu_read_lock or RTNL lock. */
static struct vport *get_vport_protected(struct datapath *dp, u16 port_no)
{
	return rcu_dereference_rtnl(dp->ports[port_no]);
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *dp_name(const struct datapath *dp)
{
	return vport_get_name(rcu_dereference_rtnl(dp->ports[ODPP_LOCAL]));
}

static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}

/* Caller must hold RTNL lock. */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct vport *port,
			  int event, unsigned int flags)
{
	struct datapath *dp = port->dp;
	int ifindex = vport_get_ifindex(port);
	int iflink = vport_get_iflink(port);
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	if (ifindex < 0)
		return ifindex;

	if (iflink < 0)
		return iflink;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = ARPHRD_ETHER;
	hdr->ifi_index = ifindex;
	hdr->ifi_flags = vport_get_flags(port);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
	NLA_PUT_U32(skb, IFLA_MASTER,
		    vport_get_ifindex(get_vport_protected(dp, ODPP_LOCAL)));
	NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   vport_is_running(port)
			? vport_get_operstate(port)
			: IF_OPER_DOWN);
#endif

	NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

	if (ifindex != iflink)
		NLA_PUT_U32(skb, IFLA_LINK, iflink);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Caller must hold RTNL lock. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}

static void release_dp(struct kobject *kobj)
{
	struct datapath *dp = container_of(kobj, struct datapath, ifobj);
	kfree(dp);
}

static struct kobj_type dp_ktype = {
	.release = release_dp
};

static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	tbl_destroy((struct tbl __force *)dp->table, flow_free_tbl);
	free_percpu(dp->stats_percpu);
	kobject_put(&dp->ifobj);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
	struct vport *vport;

	vport = vport_add(parms);
	if (!IS_ERR(vport)) {
		struct datapath *dp = parms->dp;

		rcu_assign_pointer(dp->ports[parms->port_no], vport);
		list_add(&vport->node, &dp->port_list);

		dp_ifinfo_notify(RTM_NEWLINK, vport);
	}

	return vport;
}

/* Called with RTNL lock. */
int dp_detach_port(struct vport *p)
{
	ASSERT_RTNL();

	if (p->port_no != ODPP_LOCAL)
		dp_sysfs_del_if(p);
	dp_ifinfo_notify(RTM_DELLINK, p);

	/* First drop references to device. */
	list_del(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

	/* Then destroy it. */
	return vport_del(p);
}

/* Must be called with rcu_read_lock. */
void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
	struct datapath *dp = p->dp;
	struct dp_stats_percpu *stats;
	int stats_counter_off;
	struct sw_flow_actions *acts;
	struct loop_counter *loop;
	int error;

	OVS_CB(skb)->vport = p;

	if (!OVS_CB(skb)->flow) {
		struct sw_flow_key key;
		struct tbl_node *flow_node;
		bool is_frag;

		/* Extract flow from 'skb' into 'key'. */
		error = flow_extract(skb, p->port_no, &key, &is_frag);
		if (unlikely(error)) {
			kfree_skb(skb);
			return;
		}

		if (is_frag && dp->drop_frags) {
			kfree_skb(skb);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
			goto out;
		}

		/* Look up flow. */
		flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
				       flow_hash(&key), flow_cmp);
		if (unlikely(!flow_node)) {
			struct dp_upcall_info upcall;

			upcall.cmd = ODP_PACKET_CMD_MISS;
			upcall.key = &key;
			upcall.userdata = 0;
			upcall.sample_pool = 0;
			upcall.actions = NULL;
			upcall.actions_len = 0;
			dp_upcall(dp, skb, &upcall);
			stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
			goto out;
		}

		OVS_CB(skb)->flow = flow_cast(flow_node);
	}

	stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
	flow_used(OVS_CB(skb)->flow, skb);

	acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);

	/* Check whether we've looped too much. */
	loop = loop_get_counter();
	if (unlikely(++loop->count > MAX_LOOPS))
		loop->looping = true;
	if (unlikely(loop->looping)) {
		loop_suppress(dp, acts);
		kfree_skb(skb);
		goto out_loop;
	}

	/* Execute actions. */
	execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
			acts->actions_len);

	/* Check whether sub-actions looped too much. */
	if (unlikely(loop->looping))
		loop_suppress(dp, acts);

out_loop:
	/* Decrement loop counter. */
	if (!--loop->count)
		loop->looping = false;
	loop_put_counter();

out:
	/* Update datapath statistics. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	(*(u64 *)((u8 *)stats + stats_counter_off))++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();
}

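/* Copies 'skb' into 'to', completing the checksum of a CHECKSUM_PARTIAL
 * packet so that the copy handed to userspace carries a finished checksum. */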
static void copy_and_csum_skb(struct sk_buff *skb, void *to)
{
	u16 csum_start, csum_offset;
	__wsum csum;

	get_skb_csum_pointers(skb, &csum_start, &csum_offset);
	csum_start -= skb_headroom(skb);
	BUG_ON(csum_start >= skb_headlen(skb));

	skb_copy_bits(skb, 0, to, csum_start);

	csum = skb_copy_and_csum_bits(skb, csum_start, to + csum_start,
				      skb->len - csum_start, 0);
	*(__sum16 *)(to + csum_start + csum_offset) = csum_fold(csum);
}

static struct genl_family dp_packet_genl_family;
#define PACKET_N_MC_GROUPS 16

static int packet_mc_group(struct datapath *dp, u8 cmd)
{
	BUILD_BUG_ON_NOT_POWER_OF_2(PACKET_N_MC_GROUPS);
	return jhash_2words(dp->dp_ifindex, cmd, 0) & (PACKET_N_MC_GROUPS - 1);
}

/* Send each packet in the 'skb' list to userspace for 'dp' as directed by
 * 'upcall_info'.  There will be only one packet unless we broke up a GSO
 * packet.
 */
static int queue_control_packets(struct datapath *dp, struct sk_buff *skb,
				 const struct dp_upcall_info *upcall_info)
{
	u32 group = packet_mc_group(dp, upcall_info->cmd);
	struct sk_buff *nskb;
	int port_no;
	int err;

	if (OVS_CB(skb)->vport)
		port_no = OVS_CB(skb)->vport->port_no;
	else
		port_no = ODPP_LOCAL;

	do {
		struct odp_header *upcall;
		struct sk_buff *user_skb; /* to be queued to userspace */
		struct nlattr *nla;
		unsigned int len;

		nskb = skb->next;
		skb->next = NULL;

		len = sizeof(struct odp_header);
		len += nla_total_size(4); /* ODP_PACKET_ATTR_TYPE. */
		len += nla_total_size(skb->len);
		len += nla_total_size(FLOW_BUFSIZE);
		if (upcall_info->userdata)
			len += nla_total_size(8);
		if (upcall_info->sample_pool)
			len += nla_total_size(4);
		if (upcall_info->actions_len)
			len += nla_total_size(upcall_info->actions_len);

		user_skb = genlmsg_new(len, GFP_ATOMIC);
		if (!user_skb) {
			netlink_set_err(INIT_NET_GENL_SOCK, 0, group, -ENOBUFS);
			goto err_kfree_skbs;
		}

		upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd);
		upcall->dp_ifindex = dp->dp_ifindex;

		nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_KEY);
		flow_to_nlattrs(upcall_info->key, user_skb);
		nla_nest_end(user_skb, nla);

		if (upcall_info->userdata)
			nla_put_u64(user_skb, ODP_PACKET_ATTR_USERDATA, upcall_info->userdata);
		if (upcall_info->sample_pool)
			nla_put_u32(user_skb, ODP_PACKET_ATTR_SAMPLE_POOL, upcall_info->sample_pool);
		if (upcall_info->actions_len) {
			const struct nlattr *actions = upcall_info->actions;
			u32 actions_len = upcall_info->actions_len;

			nla = nla_nest_start(user_skb, ODP_PACKET_ATTR_ACTIONS);
			memcpy(__skb_put(user_skb, actions_len), actions, actions_len);
			nla_nest_end(user_skb, nla);
		}

		nla = __nla_reserve(user_skb, ODP_PACKET_ATTR_PACKET, skb->len);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			copy_and_csum_skb(skb, nla_data(nla));
		else
			skb_copy_bits(skb, 0, nla_data(nla), skb->len);

		err = genlmsg_multicast(user_skb, 0, group, GFP_ATOMIC);
		if (err)
			goto err_kfree_skbs;

		kfree_skb(skb);
		skb = nskb;
	} while (skb);
	return 0;

err_kfree_skbs:
	kfree_skb(skb);
	while ((skb = nskb) != NULL) {
		nskb = skb->next;
		kfree_skb(skb);
	}
	return err;
}

/* Generic Netlink multicast groups for upcalls.
 *
 * We really want three unique multicast groups per datapath, but we can't even
 * get one, because genl_register_mc_group() takes genl_lock, which is also
 * held during Generic Netlink message processing, so trying to acquire
 * multicast groups during ODP_DP_NEW processing deadlocks.  Instead, we
 * preallocate a few groups and use them round-robin for datapaths.  Collision
 * isn't fatal--multicast listeners should check that the family is the one
 * that they want and discard others--but it wastes time and memory to receive
 * unwanted messages.
 */
static struct genl_multicast_group packet_mc_groups[PACKET_N_MC_GROUPS];

static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_PACKET_FAMILY,
	.version = 1,
	.maxattr = ODP_PACKET_ATTR_MAX
};

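/* Registers the PACKET_N_MC_GROUPS preallocated multicast groups ("packet0"
 * through "packet15") with dp_packet_genl_family; see the comment above
 * packet_mc_groups[] for why they are preallocated. */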
static int packet_register_mc_groups(void)
{
	int i;

	for (i = 0; i < PACKET_N_MC_GROUPS; i++) {
		struct genl_multicast_group *group = &packet_mc_groups[i];
		int error;

		sprintf(group->name, "packet%d", i);
		error = genl_register_mc_group(&dp_packet_genl_family, group);
		if (error)
			return error;
	}
	return 0;
}

int dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	WARN_ON_ONCE(skb_shared(skb));

	forward_ip_summed(skb);

	err = vswitch_skb_checksum_setup(skb);
	if (err)
		goto err_kfree_skb;

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);

		kfree_skb(skb);
		skb = nskb;
		if (IS_ERR(skb)) {
			err = PTR_ERR(skb);
			goto err;
		}
	}

	return queue_control_packets(dp, skb, upcall_info);

err_kfree_skb:
	kfree_skb(skb);
err:
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	stats->n_lost++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();

	return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
	struct tbl *old_table;
	struct tbl *new_table;
	struct datapath *dp;

	dp = get_dp(dp_ifindex);
	if (!dp)
		return -ENODEV;

	old_table = get_table_protected(dp);
	new_table = tbl_create(TBL_MIN_BUCKETS);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	tbl_deferred_destroy(old_table, flow_free_tbl);

	return 0;
}

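/* Verifies that every action in the nested 'attr' list has the expected
 * attribute length and, where relevant, an acceptable value (output port in
 * range, no CFI bit in a VLAN TCI, no ECN bits in a TOS). */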
static int validate_actions(const struct nlattr *attr)
{
	const struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		static const u32 action_lens[ODP_ACTION_ATTR_MAX + 1] = {
			[ODP_ACTION_ATTR_OUTPUT] = 4,
			[ODP_ACTION_ATTR_CONTROLLER] = 8,
			[ODP_ACTION_ATTR_SET_DL_TCI] = 2,
			[ODP_ACTION_ATTR_STRIP_VLAN] = 0,
			[ODP_ACTION_ATTR_SET_DL_SRC] = ETH_ALEN,
			[ODP_ACTION_ATTR_SET_DL_DST] = ETH_ALEN,
			[ODP_ACTION_ATTR_SET_NW_SRC] = 4,
			[ODP_ACTION_ATTR_SET_NW_DST] = 4,
			[ODP_ACTION_ATTR_SET_NW_TOS] = 1,
			[ODP_ACTION_ATTR_SET_TP_SRC] = 2,
			[ODP_ACTION_ATTR_SET_TP_DST] = 2,
			[ODP_ACTION_ATTR_SET_TUNNEL] = 8,
			[ODP_ACTION_ATTR_SET_PRIORITY] = 4,
			[ODP_ACTION_ATTR_POP_PRIORITY] = 0,
			[ODP_ACTION_ATTR_DROP_SPOOFED_ARP] = 0,
		};
		int type = nla_type(a);

		if (type > ODP_ACTION_ATTR_MAX || nla_len(a) != action_lens[type])
			return -EINVAL;

		switch (type) {
		case ODP_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case ODP_ACTION_ATTR_CONTROLLER:
		case ODP_ACTION_ATTR_STRIP_VLAN:
		case ODP_ACTION_ATTR_SET_DL_SRC:
		case ODP_ACTION_ATTR_SET_DL_DST:
		case ODP_ACTION_ATTR_SET_NW_SRC:
		case ODP_ACTION_ATTR_SET_NW_DST:
		case ODP_ACTION_ATTR_SET_TP_SRC:
		case ODP_ACTION_ATTR_SET_TP_DST:
		case ODP_ACTION_ATTR_SET_TUNNEL:
		case ODP_ACTION_ATTR_SET_PRIORITY:
		case ODP_ACTION_ATTR_POP_PRIORITY:
		case ODP_ACTION_ATTR_DROP_SPOOFED_ARP:
			/* No validation needed. */
			break;

		case ODP_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case ODP_ACTION_ATTR_SET_DL_TCI:
			if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
				return -EINVAL;
			break;

		case ODP_ACTION_ATTR_SET_NW_TOS:
			if (nla_get_u8(a) & INET_ECN_MASK)
				return -EINVAL;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	if (rem > 0)
		return -EINVAL;

	return 0;
}

static void clear_stats(struct sw_flow *flow)
{
	flow->used = 0;
	flow->tcp_flags = 0;
	flow->packet_count = 0;
	flow->byte_count = 0;
}

/* Called with genl_mutex. */
static int expand_table(struct datapath *dp)
{
	struct tbl *old_table = get_table_protected(dp);
	struct tbl *new_table;

	new_table = tbl_expand(old_table);
	if (IS_ERR(new_table))
		return PTR_ERR(new_table);

	rcu_assign_pointer(dp->table, new_table);
	tbl_deferred_destroy(old_table, NULL);

	return 0;
}

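/* Handler for ODP_PACKET_CMD_EXECUTE: takes a packet and a list of actions
 * supplied by userspace, extracts the packet's flow key, and executes the
 * actions on the packet within the given datapath. */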
static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct odp_header *odp_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sk_buff *packet;
	struct sw_flow_key key;
	struct datapath *dp;
	struct ethhdr *eth;
	bool is_frag;
	int err;

	err = -EINVAL;
	if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] ||
	    nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN)
		goto exit;

	err = validate_actions(a[ODP_PACKET_ATTR_ACTIONS]);
	if (err)
		goto exit;

	packet = skb_clone(skb, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto exit;
	packet->data = nla_data(a[ODP_PACKET_ATTR_PACKET]);
	packet->len = nla_len(a[ODP_PACKET_ATTR_PACKET]);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (ntohs(eth->h_proto) >= 1536)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	err = flow_extract(packet, -1, &key, &is_frag);
	if (err)
		goto exit;

	rcu_read_lock();
	dp = get_dp(odp_header->dp_ifindex);
	err = -ENODEV;
	if (dp)
		err = execute_actions(dp, packet, &key,
				      nla_data(a[ODP_PACKET_ATTR_ACTIONS]),
				      nla_len(a[ODP_PACKET_ATTR_ACTIONS]));
	rcu_read_unlock();

exit:
	return err;
}

static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = {
	[ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
	[ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = ODP_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = odp_packet_cmd_execute
	}
};

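/* Sums the per-CPU datapath counters into 'stats', using each CPU's seqcount
 * to obtain a consistent snapshot. */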
static void get_dp_stats(struct datapath *dp, struct odp_stats *stats)
{
	int i;

	stats->n_frags = stats->n_hit = stats->n_missed = stats->n_lost = 0;
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned seqcount;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		do {
			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
			local_stats = *percpu_stats;
		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

		stats->n_frags += local_stats.n_frags;
		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
	}
}

/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports.
 * Called with RTNL lock.
 */
int dp_min_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu = 0;

	ASSERT_RTNL();

	list_for_each_entry (p, &dp->port_list, node) {
		int dev_mtu;

		/* Skip any internal ports, since that's what we're trying to
		 * set. */
		if (is_internal_vport(p))
			continue;

		dev_mtu = vport_get_mtu(p);
		if (!mtu || dev_mtu < mtu)
			mtu = dev_mtu;
	}

	return mtu ? mtu : ETH_DATA_LEN;
}

/* Sets the MTU of all datapath devices to the minimum of the ports
 * Called with RTNL lock.
 */
void set_internal_devs_mtu(const struct datapath *dp)
{
	struct vport *p;
	int mtu;

	ASSERT_RTNL();

	mtu = dp_min_mtu(dp);

	list_for_each_entry (p, &dp->port_list, node) {
		if (is_internal_vport(p))
			vport_set_mtu(p, mtu);
	}
}

static const struct nla_policy flow_policy[ODP_FLOW_ATTR_MAX + 1] = {
	[ODP_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[ODP_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[ODP_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_FLOW_FAMILY,
	.version = 1,
	.maxattr = ODP_FLOW_ATTR_MAX
};

static struct genl_multicast_group dp_flow_multicast_group = {
	.name = ODP_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int odp_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
				  struct sk_buff *skb, u32 pid, u32 seq, u32 flags, u8 cmd)
{
	const int skb_orig_len = skb->len;
	const struct sw_flow_actions *sf_acts;
	struct odp_flow_stats stats;
	struct odp_header *odp_header;
	struct nlattr *nla;
	unsigned long used;
	u8 tcp_flags;
	int nla_len;
	int err;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	odp_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
	if (!odp_header)
		return -EMSGSIZE;

	odp_header->dp_ifindex = dp->dp_ifindex;

	nla = nla_nest_start(skb, ODP_FLOW_ATTR_KEY);
	if (!nla)
		goto nla_put_failure;
	err = flow_to_nlattrs(&flow->key, skb);
	if (err)
		goto error;
	nla_nest_end(skb, nla);

	spin_lock_bh(&flow->lock);
	used = flow->used;
	stats.n_packets = flow->packet_count;
	stats.n_bytes = flow->byte_count;
	tcp_flags = flow->tcp_flags;
	spin_unlock_bh(&flow->lock);

	if (used)
		NLA_PUT_MSECS(skb, ODP_FLOW_ATTR_USED, used);

	if (stats.n_packets)
		NLA_PUT(skb, ODP_FLOW_ATTR_STATS, sizeof(struct odp_flow_stats), &stats);

	if (tcp_flags)
		NLA_PUT_U8(skb, ODP_FLOW_ATTR_TCP_FLAGS, tcp_flags);

	/* If ODP_FLOW_ATTR_ACTIONS doesn't fit, and this is the first flow to
	 * be dumped into 'skb', then expand the skb.  This is unusual for
	 * Netlink but individual action lists can be longer than a page and
	 * thus entirely undumpable if we didn't do this. */
	nla_len = nla_total_size(sf_acts->actions_len);
	if (nla_len > skb_tailroom(skb) && !skb_orig_len) {
		int hdr_off = (unsigned char *)odp_header - skb->data;

		err = pskb_expand_head(skb, 0, nla_len - skb_tailroom(skb), GFP_KERNEL);
		if (err)
			goto error;

		odp_header = (struct odp_header *)(skb->data + hdr_off);
	}
	nla = nla_nest_start(skb, ODP_FLOW_ATTR_ACTIONS);
	memcpy(__skb_put(skb, sf_acts->actions_len), sf_acts->actions, sf_acts->actions_len);
	nla_nest_end(skb, nla);

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, odp_header);
	return err;
}

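/* Allocates a reply skb sized to hold every attribute that
 * odp_flow_cmd_fill_info() may emit for 'flow'. */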
static struct sk_buff *odp_flow_cmd_alloc_info(struct sw_flow *flow)
{
	const struct sw_flow_actions *sf_acts;
	int len;

	sf_acts = rcu_dereference_protected(flow->sf_acts,
					    lockdep_genl_is_held());

	len = nla_total_size(FLOW_BUFSIZE); /* ODP_FLOW_ATTR_KEY */
	len += nla_total_size(sf_acts->actions_len); /* ODP_FLOW_ATTR_ACTIONS */
	len += nla_total_size(sizeof(struct odp_flow_stats)); /* ODP_FLOW_ATTR_STATS */
	len += nla_total_size(1); /* ODP_FLOW_ATTR_TCP_FLAGS */
	len += nla_total_size(8); /* ODP_FLOW_ATTR_USED */
	return genlmsg_new(NLMSG_ALIGN(sizeof(struct odp_header)) + len, GFP_KERNEL);
}

static struct sk_buff *odp_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp,
					       u32 pid, u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = odp_flow_cmd_alloc_info(flow);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = odp_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
	BUG_ON(retval < 0);
	return skb;
}

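/* Implements both ODP_FLOW_CMD_NEW and ODP_FLOW_CMD_SET: inserts a new flow
 * or updates the actions of an existing one, then notifies the flow multicast
 * group. */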
static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct tbl_node *flow_node;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply;
	struct datapath *dp;
	struct tbl *table;
	u32 hash;
	int error;

	/* Extract key. */
	error = -EINVAL;
	if (!a[ODP_FLOW_ATTR_KEY])
		goto error;
	error = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[ODP_FLOW_ATTR_ACTIONS]) {
		error = validate_actions(a[ODP_FLOW_ATTR_ACTIONS]);
		if (error)
			goto error;
	} else if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW) {
		error = -EINVAL;
		goto error;
	}

	dp = get_dp(odp_header->dp_ifindex);
	error = -ENODEV;
	if (!dp)
		goto error;

	hash = flow_hash(&key);
	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, hash, flow_cmp);
	if (!flow_node) {
		struct sw_flow_actions *acts;

		/* Bail out if we're not allowed to create a new flow. */
		error = -ENOENT;
		if (info->genlhdr->cmd == ODP_FLOW_CMD_SET)
			goto error;

		/* Expand table, if necessary, to make room. */
		if (tbl_count(table) >= tbl_n_buckets(table)) {
			error = expand_table(dp);
			if (error)
				goto error;
			table = get_table_protected(dp);
		}

		/* Allocate flow. */
		flow = flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto error;
		}
		flow->key = key;
		clear_stats(flow);

		/* Obtain actions. */
		acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = tbl_insert(table, &flow->tbl_node, hash);
		if (error)
			goto error_free_flow;

		reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, ODP_FLOW_CMD_NEW);
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		error = -EEXIST;
		if (info->genlhdr->cmd == ODP_FLOW_CMD_NEW &&
		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
			goto error;

		/* Update actions. */
		flow = flow_cast(flow_node);
		old_acts = rcu_dereference_protected(flow->sf_acts,
						     lockdep_genl_is_held());
		if (a[ODP_FLOW_ATTR_ACTIONS] &&
		    (old_acts->actions_len != nla_len(a[ODP_FLOW_ATTR_ACTIONS]) ||
		     memcmp(old_acts->actions, nla_data(a[ODP_FLOW_ATTR_ACTIONS]),
			    old_acts->actions_len))) {
			struct sw_flow_actions *new_acts;

			new_acts = flow_actions_alloc(a[ODP_FLOW_ATTR_ACTIONS]);
			error = PTR_ERR(new_acts);
			if (IS_ERR(new_acts))
				goto error;

			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		}

		reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid,
						info->snd_seq, ODP_FLOW_CMD_NEW);

		/* Clear stats. */
		if (a[ODP_FLOW_ATTR_CLEAR]) {
			spin_lock_bh(&flow->lock);
			clear_stats(flow);
			spin_unlock_bh(&flow->lock);
		}
	}

	if (!IS_ERR(reply))
		genl_notify(reply, genl_info_net(info), info->snd_pid,
			    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	else
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_flow_multicast_group.id, PTR_ERR(reply));
	return 0;

error_free_flow:
	flow_put(flow);
error:
	return error;
}

static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct sw_flow_key key;
	struct tbl_node *flow_node;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct tbl *table;
	int err;

	if (!a[ODP_FLOW_ATTR_KEY])
		return -EINVAL;
	err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(odp_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
	if (!flow_node)
		return -ENOENT;

	flow = flow_cast(flow_node);
	reply = odp_flow_cmd_build_info(flow, dp, info->snd_pid, info->snd_seq, ODP_FLOW_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct odp_header *odp_header = info->userhdr;
	struct sw_flow_key key;
	struct tbl_node *flow_node;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct tbl *table;
	int err;

	if (!a[ODP_FLOW_ATTR_KEY])
		return flush_flows(odp_header->dp_ifindex);
	err = flow_from_nlattrs(&key, a[ODP_FLOW_ATTR_KEY]);
	if (err)
		return err;

	dp = get_dp(odp_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	table = get_table_protected(dp);
	flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
	if (!flow_node)
		return -ENOENT;
	flow = flow_cast(flow_node);

	reply = odp_flow_cmd_alloc_info(flow);
	if (!reply)
		return -ENOMEM;

	err = tbl_remove(table, flow_node);
	if (err) {
		kfree_skb(reply);
		return err;
	}

	err = odp_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
				     info->snd_seq, 0, ODP_FLOW_CMD_DEL);
	BUG_ON(err < 0);

	flow_deferred_free(flow);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

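/* Netlink dump callback for flows.  Resumes from the flow table position
 * saved in cb->args[] and emits one ODP_FLOW_CMD_NEW message per flow. */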
static int odp_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;

	dp = get_dp(odp_header->dp_ifindex);
	if (!dp)
		return -ENODEV;

	for (;;) {
		struct tbl_node *flow_node;
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow_node = tbl_next(get_table_protected(dp), &bucket, &obj);
		if (!flow_node)
			break;

		flow = flow_cast(flow_node);
		if (odp_flow_cmd_fill_info(flow, dp, skb, NETLINK_CB(cb->skb).pid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   ODP_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = ODP_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_new_or_set
	},
	{ .cmd = ODP_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_del
	},
	{ .cmd = ODP_FLOW_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_get,
	  .dumpit = odp_flow_cmd_dump
	},
	{ .cmd = ODP_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = odp_flow_cmd_new_or_set,
	},
};

static const struct nla_policy datapath_policy[ODP_DP_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[ODP_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
#endif
	[ODP_DP_ATTR_IPV4_FRAGS] = { .type = NLA_U32 },
	[ODP_DP_ATTR_SAMPLING] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_DATAPATH_FAMILY,
	.version = 1,
	.maxattr = ODP_DP_ATTR_MAX
};

static struct genl_multicast_group dp_datapath_multicast_group = {
	.name = ODP_DATAPATH_MCGROUP
};

static int odp_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct odp_header *odp_header;
	struct nlattr *nla;
	int err;

	odp_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!odp_header)
		goto error;

	odp_header->dp_ifindex = dp->dp_ifindex;

	rcu_read_lock();
	err = nla_put_string(skb, ODP_DP_ATTR_NAME, dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	nla = nla_reserve(skb, ODP_DP_ATTR_STATS, sizeof(struct odp_stats));
	if (!nla)
		goto nla_put_failure;
	get_dp_stats(dp, nla_data(nla));

	NLA_PUT_U32(skb, ODP_DP_ATTR_IPV4_FRAGS,
		    dp->drop_frags ? ODP_DP_FRAG_DROP : ODP_DP_FRAG_ZERO);

	if (dp->sflow_probability)
		NLA_PUT_U32(skb, ODP_DP_ATTR_SAMPLING, dp->sflow_probability);

	nla = nla_nest_start(skb, ODP_DP_ATTR_MCGROUPS);
	if (!nla)
		goto nla_put_failure;
	NLA_PUT_U32(skb, ODP_PACKET_CMD_MISS, packet_mc_group(dp, ODP_PACKET_CMD_MISS));
	NLA_PUT_U32(skb, ODP_PACKET_CMD_ACTION, packet_mc_group(dp, ODP_PACKET_CMD_ACTION));
	NLA_PUT_U32(skb, ODP_PACKET_CMD_SAMPLE, packet_mc_group(dp, ODP_PACKET_CMD_SAMPLE));
	nla_nest_end(skb, nla);

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	genlmsg_cancel(skb, odp_header);
error:
	return -EMSGSIZE;
}

static struct sk_buff *odp_dp_cmd_build_info(struct datapath *dp, u32 pid,
					     u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = odp_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int odp_dp_cmd_validate(struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
	if (a[ODP_DP_ATTR_IPV4_FRAGS]) {
		u32 frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]);

		if (frags != ODP_DP_FRAG_ZERO && frags != ODP_DP_FRAG_DROP)
			return -EINVAL;
	}

	return VERIFY_NUL_STRING(a[ODP_DP_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct odp_header *odp_header, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[ODP_DP_ATTR_NAME])
		dp = get_dp(odp_header->dp_ifindex);
	else {
		struct vport *vport;

		rcu_read_lock();
		vport = vport_locate(nla_data(a[ODP_DP_ATTR_NAME]));
		dp = vport && vport->port_no == ODPP_LOCAL ? vport->dp : NULL;
		rcu_read_unlock();
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

/* Called with genl_mutex. */
static void change_datapath(struct datapath *dp, struct nlattr *a[ODP_DP_ATTR_MAX + 1])
{
	if (a[ODP_DP_ATTR_IPV4_FRAGS])
		dp->drop_frags = nla_get_u32(a[ODP_DP_ATTR_IPV4_FRAGS]) == ODP_DP_FRAG_DROP;
	if (a[ODP_DP_ATTR_SAMPLING])
		dp->sflow_probability = nla_get_u32(a[ODP_DP_ATTR_SAMPLING]);
}

static int odp_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	int err;

	err = -EINVAL;
	if (!a[ODP_DP_ATTR_NAME])
		goto err;

	err = odp_dp_cmd_validate(a);
	if (err)
		goto err;

	rtnl_lock();
	err = -ENODEV;
	if (!try_module_get(THIS_MODULE))
		goto err_unlock_rtnl;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_put_module;
	INIT_LIST_HEAD(&dp->port_list);

	/* Initialize kobject for bridge.  This will be added as
	 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
	dp->ifobj.kset = NULL;
	kobject_init(&dp->ifobj, &dp_ktype);

	/* Allocate table. */
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, tbl_create(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	/* Set up our datapath device. */
	parms.name = nla_data(a[ODP_DP_ATTR_NAME]);
	parms.type = ODP_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = ODPP_LOCAL;
	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_table;
	}
	dp->dp_ifindex = vport_get_ifindex(vport);

	dp->drop_frags = 0;
	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_local_port;
	}

	change_datapath(dp, a);

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_destroy_local_port;

	list_add_tail(&dp->list_node, &dps);
	dp_sysfs_add_dp(dp);

	rtnl_unlock();

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;

err_destroy_local_port:
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));
err_destroy_table:
	tbl_destroy(get_table_protected(dp), NULL);
err_free_dp:
	kfree(dp);
err_put_module:
	module_put(THIS_MODULE);
err_unlock_rtnl:
	rtnl_unlock();
err:
	return err;
}

static int odp_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct vport *vport, *next_vport;
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		goto exit;

	rtnl_lock();
	dp = lookup_datapath(info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto exit_unlock;

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	list_for_each_entry_safe (vport, next_vport, &dp->port_list, node)
		if (vport->port_no != ODPP_LOCAL)
			dp_detach_port(vport);

	dp_sysfs_del_dp(dp);
	list_del(&dp->list_node);
	dp_detach_port(get_vport_protected(dp, ODPP_LOCAL));

	call_rcu(&dp->rcu, destroy_dp_rcu);
	module_put(THIS_MODULE);

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	err = 0;

exit_unlock:
	rtnl_unlock();
exit:
	return err;
}

static int odp_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	change_datapath(dp, info->attrs);

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		netlink_set_err(INIT_NET_GENL_SOCK, 0,
				dp_datapath_multicast_group.id, err);
		return 0;
	}

	genl_notify(reply, genl_info_net(info), info->snd_pid,
		    dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL);
	return 0;
}

static int odp_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	err = odp_dp_cmd_validate(info->attrs);
	if (err)
		return err;

	dp = lookup_datapath(info->userhdr, info->attrs);
	if (IS_ERR(dp))
		return PTR_ERR(dp);

	reply = odp_dp_cmd_build_info(dp, info->snd_pid, info->snd_seq, ODP_DP_CMD_NEW);
	if (IS_ERR(reply))
		return PTR_ERR(reply);

	return genlmsg_reply(reply, info);
}

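/* Netlink dump callback for datapaths: walks the global 'dps' list, resuming
 * from the index saved in cb->args[0]. */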
static int odp_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	list_for_each_entry (dp, &dps, list_node) {
		if (i < skip)
			continue;
		if (odp_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 ODP_DP_CMD_NEW) < 0)
			break;
		i++;
	}

	cb->args[0] = i;

	return skb->len;
}

static struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = ODP_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_new
	},
	{ .cmd = ODP_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_del
	},
	{ .cmd = ODP_DP_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_get,
	  .dumpit = odp_dp_cmd_dump
	},
	{ .cmd = ODP_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = odp_dp_cmd_set,
	},
};

static const struct nla_policy vport_policy[ODP_VPORT_ATTR_MAX + 1] = {
#ifdef HAVE_NLA_NUL_STRING
	[ODP_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[ODP_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[ODP_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[ODP_VPORT_ATTR_STATS] = { .len = sizeof(struct rtnl_link_stats64) },
	[ODP_VPORT_ATTR_ADDRESS] = { .len = ETH_ALEN },
#else
	[ODP_VPORT_ATTR_STATS] = { .minlen = sizeof(struct rtnl_link_stats64) },
	[ODP_VPORT_ATTR_ADDRESS] = { .minlen = ETH_ALEN },
#endif
	[ODP_VPORT_ATTR_MTU] = { .type = NLA_U32 },
	[ODP_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_family dp_vport_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct odp_header),
	.name = ODP_VPORT_FAMILY,
	.version = 1,
	.maxattr = ODP_VPORT_ATTR_MAX
};

static struct genl_multicast_group dp_vport_multicast_group = {
	.name = ODP_VPORT_MCGROUP
};

/* Called with RTNL lock or RCU read lock. */
static int odp_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   u32 pid, u32 seq, u32 flags, u8 cmd)
{
	struct odp_header *odp_header;
	struct nlattr *nla;
	int ifindex, iflink;
	int err;

	odp_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!odp_header)
		return -EMSGSIZE;

	odp_header->dp_ifindex = vport->dp->dp_ifindex;

	NLA_PUT_U32(skb, ODP_VPORT_ATTR_PORT_NO, vport->port_no);
	NLA_PUT_U32(skb, ODP_VPORT_ATTR_TYPE, vport_get_type(vport));
	NLA_PUT_STRING(skb, ODP_VPORT_ATTR_NAME, vport_get_name(vport));

	nla = nla_reserve(skb, ODP_VPORT_ATTR_STATS, sizeof(struct rtnl_link_stats64));
	if (!nla)
		goto nla_put_failure;
	if (vport_get_stats(vport, nla_data(nla)))
		__skb_trim(skb, skb->len - nla->nla_len);

	NLA_PUT(skb, ODP_VPORT_ATTR_ADDRESS, ETH_ALEN, vport_get_addr(vport));

	NLA_PUT_U32(skb, ODP_VPORT_ATTR_MTU, vport_get_mtu(vport));

	err = vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	ifindex = vport_get_ifindex(vport);
	if (ifindex > 0)
		NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFINDEX, ifindex);

	iflink = vport_get_iflink(vport);
	if (iflink > 0)
		NLA_PUT_U32(skb, ODP_VPORT_ATTR_IFLINK, iflink);

	return genlmsg_end(skb, odp_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, odp_header);
	return err;
}

/* Called with RTNL lock or RCU read lock. */
static struct sk_buff *odp_vport_cmd_build_info(struct vport *vport, u32 pid,
						u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = odp_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

static int odp_vport_cmd_validate(struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
	return VERIFY_NUL_STRING(a[ODP_VPORT_ATTR_NAME], IFNAMSIZ - 1);
}

/* Called with RTNL lock or RCU read lock. */
static struct vport *lookup_vport(struct odp_header *odp_header,
				  struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[ODP_VPORT_ATTR_NAME]) {
		vport = vport_locate(nla_data(a[ODP_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[ODP_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(odp_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = get_vport_protected(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENOENT);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}

/* Called with RTNL lock. */
static int change_vport(struct vport *vport, struct nlattr *a[ODP_VPORT_ATTR_MAX + 1])
{
	int err = 0;
	if (a[ODP_VPORT_ATTR_STATS])
		err = vport_set_stats(vport, nla_data(a[ODP_VPORT_ATTR_STATS]));
	if (!err && a[ODP_VPORT_ATTR_ADDRESS])
		err = vport_set_addr(vport, nla_data(a[ODP_VPORT_ATTR_ADDRESS]));
	if (!err && a[ODP_VPORT_ATTR_MTU])
		err = vport_set_mtu(vport, nla_get_u32(a[ODP_VPORT_ATTR_MTU]));
	return err;
}

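/* Handler for ODP_VPORT_CMD_NEW: creates a vport in the datapath named by the
 * Netlink header, picking the lowest free port number if the request does not
 * specify ODP_VPORT_ATTR_PORT_NO. */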
f0fef760 1698static int odp_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
c19e6535 1699{
f0fef760
BP
1700 struct nlattr **a = info->attrs;
1701 struct odp_header *odp_header = info->userhdr;
c19e6535 1702 struct vport_parms parms;
ed099e92 1703 struct sk_buff *reply;
c19e6535 1704 struct vport *vport;
c19e6535 1705 struct datapath *dp;
b0ec0f27 1706 u32 port_no;
c19e6535 1707 int err;
b0ec0f27 1708
c19e6535
BP
1709 err = -EINVAL;
1710 if (!a[ODP_VPORT_ATTR_NAME] || !a[ODP_VPORT_ATTR_TYPE])
f0fef760
BP
1711 goto exit;
1712
1713 err = odp_vport_cmd_validate(a);
1714 if (err)
1715 goto exit;
51d4d598 1716
c19e6535 1717 rtnl_lock();
254f2dc8 1718 dp = get_dp(odp_header->dp_ifindex);
c19e6535
BP
1719 err = -ENODEV;
1720 if (!dp)
ed099e92 1721 goto exit_unlock;
c19e6535
BP
1722
1723 if (a[ODP_VPORT_ATTR_PORT_NO]) {
1724 port_no = nla_get_u32(a[ODP_VPORT_ATTR_PORT_NO]);
1725
1726 err = -EFBIG;
1727 if (port_no >= DP_MAX_PORTS)
ed099e92 1728 goto exit_unlock;
c19e6535
BP
1729
1730 vport = get_vport_protected(dp, port_no);
1731 err = -EBUSY;
1732 if (vport)
ed099e92 1733 goto exit_unlock;
c19e6535
BP
1734 } else {
1735 for (port_no = 1; ; port_no++) {
1736 if (port_no >= DP_MAX_PORTS) {
1737 err = -EFBIG;
ed099e92 1738 goto exit_unlock;
c19e6535
BP
1739 }
1740 vport = get_vport_protected(dp, port_no);
1741 if (!vport)
1742 break;
51d4d598 1743 }
064af421 1744 }
b0ec0f27 1745
c19e6535
BP
1746 parms.name = nla_data(a[ODP_VPORT_ATTR_NAME]);
1747 parms.type = nla_get_u32(a[ODP_VPORT_ATTR_TYPE]);
1748 parms.options = a[ODP_VPORT_ATTR_OPTIONS];
1749 parms.dp = dp;
1750 parms.port_no = port_no;
1751
1752 vport = new_vport(&parms);
1753 err = PTR_ERR(vport);
1754 if (IS_ERR(vport))
ed099e92 1755 goto exit_unlock;
c19e6535
BP
1756
1757 set_internal_devs_mtu(dp);
1758 dp_sysfs_add_if(vport);
1759
1760 err = change_vport(vport, a);
f0fef760
BP
1761 if (!err) {
1762 reply = odp_vport_cmd_build_info(vport, info->snd_pid,
1763 info->snd_seq, ODP_VPORT_CMD_NEW);
1764 if (IS_ERR(reply))
1765 err = PTR_ERR(reply);
1766 }
c19e6535
BP
1767 if (err) {
1768 dp_detach_port(vport);
ed099e92 1769 goto exit_unlock;
c19e6535 1770 }
f0fef760
BP
1771 genl_notify(reply, genl_info_net(info), info->snd_pid,
1772 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
c19e6535 1773
ed099e92 1775exit_unlock:
c19e6535 1776 rtnl_unlock();
c19e6535
BP
1777exit:
1778 return err;
44e05eca
BP
1779}
1780
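/*
 * ODP_VPORT_CMD_SET handler.  Updates an existing vport's options, stats,
 * Ethernet address or MTU under RTNL and broadcasts the resulting state to
 * the vport multicast group as an ODP_VPORT_CMD_NEW notification.
 */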
f0fef760 1781static int odp_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
44e05eca 1782{
f0fef760
BP
1783 struct nlattr **a = info->attrs;
1784 struct sk_buff *reply;
c19e6535 1785 struct vport *vport;
c19e6535 1786 int err;
44e05eca 1787
f0fef760
BP
1788 err = odp_vport_cmd_validate(a);
1789 if (err)
c19e6535
BP
1790 goto exit;
1791
1792 rtnl_lock();
f0fef760 1793 vport = lookup_vport(info->userhdr, a);
c19e6535
BP
1794 err = PTR_ERR(vport);
1795 if (IS_ERR(vport))
f0fef760 1796 goto exit_unlock;
44e05eca 1797
c19e6535
BP
1798 err = 0;
1799 if (a[ODP_VPORT_ATTR_OPTIONS])
1800 err = vport_set_options(vport, a[ODP_VPORT_ATTR_OPTIONS]);
1801 if (!err)
1802 err = change_vport(vport, a);
1803
f0fef760
BP
1804 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1805 ODP_VPORT_CMD_NEW);
1806 if (IS_ERR(reply)) {
1807 err = PTR_ERR(reply);
1808 netlink_set_err(INIT_NET_GENL_SOCK, 0,
1809 dp_vport_multicast_group.id, err);
 1810 goto exit_unlock;
1811 }
1812
1813 genl_notify(reply, genl_info_net(info), info->snd_pid,
1814 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1815
1816exit_unlock:
c19e6535
BP
1817 rtnl_unlock();
1818exit:
1819 return err;
064af421
BP
1820}
1821
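/*
 * ODP_VPORT_CMD_DEL handler.  Detaches a vport and notifies listeners; the
 * notification is built before the port is detached so that it can still
 * describe the port.  Deleting the local port (ODPP_LOCAL) is rejected
 * with -EINVAL.
 */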
f0fef760 1822static int odp_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1823{
f0fef760
BP
1824 struct nlattr **a = info->attrs;
1825 struct sk_buff *reply;
c19e6535 1826 struct vport *vport;
c19e6535
BP
1827 int err;
1828
f0fef760
BP
1829 err = odp_vport_cmd_validate(a);
1830 if (err)
c19e6535
BP
1831 goto exit;
1832
1833 rtnl_lock();
f0fef760 1834 vport = lookup_vport(info->userhdr, a);
c19e6535 1835 err = PTR_ERR(vport);
f0fef760
BP
1836 if (IS_ERR(vport))
1837 goto exit_unlock;
c19e6535 1838
f0fef760
BP
1839 if (vport->port_no == ODPP_LOCAL) {
1840 err = -EINVAL;
1841 goto exit_unlock;
1842 }
1843
1844 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1845 ODP_VPORT_CMD_DEL);
1846 err = PTR_ERR(reply);
1847 if (IS_ERR(reply))
1848 goto exit_unlock;
1849
1850 err = dp_detach_port(vport);
1851
1852 genl_notify(reply, genl_info_net(info), info->snd_pid,
1853 dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1854
1855exit_unlock:
c19e6535
BP
1856 rtnl_unlock();
1857exit:
1858 return err;
7c40efc9
BP
1859}
1860
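/*
 * ODP_VPORT_CMD_GET handler.  Read-only lookup under the RCU read lock;
 * the vport description is unicast back to the requester with
 * genlmsg_reply() rather than broadcast.
 */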
f0fef760 1861static int odp_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
7c40efc9 1862{
f0fef760
BP
1863 struct nlattr **a = info->attrs;
1864 struct odp_header *odp_header = info->userhdr;
ed099e92 1865 struct sk_buff *reply;
c19e6535 1866 struct vport *vport;
c19e6535
BP
1867 int err;
1868
f0fef760
BP
1869 err = odp_vport_cmd_validate(a);
1870 if (err)
1871 goto exit;
c19e6535 1872
ed099e92 1873 rcu_read_lock();
f0fef760 1874 vport = lookup_vport(odp_header, a);
c19e6535
BP
1875 err = PTR_ERR(vport);
1876 if (IS_ERR(vport))
f0fef760 1877 goto exit_unlock;
c19e6535 1878
f0fef760
BP
1879 reply = odp_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
1880 ODP_VPORT_CMD_NEW);
ed099e92
BP
1881 err = PTR_ERR(reply);
1882 if (IS_ERR(reply))
f0fef760 1883 goto exit_unlock;
ed099e92 1884
f0fef760 1885 err = genlmsg_reply(reply, info);
ed099e92 1886
f0fef760 1887exit_unlock:
ed099e92 1888 rcu_read_unlock();
f0fef760 1889exit:
c19e6535
BP
1890 return err;
1891}
1892
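/*
 * Dump callback for ODP_VPORT_CMD_GET.  Walks the datapath's port numbers
 * starting from cb->args[0], emitting one NLM_F_MULTI message per vport,
 * and records the next port number in cb->args[0] so a subsequent call can
 * resume where this one stopped.
 */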
f0fef760 1893static int odp_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
c19e6535 1894{
f0fef760 1895 struct odp_header *odp_header = genlmsg_data(nlmsg_data(cb->nlh));
c19e6535
BP
1896 struct datapath *dp;
1897 u32 port_no;
f0fef760 1898 int retval;
c19e6535 1899
254f2dc8 1900 dp = get_dp(odp_header->dp_ifindex);
c19e6535 1901 if (!dp)
f0fef760 1902 return -ENODEV;
ed099e92
BP
1903
1904 rcu_read_lock();
f0fef760 1905 for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
ed099e92 1906 struct vport *vport;
ed099e92
BP
1907
1908 vport = get_vport_protected(dp, port_no);
1909 if (!vport)
1910 continue;
1911
f0fef760
BP
1912 if (odp_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
1913 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1914 ODP_VPORT_CMD_NEW) < 0)
1915 break;
c19e6535 1916 }
ed099e92 1917 rcu_read_unlock();
c19e6535 1918
f0fef760
BP
1919 cb->args[0] = port_no;
1920 retval = skb->len;
1921
1922 return retval;
7c40efc9
BP
1923}
1924
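/*
 * Generic netlink operations for vports: NEW, DEL and SET require
 * CAP_NET_ADMIN, while GET is open to unprivileged users and is the only
 * command that also supports dumping.
 */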
f0fef760
BP
1925static struct genl_ops dp_vport_genl_ops[] = {
1926 { .cmd = ODP_VPORT_CMD_NEW,
1927 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1928 .policy = vport_policy,
1929 .doit = odp_vport_cmd_new
1930 },
1931 { .cmd = ODP_VPORT_CMD_DEL,
1932 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1933 .policy = vport_policy,
1934 .doit = odp_vport_cmd_del
1935 },
1936 { .cmd = ODP_VPORT_CMD_GET,
1937 .flags = 0, /* OK for unprivileged users. */
1938 .policy = vport_policy,
1939 .doit = odp_vport_cmd_get,
1940 .dumpit = odp_vport_cmd_dump
1941 },
1942 { .cmd = ODP_VPORT_CMD_SET,
1943 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1944 .policy = vport_policy,
1945 .doit = odp_vport_cmd_set,
1946 },
1947};
1948
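/*
 * Bundles a generic netlink family with its operations and optional
 * multicast group so that registration and teardown can iterate over the
 * dp_genl_families[] table below.
 */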
982b8810
BP
1949struct genl_family_and_ops {
1950 struct genl_family *family;
1951 struct genl_ops *ops;
1952 int n_ops;
1953 struct genl_multicast_group *group;
1954};
ed099e92 1955
982b8810 1956static const struct genl_family_and_ops dp_genl_families[] = {
aaff4b55
BP
1957 { &dp_datapath_genl_family,
1958 dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1959 &dp_datapath_multicast_group },
f0fef760
BP
1960 { &dp_vport_genl_family,
1961 dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1962 &dp_vport_multicast_group },
37a1300c
BP
1963 { &dp_flow_genl_family,
1964 dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1965 &dp_flow_multicast_group },
982b8810
BP
1966 { &dp_packet_genl_family,
1967 dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1968 NULL },
1969};
ed099e92 1970
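/* Unregisters the first n_families entries of dp_genl_families[]. */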
982b8810
BP
1971static void dp_unregister_genl(int n_families)
1972{
1973 int i;
ed099e92 1974
982b8810
BP
1975 for (i = 0; i < n_families; i++) {
1976 genl_unregister_family(dp_genl_families[i].family);
1977 }
ed099e92
BP
1978}
1979
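/*
 * Registers every family in dp_genl_families[] together with its multicast
 * group, plus the packet family's multicast groups.  If any step fails,
 * the families registered so far are unregistered before the error is
 * returned.
 */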
982b8810 1980static int dp_register_genl(void)
064af421 1981{
982b8810
BP
1982 int n_registered;
1983 int err;
1984 int i;
064af421 1985
982b8810
BP
1986 n_registered = 0;
1987 for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1988 const struct genl_family_and_ops *f = &dp_genl_families[i];
064af421 1989
982b8810
BP
1990 err = genl_register_family_with_ops(f->family, f->ops,
1991 f->n_ops);
1992 if (err)
1993 goto error;
1994 n_registered++;
e22d4953 1995
982b8810
BP
1996 if (f->group) {
1997 err = genl_register_mc_group(f->family, f->group);
1998 if (err)
1999 goto error;
2000 }
2001 }
9cc8b4e4 2002
982b8810
BP
2003 err = packet_register_mc_groups();
2004 if (err)
2005 goto error;
2006 return 0;
064af421
BP
2007
2008error:
982b8810
BP
2009 dp_unregister_genl(n_registered);
2010 return err;
064af421
BP
2011}
2012
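/*
 * Module initialization: set up the flow and vport subsystems, hook the
 * netdevice notifier and finally register the generic netlink families.
 * Each step is undone in reverse order if a later one fails.
 */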
22d24ebf
BP
2013static int __init dp_init(void)
2014{
f2459fe7 2015 struct sk_buff *dummy_skb;
22d24ebf
BP
2016 int err;
2017
f2459fe7 2018 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 2019
f2459fe7 2020 pr_info("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
064af421
BP
2021
2022 err = flow_init();
2023 if (err)
2024 goto error;
2025
f2459fe7 2026 err = vport_init();
064af421
BP
2027 if (err)
2028 goto error_flow_exit;
2029
f2459fe7
JG
2030 err = register_netdevice_notifier(&dp_device_notifier);
2031 if (err)
2032 goto error_vport_exit;
2033
982b8810
BP
2034 err = dp_register_genl();
2035 if (err < 0)
37a1300c 2036 goto error_unreg_notifier;
982b8810 2037
064af421
BP
2038 return 0;
2039
2040error_unreg_notifier:
2041 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
2042error_vport_exit:
2043 vport_exit();
064af421
BP
2044error_flow_exit:
2045 flow_exit();
2046error:
2047 return err;
2048}
2049
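/*
 * Module teardown.  rcu_barrier() waits for outstanding RCU callbacks
 * (such as deferred frees queued with call_rcu) to run before the
 * subsystems they rely on are shut down and the module is unloaded.
 */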
2050static void dp_cleanup(void)
2051{
2052 rcu_barrier();
982b8810 2053 dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
064af421 2054 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7 2055 vport_exit();
064af421 2056 flow_exit();
064af421
BP
2057}
2058
2059module_init(dp_init);
2060module_exit(dp_cleanup);
2061
2062MODULE_DESCRIPTION("Open vSwitch switching datapath");
2063MODULE_LICENSE("GPL");