]> git.proxmox.com Git - ovs.git/blame - datapath/datapath.c
ofp-util: Make ofputil_cls_rule_to_match() help with flow cookies too.
[ovs.git] / datapath / datapath.c
CommitLineData
064af421 1/*
a6057323 2 * Copyright (c) 2007, 2008, 2009, 2010 Nicira Networks.
a14bc59f
BP
3 * Distributed under the terms of the GNU GPL version 2.
4 *
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
064af421
BP
7 */
8
9/* Functions for managing the dp interface/device. */
10
dfffaef1
JP
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
064af421
BP
13#include <linux/init.h>
14#include <linux/module.h>
15#include <linux/fs.h>
16#include <linux/if_arp.h>
064af421
BP
17#include <linux/if_vlan.h>
18#include <linux/in.h>
19#include <linux/ip.h>
20#include <linux/delay.h>
21#include <linux/time.h>
22#include <linux/etherdevice.h>
23#include <linux/kernel.h>
24#include <linux/kthread.h>
064af421
BP
25#include <linux/mutex.h>
26#include <linux/percpu.h>
27#include <linux/rcupdate.h>
28#include <linux/tcp.h>
29#include <linux/udp.h>
30#include <linux/version.h>
31#include <linux/ethtool.h>
064af421
BP
32#include <linux/wait.h>
33#include <asm/system.h>
34#include <asm/div64.h>
35#include <asm/bug.h>
656a0e37 36#include <linux/highmem.h>
064af421
BP
37#include <linux/netfilter_bridge.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/inetdevice.h>
40#include <linux/list.h>
41#include <linux/rculist.h>
064af421 42#include <linux/dmi.h>
3c5f6de3 43#include <net/inet_ecn.h>
3fbd517a 44#include <linux/compat.h>
064af421
BP
45
46#include "openvswitch/datapath-protocol.h"
dd8d6b8c 47#include "checksum.h"
064af421
BP
48#include "datapath.h"
49#include "actions.h"
064af421 50#include "flow.h"
7eaa9830 51#include "loop_counter.h"
3fbd517a 52#include "odp-compat.h"
8d5ebd83 53#include "table.h"
f2459fe7 54#include "vport-internal_dev.h"
064af421
BP
55
56#include "compat.h"
57
064af421
BP
/* Hook invoked for bridge-compatibility ioctls; NULL unless a compatibility
 * module has installed one.  Exported so that module can set it. */
int (*dp_ioctl_hook)(struct net_device *dev, struct ifreq *rq, int cmd);
EXPORT_SYMBOL(dp_ioctl_hook);

/* Datapaths.  Protected on the read side by rcu_read_lock, on the write side
 * by dp_mutex.
 *
 * dp_mutex nests inside the RTNL lock: if you need both you must take the RTNL
 * lock first.
 *
 * It is safe to access the datapath and vport structures with just
 * dp_mutex.
 */
static struct datapath *dps[ODP_MAX];
static DEFINE_MUTEX(dp_mutex);

/* Forward declaration; defined below and used by create_dp(). */
static int new_vport(struct datapath *, struct odp_port *, int port_no);
064af421
BP
74
75/* Must be called with rcu_read_lock or dp_mutex. */
76struct datapath *get_dp(int dp_idx)
77{
78 if (dp_idx < 0 || dp_idx >= ODP_MAX)
79 return NULL;
80 return rcu_dereference(dps[dp_idx]);
81}
82EXPORT_SYMBOL_GPL(get_dp);
83
/* Returns datapath 'dp_idx' with dp->mutex held, or NULL if there is no such
 * datapath.  On success the caller must later release dp->mutex.
 *
 * Taking dp->mutex while still holding dp_mutex guarantees the datapath
 * cannot be destroyed between the lookup and the lock acquisition. */
static struct datapath *get_dp_locked(int dp_idx)
{
	struct datapath *dp;

	mutex_lock(&dp_mutex);
	dp = get_dp(dp_idx);
	if (dp)
		mutex_lock(&dp->mutex);
	mutex_unlock(&dp_mutex);
	return dp;
}
95
f2459fe7
JG
96/* Must be called with rcu_read_lock or RTNL lock. */
97const char *dp_name(const struct datapath *dp)
98{
e779d8d9 99 return vport_get_name(dp->ports[ODPP_LOCAL]);
f2459fe7
JG
100}
101
064af421
BP
/* Returns an upper bound on the size of the rtnetlink link message built by
 * dp_fill_ifinfo(): the ifinfomsg header plus every attribute that function
 * may emit.  Keep in sync with dp_fill_ifinfo() -- an undersized estimate
 * shows up there as -EMSGSIZE. */
static inline size_t br_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(1); /* IFLA_OPERSTATE */
}
112
/* Fills 'skb' with an AF_BRIDGE rtnetlink message of type 'event' describing
 * 'port'.  Returns the final message length on success, a negative errno on
 * failure.  Note: the NLA_PUT* macros jump to the nla_put_failure label below
 * when 'skb' runs out of tailroom; -EMSGSIZE there implies a bug in
 * br_nlmsg_size(). */
static int dp_fill_ifinfo(struct sk_buff *skb,
			  const struct vport *port,
			  int event, unsigned int flags)
{
	const struct datapath *dp = port->dp;
	int ifindex = vport_get_ifindex(port);
	int iflink = vport_get_iflink(port);
	struct ifinfomsg *hdr;
	struct nlmsghdr *nlh;

	/* Ports without an underlying net_device report negative errno here. */
	if (ifindex < 0)
		return ifindex;

	if (iflink < 0)
		return iflink;

	nlh = nlmsg_put(skb, 0, 0, event, sizeof(*hdr), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	hdr->ifi_family = AF_BRIDGE;
	hdr->__ifi_pad = 0;
	hdr->ifi_type = ARPHRD_ETHER;
	hdr->ifi_index = ifindex;
	hdr->ifi_flags = vport_get_flags(port);
	hdr->ifi_change = 0;

	NLA_PUT_STRING(skb, IFLA_IFNAME, vport_get_name(port));
	/* The datapath's local port acts as the "master" bridge device. */
	NLA_PUT_U32(skb, IFLA_MASTER, vport_get_ifindex(dp->ports[ODPP_LOCAL]));
	NLA_PUT_U32(skb, IFLA_MTU, vport_get_mtu(port));
#ifdef IFLA_OPERSTATE
	NLA_PUT_U8(skb, IFLA_OPERSTATE,
		   vport_is_running(port)
			? vport_get_operstate(port)
			: IF_OPER_DOWN);
#endif

	NLA_PUT(skb, IFLA_ADDRESS, ETH_ALEN, vport_get_addr(port));

	/* Only report IFLA_LINK when the port is a "virtual" interface on
	 * top of another one. */
	if (ifindex != iflink)
		NLA_PUT_U32(skb, IFLA_LINK,iflink);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
162
/* Broadcasts an rtnetlink notification of type 'event' (RTM_NEWLINK or
 * RTM_DELLINK) about 'port' to the RTNLGRP_LINK multicast group.  On
 * allocation or fill failure the error is recorded on the rtnetlink
 * socket instead. */
static void dp_ifinfo_notify(int event, struct vport *port)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(br_nlmsg_size(), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = dp_fill_ifinfo(skb, port, event, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in br_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
}
185
58c342f6
BP
186static void release_dp(struct kobject *kobj)
187{
188 struct datapath *dp = container_of(kobj, struct datapath, ifobj);
189 kfree(dp);
190}
191
/* kobject type for a datapath's sysfs object; release_dp() frees the
 * enclosing struct datapath when its refcount reaches zero. */
static struct kobj_type dp_ktype = {
	.release = release_dp
};
195
064af421
BP
196static int create_dp(int dp_idx, const char __user *devnamep)
197{
f2459fe7 198 struct odp_port internal_dev_port;
064af421
BP
199 char devname[IFNAMSIZ];
200 struct datapath *dp;
201 int err;
202 int i;
203
204 if (devnamep) {
968f7c8d
BP
205 int retval = strncpy_from_user(devname, devnamep, IFNAMSIZ);
206 if (retval < 0) {
207 err = -EFAULT;
064af421 208 goto err;
968f7c8d
BP
209 } else if (retval >= IFNAMSIZ) {
210 err = -ENAMETOOLONG;
211 goto err;
212 }
064af421
BP
213 } else {
214 snprintf(devname, sizeof devname, "of%d", dp_idx);
215 }
216
217 rtnl_lock();
218 mutex_lock(&dp_mutex);
219 err = -ENODEV;
220 if (!try_module_get(THIS_MODULE))
221 goto err_unlock;
222
223 /* Exit early if a datapath with that number already exists.
224 * (We don't use -EEXIST because that's ambiguous with 'devname'
225 * conflicting with an existing network device name.) */
226 err = -EBUSY;
227 if (get_dp(dp_idx))
228 goto err_put_module;
229
230 err = -ENOMEM;
231 dp = kzalloc(sizeof *dp, GFP_KERNEL);
232 if (dp == NULL)
233 goto err_put_module;
828bc1f0 234 INIT_LIST_HEAD(&dp->port_list);
064af421
BP
235 mutex_init(&dp->mutex);
236 dp->dp_idx = dp_idx;
237 for (i = 0; i < DP_N_QUEUES; i++)
238 skb_queue_head_init(&dp->queues[i]);
239 init_waitqueue_head(&dp->waitqueue);
240
58c342f6 241 /* Initialize kobject for bridge. This will be added as
b0c32774 242 * /sys/class/net/<devname>/brif later, if sysfs is enabled. */
58c342f6 243 dp->ifobj.kset = NULL;
58c342f6
BP
244 kobject_init(&dp->ifobj, &dp_ktype);
245
828bc1f0
BP
246 /* Allocate table. */
247 err = -ENOMEM;
8d5ebd83 248 rcu_assign_pointer(dp->table, tbl_create(0));
828bc1f0
BP
249 if (!dp->table)
250 goto err_free_dp;
251
d6fbec6d 252 /* Set up our datapath device. */
092a872d
BP
253 BUILD_BUG_ON(sizeof(internal_dev_port.devname) != sizeof(devname));
254 strcpy(internal_dev_port.devname, devname);
c3827f61 255 strcpy(internal_dev_port.type, "internal");
e779d8d9 256 err = new_vport(dp, &internal_dev_port, ODPP_LOCAL);
828bc1f0 257 if (err) {
f2459fe7
JG
258 if (err == -EBUSY)
259 err = -EEXIST;
260
064af421 261 goto err_destroy_table;
828bc1f0 262 }
064af421
BP
263
264 dp->drop_frags = 0;
265 dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
266 if (!dp->stats_percpu)
267 goto err_destroy_local_port;
268
269 rcu_assign_pointer(dps[dp_idx], dp);
dad80ec3
JG
270 dp_sysfs_add_dp(dp);
271
064af421
BP
272 mutex_unlock(&dp_mutex);
273 rtnl_unlock();
274
064af421
BP
275 return 0;
276
277err_destroy_local_port:
c3827f61 278 dp_detach_port(dp->ports[ODPP_LOCAL]);
064af421 279err_destroy_table:
8d5ebd83 280 tbl_destroy(dp->table, NULL);
064af421
BP
281err_free_dp:
282 kfree(dp);
283err_put_module:
284 module_put(THIS_MODULE);
285err_unlock:
286 mutex_unlock(&dp_mutex);
287 rtnl_unlock();
288err:
289 return err;
290}
291
/* Tears down and frees datapath 'dp': detaches every port, removes sysfs
 * entries, unpublishes the datapath, and releases its flow table, queues,
 * and statistics.  Caller holds the RTNL lock and dp_mutex. */
static void do_destroy_dp(struct datapath *dp)
{
	struct vport *p, *n;
	int i;

	/* Detach all ports except the local one, which must go away last. */
	list_for_each_entry_safe (p, n, &dp->port_list, node)
		if (p->port_no != ODPP_LOCAL)
			dp_detach_port(p);

	dp_sysfs_del_dp(dp);

	/* Unpublish so new RCU readers can no longer find the datapath. */
	rcu_assign_pointer(dps[dp->dp_idx], NULL);

	dp_detach_port(dp->ports[ODPP_LOCAL]);

	tbl_destroy(dp->table, flow_free_tbl);

	for (i = 0; i < DP_N_QUEUES; i++)
		skb_queue_purge(&dp->queues[i]);
	free_percpu(dp->stats_percpu);
	/* 'dp' itself is freed by release_dp() when the kobject ref drops. */
	kobject_put(&dp->ifobj);
	module_put(THIS_MODULE);
}
315
/* Destroys the datapath numbered 'dp_idx'.  Returns 0 on success or -ENODEV
 * if no such datapath exists.  Takes the RTNL lock before dp_mutex, per the
 * documented lock ordering. */
static int destroy_dp(int dp_idx)
{
	struct datapath *dp;
	int err;

	rtnl_lock();
	mutex_lock(&dp_mutex);
	dp = get_dp(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	do_destroy_dp(dp);
	err = 0;

err_unlock:
	mutex_unlock(&dp_mutex);
	rtnl_unlock();
	return err;
}
336
/* Creates a vport from the userspace description in 'odp_port', attaches it
 * to 'dp' as port 'port_no', and sends an RTM_NEWLINK notification.
 * Returns 0 on success or a negative errno from vport_add().
 * Called with RTNL lock and dp_mutex. */
static int new_vport(struct datapath *dp, struct odp_port *odp_port, int port_no)
{
	struct vport_parms parms;
	struct vport *vport;

	parms.name = odp_port->devname;
	parms.type = odp_port->type;
	parms.config = odp_port->config;
	parms.dp = dp;
	parms.port_no = port_no;

	vport_lock();
	vport = vport_add(&parms);
	vport_unlock();

	if (IS_ERR(vport))
		return PTR_ERR(vport);

	/* Publish the fully initialized vport to RCU readers. */
	rcu_assign_pointer(dp->ports[port_no], vport);
	list_add_rcu(&vport->node, &dp->port_list);
	dp->n_ports++;

	dp_ifinfo_notify(RTM_NEWLINK, vport);

	return 0;
}
364
/* Attaches a new vport, described by the userspace structure at 'portp', to
 * datapath 'dp_idx'.  The lowest free port number is assigned (0 is reserved
 * for the local port) and written back to portp->port.  Returns 0 on success
 * or a negative errno (-EFBIG when no port number is free). */
static int attach_port(int dp_idx, struct odp_port __user *portp)
{
	struct datapath *dp;
	struct odp_port port;
	int port_no;
	int err;

	err = -EFAULT;
	if (copy_from_user(&port, portp, sizeof port))
		goto out;
	/* Force NUL termination on strings copied from userspace. */
	port.devname[IFNAMSIZ - 1] = '\0';
	port.type[VPORT_TYPE_SIZE - 1] = '\0';

	rtnl_lock();
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto out_unlock_rtnl;

	/* Find the first unused port number. */
	for (port_no = 1; port_no < DP_MAX_PORTS; port_no++)
		if (!dp->ports[port_no])
			goto got_port_no;
	err = -EFBIG;
	goto out_unlock_dp;

got_port_no:
	err = new_vport(dp, &port, port_no);
	if (err)
		goto out_unlock_dp;

	/* Keep internal devices' MTU consistent with attached devices. */
	set_internal_devs_mtu(dp);
	dp_sysfs_add_if(dp->ports[port_no]);

	/* Report the assigned port number back to userspace. */
	err = put_user(port_no, &portp->port);

out_unlock_dp:
	mutex_unlock(&dp->mutex);
out_unlock_rtnl:
	rtnl_unlock();
out:
	return err;
}
407
/* Detaches vport 'p' from its datapath and destroys it, sending an
 * RTM_DELLINK notification.  Returns the result of vport_del().
 * Caller must hold the RTNL lock. */
int dp_detach_port(struct vport *p)
{
	int err;

	ASSERT_RTNL();

	/* The local port has no per-interface sysfs entry. */
	if (p->port_no != ODPP_LOCAL)
		dp_sysfs_del_if(p);
	dp_ifinfo_notify(RTM_DELLINK, p);

	/* First drop references to device. */
	p->dp->n_ports--;
	list_del_rcu(&p->node);
	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

	/* Then destroy it. */
	vport_lock();
	err = vport_del(p);
	vport_unlock();

	return err;
}
430
/* Userspace-facing detach: removes port 'port_no' from datapath 'dp_idx'.
 * The local port (ODPP_LOCAL) cannot be detached this way.  Returns 0 on
 * success, -EINVAL for a bad port number, -ENODEV/-ENOENT when the datapath
 * or port does not exist. */
static int detach_port(int dp_idx, int port_no)
{
	struct vport *p;
	struct datapath *dp;
	int err;

	err = -EINVAL;
	if (port_no < 0 || port_no >= DP_MAX_PORTS || port_no == ODPP_LOCAL)
		goto out;

	rtnl_lock();
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto out_unlock_rtnl;

	p = dp->ports[port_no];
	err = -ENOENT;
	if (!p)
		goto out_unlock_dp;

	err = dp_detach_port(p);

out_unlock_dp:
	mutex_unlock(&dp->mutex);
out_unlock_rtnl:
	rtnl_unlock();
out:
	return err;
}
461
8819fac7 462/* Must be called with rcu_read_lock. */
e779d8d9 463void dp_process_received_packet(struct vport *p, struct sk_buff *skb)
064af421
BP
464{
465 struct datapath *dp = p->dp;
466 struct dp_stats_percpu *stats;
8819fac7 467 int stats_counter_off;
55574bb0
BP
468 struct sw_flow_actions *acts;
469 struct loop_counter *loop;
4c1ad233 470 int error;
064af421 471
e779d8d9 472 OVS_CB(skb)->vport = p;
a063b0df 473
3976f6d5
JG
474 if (!OVS_CB(skb)->flow) {
475 struct odp_flow_key key;
476 struct tbl_node *flow_node;
b7a31ec1 477 bool is_frag;
4c1ad233 478
3976f6d5 479 /* Extract flow from 'skb' into 'key'. */
b7a31ec1 480 error = flow_extract(skb, p ? p->port_no : ODPP_NONE, &key, &is_frag);
3976f6d5
JG
481 if (unlikely(error)) {
482 kfree_skb(skb);
483 return;
484 }
064af421 485
b7a31ec1 486 if (is_frag && dp->drop_frags) {
3976f6d5
JG
487 kfree_skb(skb);
488 stats_counter_off = offsetof(struct dp_stats_percpu, n_frags);
489 goto out;
490 }
491
492 /* Look up flow. */
493 flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
494 flow_hash(&key), flow_cmp);
495 if (unlikely(!flow_node)) {
496 dp_output_control(dp, skb, _ODPL_MISS_NR, OVS_CB(skb)->tun_id);
497 stats_counter_off = offsetof(struct dp_stats_percpu, n_missed);
498 goto out;
499 }
500
501 OVS_CB(skb)->flow = flow_cast(flow_node);
55574bb0
BP
502 }
503
3976f6d5 504 flow_used(OVS_CB(skb)->flow, skb);
55574bb0 505
3976f6d5 506 acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
55574bb0
BP
507
508 /* Check whether we've looped too much. */
7eaa9830
JG
509 loop = loop_get_counter();
510 if (unlikely(++loop->count > MAX_LOOPS))
55574bb0
BP
511 loop->looping = true;
512 if (unlikely(loop->looping)) {
7eaa9830 513 loop_suppress(dp, acts);
55574bb0 514 goto out_loop;
064af421 515 }
8819fac7 516
55574bb0 517 /* Execute actions. */
3976f6d5 518 execute_actions(dp, skb, &OVS_CB(skb)->flow->key, acts->actions,
cdee00fd 519 acts->actions_len);
55574bb0
BP
520 stats_counter_off = offsetof(struct dp_stats_percpu, n_hit);
521
522 /* Check whether sub-actions looped too much. */
523 if (unlikely(loop->looping))
7eaa9830 524 loop_suppress(dp, acts);
55574bb0
BP
525
526out_loop:
527 /* Decrement loop counter. */
528 if (!--loop->count)
529 loop->looping = false;
7eaa9830 530 loop_put_counter();
55574bb0 531
8819fac7 532out:
55574bb0 533 /* Update datapath statistics. */
8819fac7
JG
534 local_bh_disable();
535 stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
38c6ecbc
JG
536
537 write_seqcount_begin(&stats->seqlock);
8819fac7 538 (*(u64 *)((u8 *)stats + stats_counter_off))++;
38c6ecbc
JG
539 write_seqcount_end(&stats->seqlock);
540
8819fac7 541 local_bh_enable();
064af421
BP
542}
543
cb5087ca
BP
/* Append each packet in 'skb' list to 'queue'.  There will be only one packet
 * unless we broke up a GSO packet.  Each packet is prefixed with a struct
 * odp_msg header carrying the message type ('queue_no'), the originating
 * port, and 'arg'.  On failure, frees 'skb' and all remaining packets in the
 * list and returns a negative errno. */
static int queue_control_packets(struct sk_buff *skb, struct sk_buff_head *queue,
				 int queue_no, u32 arg)
{
	struct sk_buff *nskb;
	int port_no;
	int err;

	if (OVS_CB(skb)->vport)
		port_no = OVS_CB(skb)->vport->port_no;
	else
		port_no = ODPP_LOCAL;

	do {
		struct odp_msg *header;

		/* Unlink the current packet from the GSO segment list. */
		nskb = skb->next;
		skb->next = NULL;

		/* Make room for the header in front of the packet data. */
		err = skb_cow(skb, sizeof *header);
		if (err)
			goto err_kfree_skbs;

		header = (struct odp_msg*)__skb_push(skb, sizeof *header);
		header->type = queue_no;
		header->length = skb->len;
		header->port = port_no;
		header->reserved = 0;
		header->arg = arg;
		skb_queue_tail(queue, skb);

		skb = nskb;
	} while (skb);
	return 0;

err_kfree_skbs:
	kfree_skb(skb);
	while ((skb = nskb) != NULL) {
		nskb = skb->next;
		kfree_skb(skb);
	}
	return err;
}
588
fceb2a5b
JG
/* Queues 'skb' for delivery to userspace on 'dp''s queue number 'queue_no'
 * (one of the miss, action, or sFlow queues), tagging it with 'arg'.  GSO
 * packets are segmented first so userspace never receives frames larger than
 * the MTU.  Takes ownership of 'skb'.  Returns 0 on success or a negative
 * errno; on failure the datapath's n_lost statistic is incremented. */
int dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
		      u32 arg)
{
	struct dp_stats_percpu *stats;
	struct sk_buff_head *queue;
	int err;

	WARN_ON_ONCE(skb_shared(skb));
	BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR && queue_no != _ODPL_SFLOW_NR);
	queue = &dp->queues[queue_no];
	err = -ENOBUFS;
	if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
		goto err_kfree_skb;

	forward_ip_summed(skb);

	err = vswitch_skb_checksum_setup(skb);
	if (err)
		goto err_kfree_skb;

	/* Break apart GSO packets into their component pieces.  Otherwise
	 * userspace may try to stuff a 64kB packet into a 1500-byte MTU. */
	if (skb_is_gso(skb)) {
		struct sk_buff *nskb = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
		if (nskb) {
			kfree_skb(skb);
			skb = nskb;
			if (unlikely(IS_ERR(skb))) {
				err = PTR_ERR(skb);
				goto err;
			}
		} else {
			/* XXX This case might not be possible.  It's hard to
			 * tell from the skb_gso_segment() code and comment. */
		}
	}

	err = queue_control_packets(skb, queue, queue_no, arg);
	wake_up_interruptible(&dp->waitqueue);
	return err;

err_kfree_skb:
	kfree_skb(skb);
err:
	/* Record the lost packet under the per-CPU stats seqlock. */
	local_bh_disable();
	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

	write_seqcount_begin(&stats->seqlock);
	stats->n_lost++;
	write_seqcount_end(&stats->seqlock);

	local_bh_enable();

	return err;
}
644
/* Deletes every flow in 'dp' by swapping in a fresh, empty flow table.  The
 * old table and its flows are freed only after an RCU grace period, keeping
 * concurrent readers safe.  Returns 0 or -ENOMEM. */
static int flush_flows(struct datapath *dp)
{
	struct tbl *old_table = rcu_dereference(dp->table);
	struct tbl *new_table;

	new_table = tbl_create(0);
	if (!new_table)
		return -ENOMEM;

	rcu_assign_pointer(dp->table, new_table);

	tbl_deferred_destroy(old_table, flow_free_tbl);

	return 0;
}
660
cdee00fd 661static int validate_actions(const struct nlattr *actions, u32 actions_len)
064af421 662{
cdee00fd
BP
663 const struct nlattr *a;
664 int rem;
665
666 nla_for_each_attr(a, actions, actions_len, rem) {
667 static const u32 action_lens[ODPAT_MAX + 1] = {
668 [ODPAT_OUTPUT] = 4,
669 [ODPAT_CONTROLLER] = 4,
670 [ODPAT_SET_DL_TCI] = 2,
671 [ODPAT_STRIP_VLAN] = 0,
672 [ODPAT_SET_DL_SRC] = ETH_ALEN,
673 [ODPAT_SET_DL_DST] = ETH_ALEN,
674 [ODPAT_SET_NW_SRC] = 4,
675 [ODPAT_SET_NW_DST] = 4,
676 [ODPAT_SET_NW_TOS] = 1,
677 [ODPAT_SET_TP_SRC] = 2,
678 [ODPAT_SET_TP_DST] = 2,
679 [ODPAT_SET_TUNNEL] = 4,
680 [ODPAT_SET_PRIORITY] = 4,
681 [ODPAT_POP_PRIORITY] = 0,
682 [ODPAT_DROP_SPOOFED_ARP] = 0,
683 };
684 int type = nla_type(a);
685
686 if (type > ODPAT_MAX || nla_len(a) != action_lens[type])
687 return -EINVAL;
688
689 switch (type) {
690 case ODPAT_UNSPEC:
691 return -EINVAL;
064af421 692
cdee00fd
BP
693 case ODPAT_CONTROLLER:
694 case ODPAT_STRIP_VLAN:
695 case ODPAT_SET_DL_SRC:
696 case ODPAT_SET_DL_DST:
697 case ODPAT_SET_NW_SRC:
698 case ODPAT_SET_NW_DST:
699 case ODPAT_SET_TP_SRC:
700 case ODPAT_SET_TP_DST:
701 case ODPAT_SET_TUNNEL:
702 case ODPAT_SET_PRIORITY:
703 case ODPAT_POP_PRIORITY:
704 case ODPAT_DROP_SPOOFED_ARP:
705 /* No validation needed. */
706 break;
707
708 case ODPAT_OUTPUT:
709 if (nla_get_u32(a) >= DP_MAX_PORTS)
710 return -EINVAL;
711
712 case ODPAT_SET_DL_TCI:
713 if (nla_get_be16(a) & htons(VLAN_CFI_MASK))
064af421 714 return -EINVAL;
cdee00fd 715 break;
064af421 716
cdee00fd
BP
717 case ODPAT_SET_NW_TOS:
718 if (nla_get_u8(a) & INET_ECN_MASK)
719 return -EINVAL;
720 break;
064af421 721
cdee00fd
BP
722 default:
723 return -EOPNOTSUPP;
724 }
725 }
3c5f6de3 726
cdee00fd
BP
727 if (rem > 0)
728 return -EINVAL;
064af421 729
cdee00fd 730 return 0;
064af421
BP
731}
732
/* Allocates a kernel copy of the actions in the userspace flow description
 * 'flow', validates them, and returns the resulting sw_flow_actions or an
 * ERR_PTR on allocation/copy/validation failure.  The caller owns the
 * returned actions. */
static struct sw_flow_actions *get_actions(const struct odp_flow *flow)
{
	struct sw_flow_actions *actions;
	int error;

	actions = flow_actions_alloc(flow->actions_len);
	error = PTR_ERR(actions);
	if (IS_ERR(actions))
		goto error;

	error = -EFAULT;
	if (copy_from_user(actions->actions, flow->actions, flow->actions_len))
		goto error_free_actions;
	error = validate_actions(actions->actions, actions->actions_len);
	if (error)
		goto error_free_actions;

	return actions;

error_free_actions:
	kfree(actions);
error:
	return ERR_PTR(error);
}
757
/* Copies 'flow''s statistics into the userspace-format structure '*stats'.
 * flow->used is a jiffies timestamp; it is converted to a monotonic-clock
 * wall time by subtracting the elapsed jiffies from the current monotonic
 * time.  An unused flow (flow->used == 0) reports a zero timestamp. */
static void get_stats(struct sw_flow *flow, struct odp_flow_stats *stats)
{
	if (flow->used) {
		struct timespec offset_ts, used, now_mono;

		ktime_get_ts(&now_mono);
		jiffies_to_timespec(jiffies - flow->used, &offset_ts);
		/* set_normalized_timespec() handles a negative nsec result. */
		set_normalized_timespec(&used, now_mono.tv_sec - offset_ts.tv_sec,
					now_mono.tv_nsec - offset_ts.tv_nsec);

		stats->used_sec = used.tv_sec;
		stats->used_nsec = used.tv_nsec;
	} else {
		stats->used_sec = 0;
		stats->used_nsec = 0;
	}

	stats->n_packets = flow->packet_count;
	stats->n_bytes = flow->byte_count;
	stats->reserved = 0;
	stats->tcp_flags = flow->tcp_flags;
	stats->error = 0;
}
781
782static void clear_stats(struct sw_flow *flow)
783{
6bfafa55 784 flow->used = 0;
064af421 785 flow->tcp_flags = 0;
064af421
BP
786 flow->packet_count = 0;
787 flow->byte_count = 0;
788}
789
8d5ebd83
JG
/* Grows 'dp''s flow table by swapping in an expanded copy.  The old table is
 * destroyed after an RCU grace period (its flows are shared with the new
 * table, so no flow destructor is passed).  Returns 0 or a negative errno
 * from tbl_expand(). */
static int expand_table(struct datapath *dp)
{
	struct tbl *old_table = rcu_dereference(dp->table);
	struct tbl *new_table;

	new_table = tbl_expand(old_table);
	if (IS_ERR(new_table))
		return PTR_ERR(new_table);

	rcu_assign_pointer(dp->table, new_table);
	tbl_deferred_destroy(old_table, NULL);

	return 0;
}
804
44e05eca
BP
/* Creates or modifies the flow described by 'uf' and stores the flow's
 * statistics into '*stats' (zeroed for a newly created flow).  uf->flags
 * controls behavior: ODPPF_CREATE permits creating a missing flow,
 * ODPPF_MODIFY permits changing an existing one, ODPPF_ZERO_STATS resets the
 * statistics of a modified flow.  Returns 0 on success or a negative errno
 * (-ENOENT / -EEXIST when the required flag is absent). */
static int do_put_flow(struct datapath *dp, struct odp_flow_put *uf,
		       struct odp_flow_stats *stats)
{
	struct tbl_node *flow_node;
	struct sw_flow *flow;
	struct tbl *table;
	int error;

	table = rcu_dereference(dp->table);
	flow_node = tbl_lookup(table, &uf->flow.key, flow_hash(&uf->flow.key), flow_cmp);
	if (!flow_node) {
		/* No such flow. */
		struct sw_flow_actions *acts;

		error = -ENOENT;
		if (!(uf->flags & ODPPF_CREATE))
			goto error;

		/* Expand table, if necessary, to make room. */
		if (tbl_count(table) >= tbl_n_buckets(table)) {
			error = expand_table(dp);
			if (error)
				goto error;
			/* Re-fetch: expand_table() replaced the table. */
			table = rcu_dereference(dp->table);
		}

		/* Allocate flow. */
		flow = flow_alloc();
		if (IS_ERR(flow)) {
			error = PTR_ERR(flow);
			goto error;
		}
		flow->key = uf->flow.key;
		clear_stats(flow);

		/* Obtain actions. */
		acts = get_actions(&uf->flow);
		error = PTR_ERR(acts);
		if (IS_ERR(acts))
			goto error_free_flow;
		rcu_assign_pointer(flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = tbl_insert(table, &flow->tbl_node, flow_hash(&flow->key));
		if (error)
			goto error_free_flow_acts;

		memset(stats, 0, sizeof(struct odp_flow_stats));
	} else {
		/* We found a matching flow. */
		struct sw_flow_actions *old_acts, *new_acts;

		flow = flow_cast(flow_node);

		/* Bail out if we're not allowed to modify an existing flow. */
		error = -EEXIST;
		if (!(uf->flags & ODPPF_MODIFY))
			goto error;

		/* Swap actions, but only if they actually changed; the old
		 * actions are freed after an RCU grace period. */
		new_acts = get_actions(&uf->flow);
		error = PTR_ERR(new_acts);
		if (IS_ERR(new_acts))
			goto error;
		old_acts = rcu_dereference(flow->sf_acts);
		if (old_acts->actions_len != new_acts->actions_len ||
		    memcmp(old_acts->actions, new_acts->actions,
			   old_acts->actions_len)) {
			rcu_assign_pointer(flow->sf_acts, new_acts);
			flow_deferred_free_acts(old_acts);
		} else {
			kfree(new_acts);
		}

		/* Fetch stats, then clear them if necessary. */
		spin_lock_bh(&flow->lock);
		get_stats(flow, stats);
		if (uf->flags & ODPPF_ZERO_STATS)
			clear_stats(flow);
		spin_unlock_bh(&flow->lock);
	}

	return 0;

error_free_flow_acts:
	kfree(flow->sf_acts);
error_free_flow:
	/* Clear sf_acts so flow_put() does not free the actions again. */
	flow->sf_acts = NULL;
	flow_put(flow);
error:
	return error;
}
897
44e05eca
BP
898static int put_flow(struct datapath *dp, struct odp_flow_put __user *ufp)
899{
900 struct odp_flow_stats stats;
901 struct odp_flow_put uf;
902 int error;
903
904 if (copy_from_user(&uf, ufp, sizeof(struct odp_flow_put)))
905 return -EFAULT;
906
907 error = do_put_flow(dp, &uf, &stats);
908 if (error)
909 return error;
910
776f10ce
BP
911 if (copy_to_user(&ufp->flow.stats, &stats,
912 sizeof(struct odp_flow_stats)))
44e05eca
BP
913 return -EFAULT;
914
915 return 0;
916}
917
/* Copies 'flow''s statistics to userspace at 'ustats' and, when the caller
 * provided room ('*actions_lenp' nonzero), copies up to that many bytes of
 * the flow's actions to 'actions', writing the actions' true length back
 * through 'actions_lenp'.  ODPFF_ZERO_TCP_FLAGS in 'query_flags' clears the
 * flow's accumulated TCP flags under the flow lock.  Returns 0 or -EFAULT. */
static int do_answer_query(struct sw_flow *flow, u32 query_flags,
			   struct odp_flow_stats __user *ustats,
			   struct nlattr __user *actions,
			   u32 __user *actions_lenp)
{
	struct sw_flow_actions *sf_acts;
	struct odp_flow_stats stats;
	u32 actions_len;

	spin_lock_bh(&flow->lock);
	get_stats(flow, &stats);
	if (query_flags & ODPFF_ZERO_TCP_FLAGS)
		flow->tcp_flags = 0;

	spin_unlock_bh(&flow->lock);

	if (copy_to_user(ustats, &stats, sizeof(struct odp_flow_stats)) ||
	    get_user(actions_len, actions_lenp))
		return -EFAULT;

	if (!actions_len)
		return 0;

	sf_acts = rcu_dereference(flow->sf_acts);
	/* Copy at most the caller's buffer size, but report the full size. */
	if (put_user(sf_acts->actions_len, actions_lenp) ||
	    (actions && copy_to_user(actions, sf_acts->actions,
				     min(sf_acts->actions_len, actions_len))))
		return -EFAULT;

	return 0;
}
949
18fdbe16
JG
/* Answers a query about 'flow' into the userspace structure at 'ufp':
 * fetches the caller's actions buffer pointer, then delegates to
 * do_answer_query().
 * NOTE(review): 'actions' holds a pointer read from userspace but is not
 * declared 'struct nlattr __user *' here -- verify against sparse
 * annotations; it is passed straight to a __user parameter below. */
static int answer_query(struct sw_flow *flow, u32 query_flags,
			struct odp_flow __user *ufp)
{
	struct nlattr *actions;

	if (get_user(actions, &ufp->actions))
		return -EFAULT;

	return do_answer_query(flow, query_flags,
			       &ufp->stats, actions, &ufp->actions_len);
}
961
/* Removes the flow matching 'key' from 'dp''s flow table and returns it, or
 * an ERR_PTR (-ENOENT if no flow matches).  The caller takes ownership of
 * the returned flow and is responsible for freeing it after an RCU grace
 * period. */
static struct sw_flow *do_del_flow(struct datapath *dp, struct odp_flow_key *key)
{
	struct tbl *table = rcu_dereference(dp->table);
	struct tbl_node *flow_node;
	int error;

	flow_node = tbl_lookup(table, key, flow_hash(key), flow_cmp);
	if (!flow_node)
		return ERR_PTR(-ENOENT);

	error = tbl_remove(table, flow_node);
	if (error)
		return ERR_PTR(error);

	/* XXX Returned flow_node's statistics might lose a few packets, since
	 * other CPUs can be using this flow.  We used to synchronize_rcu() to
	 * make sure that we get completely accurate stats, but that blows our
	 * performance, badly. */
	return flow_cast(flow_node);
}
982
/* Userspace-facing flow delete: removes the flow whose key is given in
 * '*ufp', reports its final statistics and actions back through 'ufp', and
 * frees the flow after an RCU grace period.  Returns 0 or a negative
 * errno. */
static int del_flow(struct datapath *dp, struct odp_flow __user *ufp)
{
	struct sw_flow *flow;
	struct odp_flow uf;
	int error;

	if (copy_from_user(&uf, ufp, sizeof uf))
		return -EFAULT;

	flow = do_del_flow(dp, &uf.key);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	error = answer_query(flow, 0, ufp);
	flow_deferred_free(flow);
	return error;
}
1000
/* Looks up each flow requested in the userspace vector 'flowvec' and answers
 * its query in place.  A flow that does not exist gets ENOENT stored in its
 * stats.error field rather than failing the whole call.  Returns the number
 * of flows processed, or -EFAULT on a userspace access error. */
static int do_query_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
{
	struct tbl *table = rcu_dereference(dp->table);
	u32 i;

	for (i = 0; i < flowvec->n_flows; i++) {
		struct odp_flow __user *ufp = &flowvec->flows[i];
		struct odp_flow uf;
		struct tbl_node *flow_node;
		int error;

		if (copy_from_user(&uf, ufp, sizeof uf))
			return -EFAULT;

		flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
		if (!flow_node)
			error = put_user(ENOENT, &ufp->stats.error);
		else
			error = answer_query(flow_cast(flow_node), uf.flags, ufp);
		if (error)
			return -EFAULT;
	}
	return flowvec->n_flows;
}
1025
/* Iteration state shared between do_list_flows() and its per-flow callback
 * list_flow(). */
struct list_flows_cbdata {
	struct odp_flow __user *uflows;	/* Userspace output array. */
	u32 n_flows;			/* Capacity of 'uflows'. */
	u32 listed_flows;		/* Entries written so far. */
};
1031
/* tbl_foreach() callback: copies one flow's key out to the next free slot of
 * the userspace array and answers its stats/actions query.  Returns 0 to
 * continue, a positive count to stop once the array is full, or a negative
 * errno on failure. */
static int list_flow(struct tbl_node *node, void *cbdata_)
{
	struct sw_flow *flow = flow_cast(node);
	struct list_flows_cbdata *cbdata = cbdata_;
	struct odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
	int error;

	if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
		return -EFAULT;
	error = answer_query(flow, 0, ufp);
	if (error)
		return error;

	/* A nonzero return stops tbl_foreach(). */
	if (cbdata->listed_flows >= cbdata->n_flows)
		return cbdata->listed_flows;
	return 0;
}
1049
/* Dumps up to flowvec->n_flows flows from 'dp''s flow table into the
 * userspace array flowvec->flows.  Returns the number of flows written or a
 * negative errno. */
static int do_list_flows(struct datapath *dp, const struct odp_flowvec *flowvec)
{
	struct list_flows_cbdata cbdata;
	int error;

	if (!flowvec->n_flows)
		return 0;

	cbdata.uflows = flowvec->flows;
	cbdata.n_flows = flowvec->n_flows;
	cbdata.listed_flows = 0;

	/* tbl_foreach() stops early when list_flow() returns nonzero; a
	 * positive stop value is the count of flows listed. */
	error = tbl_foreach(rcu_dereference(dp->table), list_flow, &cbdata);
	return error ? error : cbdata.listed_flows;
}
1065
/* Common wrapper for the flow-vector ioctls: copies the odp_flowvec header
 * from userspace at 'argp', bounds-checks it, invokes 'function' (e.g.
 * do_query_flows or do_list_flows), and writes the number of flows actually
 * handled back to userspace when it differs from the request.  Returns 0 or
 * a negative errno. */
static int do_flowvec_ioctl(struct datapath *dp, unsigned long argp,
			    int (*function)(struct datapath *,
					    const struct odp_flowvec *))
{
	struct odp_flowvec __user *uflowvec;
	struct odp_flowvec flowvec;
	int retval;

	uflowvec = (struct odp_flowvec __user *)argp;
	if (copy_from_user(&flowvec, uflowvec, sizeof flowvec))
		return -EFAULT;

	/* Guard against overflow when userspace sizes are multiplied. */
	if (flowvec.n_flows > INT_MAX / sizeof(struct odp_flow))
		return -EINVAL;

	retval = function(dp, &flowvec);
	return (retval < 0 ? retval
		: retval == flowvec.n_flows ? 0
		: put_user(retval, &uflowvec->n_flows));
}
1086
/* ODP_EXECUTE backend: builds an skb from a userspace packet image, copies
 * and validates the requested action list, and runs the actions on the
 * packet as if it had arrived on the datapath.
 *
 * Returns 0 on success or a negative errno (EINVAL for a bad length,
 * EFAULT for userspace faults, ENOMEM if the skb cannot be allocated, or
 * whatever validate_actions()/execute_actions() report). */
static int do_execute(struct datapath *dp, const struct odp_execute *execute)
{
	struct odp_flow_key key;
	struct sk_buff *skb;
	struct sw_flow_actions *actions;
	struct ethhdr *eth;
	bool is_frag;
	int err;

	/* Packet must hold at least an Ethernet header and fit in 16 bits. */
	err = -EINVAL;
	if (execute->length < ETH_HLEN || execute->length > 65535)
		goto error;

	actions = flow_actions_alloc(execute->actions_len);
	if (IS_ERR(actions)) {
		err = PTR_ERR(actions);
		goto error;
	}

	err = -EFAULT;
	if (copy_from_user(actions->actions, execute->actions, execute->actions_len))
		goto error_free_actions;

	/* Validate after copying so userspace cannot race a TOCTOU swap. */
	err = validate_actions(actions->actions, execute->actions_len);
	if (err)
		goto error_free_actions;

	err = -ENOMEM;
	skb = alloc_skb(execute->length, GFP_KERNEL);
	if (!skb)
		goto error_free_actions;

	err = -EFAULT;
	if (copy_from_user(skb_put(skb, execute->length), execute->data,
			   execute->length))
		goto error_free_skb;

	skb_reset_mac_header(skb);
	eth = eth_hdr(skb);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have. */
	if (ntohs(eth->h_proto) >= 1536)
		skb->protocol = eth->h_proto;
	else
		skb->protocol = htons(ETH_P_802_2);

	/* -1 in-port: the packet did not arrive on any real port. */
	err = flow_extract(skb, -1, &key, &is_frag);
	if (err)
		goto error_free_skb;

	/* execute_actions() consumes the skb, so no free on this path. */
	rcu_read_lock();
	err = execute_actions(dp, skb, &key, actions->actions, actions->actions_len);
	rcu_read_unlock();

	kfree(actions);
	return err;

error_free_skb:
	kfree_skb(skb);
error_free_actions:
	kfree(actions);
error:
	return err;
}
1153
44e05eca
BP
1154static int execute_packet(struct datapath *dp, const struct odp_execute __user *executep)
1155{
1156 struct odp_execute execute;
1157
1158 if (copy_from_user(&execute, executep, sizeof execute))
1159 return -EFAULT;
1160
1161 return do_execute(dp, &execute);
1162}
1163
/* ODP_DP_STATS: fills a userspace odp_stats with a snapshot of the
 * datapath's flow-table occupancy, port counts, and per-CPU packet
 * counters.
 *
 * Returns 0 on success or -EFAULT if the copy-out faults. */
static int get_dp_stats(struct datapath *dp, struct odp_stats __user *statsp)
{
	struct tbl *table = rcu_dereference(dp->table);
	struct odp_stats stats;
	int i;

	stats.n_flows = tbl_count(table);
	stats.cur_capacity = tbl_n_buckets(table);
	stats.max_capacity = TBL_MAX_BUCKETS;
	stats.n_ports = dp->n_ports;
	stats.max_ports = DP_MAX_PORTS;
	stats.n_frags = stats.n_hit = stats.n_missed = stats.n_lost = 0;
	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned seqcount;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		/* Seqcount retry loop: re-read the whole per-CPU struct until
		 * we get a copy that was not concurrently being updated. */
		do {
			seqcount = read_seqcount_begin(&percpu_stats->seqlock);
			local_stats = *percpu_stats;
		} while (read_seqcount_retry(&percpu_stats->seqlock, seqcount));

		stats.n_frags += local_stats.n_frags;
		stats.n_hit += local_stats.n_hit;
		stats.n_missed += local_stats.n_missed;
		stats.n_lost += local_stats.n_lost;
	}
	stats.max_miss_queue = DP_MAX_QUEUE_LEN;
	stats.max_action_queue = DP_MAX_QUEUE_LEN;
	return copy_to_user(statsp, &stats, sizeof stats) ? -EFAULT : 0;
}
1197
1dcf111b
JP
1198/* MTU of the dp pseudo-device: ETH_DATA_LEN or the minimum of the ports */
1199int dp_min_mtu(const struct datapath *dp)
1200{
e779d8d9 1201 struct vport *p;
1dcf111b
JP
1202 int mtu = 0;
1203
1204 ASSERT_RTNL();
1205
1206 list_for_each_entry_rcu (p, &dp->port_list, node) {
f2459fe7 1207 int dev_mtu;
1dcf111b
JP
1208
1209 /* Skip any internal ports, since that's what we're trying to
1210 * set. */
e779d8d9 1211 if (is_internal_vport(p))
1dcf111b
JP
1212 continue;
1213
e779d8d9 1214 dev_mtu = vport_get_mtu(p);
f2459fe7
JG
1215 if (!mtu || dev_mtu < mtu)
1216 mtu = dev_mtu;
1dcf111b
JP
1217 }
1218
1219 return mtu ? mtu : ETH_DATA_LEN;
1220}
1221
f2459fe7 1222/* Sets the MTU of all datapath devices to the minimum of the ports. Must
d8b5d43a 1223 * be called with RTNL lock. */
f2459fe7 1224void set_internal_devs_mtu(const struct datapath *dp)
a7786963 1225{
e779d8d9 1226 struct vport *p;
a7786963
JG
1227 int mtu;
1228
1229 ASSERT_RTNL();
1230
a7786963
JG
1231 mtu = dp_min_mtu(dp);
1232
1233 list_for_each_entry_rcu (p, &dp->port_list, node) {
e779d8d9
BP
1234 if (is_internal_vport(p))
1235 vport_set_mtu(p, mtu);
a7786963
JG
1236 }
1237}
1238
/* Describes vport 'p' in the userspace odp_port structure '*uop'.
 *
 * Returns 0 on success or -EFAULT if the copy to userspace faults. */
static int put_port(const struct vport *p, struct odp_port __user *uop)
{
	struct odp_port op;

	/* Zero the whole struct first so padding and unwritten tail bytes
	 * cannot leak kernel stack contents to userspace. */
	memset(&op, 0, sizeof op);

	/* Hold the RCU read lock while dereferencing the vport's strings.
	 * NOTE(review): strncpy() leaves the field unterminated if the
	 * source is exactly sizeof op.devname / op.type long; the memset
	 * above only guarantees a NUL for shorter names — confirm the vport
	 * layer bounds these strings below the field size. */
	rcu_read_lock();
	strncpy(op.devname, vport_get_name(p), sizeof op.devname);
	strncpy(op.type, vport_get_type(p), sizeof op.type);
	rcu_read_unlock();

	op.port = p->port_no;

	return copy_to_user(uop, &op, sizeof op) ? -EFAULT : 0;
}
1254
/* ODP_VPORT_QUERY: looks up a port either by device name (when
 * port.devname is non-empty) or by port number, and writes the full
 * odp_port description back to userspace.
 *
 * Returns 0 on success; -EFAULT on a userspace fault, -ENODEV if a named
 * device does not exist, -ENOENT if it exists but belongs to another
 * datapath (or the port number is unused), -EINVAL for an out-of-range
 * port number. */
static int query_port(struct datapath *dp, struct odp_port __user *uport)
{
	struct odp_port port;

	if (copy_from_user(&port, uport, sizeof port))
		return -EFAULT;

	if (port.devname[0]) {
		/* Lookup by name. */
		struct vport *vport;
		int err = 0;

		/* Force NUL termination of the userspace-supplied name. */
		port.devname[IFNAMSIZ - 1] = '\0';

		vport_lock();
		rcu_read_lock();

		vport = vport_locate(port.devname);
		if (!vport) {
			err = -ENODEV;
			goto error_unlock;
		}
		if (vport->dp != dp) {
			err = -ENOENT;
			goto error_unlock;
		}

		/* Remember the port number for the put_port() call below. */
		port.port = vport->port_no;

error_unlock:
		rcu_read_unlock();
		vport_unlock();

		if (err)
			return err;
	} else {
		/* Lookup by port number. */
		if (port.port >= DP_MAX_PORTS)
			return -EINVAL;
		if (!dp->ports[port.port])
			return -ENOENT;
	}

	return put_port(dp->ports[port.port], uport);
}
1298
fceb2a5b
JG
1299static int do_list_ports(struct datapath *dp, struct odp_port __user *uports,
1300 int n_ports)
064af421 1301{
44e05eca
BP
1302 int idx = 0;
1303 if (n_ports) {
e779d8d9 1304 struct vport *p;
064af421 1305
064af421 1306 list_for_each_entry_rcu (p, &dp->port_list, node) {
44e05eca 1307 if (put_port(p, &uports[idx]))
064af421 1308 return -EFAULT;
44e05eca 1309 if (idx++ >= n_ports)
064af421
BP
1310 break;
1311 }
1312 }
44e05eca
BP
1313 return idx;
1314}
1315
fceb2a5b 1316static int list_ports(struct datapath *dp, struct odp_portvec __user *upv)
44e05eca
BP
1317{
1318 struct odp_portvec pv;
1319 int retval;
1320
1321 if (copy_from_user(&pv, upv, sizeof pv))
1322 return -EFAULT;
1323
1324 retval = do_list_ports(dp, pv.ports, pv.n_ports);
1325 if (retval < 0)
1326 return retval;
1327
1328 return put_user(retval, &upv->n_ports);
064af421
BP
1329}
1330
7c40efc9
BP
1331static int get_listen_mask(const struct file *f)
1332{
1333 return (long)f->private_data;
1334}
1335
1336static void set_listen_mask(struct file *f, int listen_mask)
1337{
1338 f->private_data = (void*)(long)listen_mask;
1339}
1340
064af421
BP
/* Main ioctl dispatcher for the datapath character device.
 *
 * Runs in two phases: commands that create/destroy datapaths or operate on
 * vports take their own locks and are dispatched before get_dp_locked();
 * everything else runs with dp->mutex held (taken by get_dp_locked(),
 * released before returning). */
static long openvswitch_ioctl(struct file *f, unsigned int cmd,
			   unsigned long argp)
{
	int dp_idx = iminor(f->f_dentry->d_inode);
	struct datapath *dp;
	int drop_frags, listeners, port_no;
	unsigned int sflow_probability;
	int err;

	/* Handle commands with special locking requirements up front. */
	switch (cmd) {
	case ODP_DP_CREATE:
		err = create_dp(dp_idx, (char __user *)argp);
		goto exit;

	case ODP_DP_DESTROY:
		err = destroy_dp(dp_idx);
		goto exit;

	case ODP_VPORT_ATTACH:
		err = attach_port(dp_idx, (struct odp_port __user *)argp);
		goto exit;

	case ODP_VPORT_DETACH:
		err = get_user(port_no, (int __user *)argp);
		if (!err)
			err = detach_port(dp_idx, port_no);
		goto exit;

	case ODP_VPORT_MOD:
		err = vport_user_mod((struct odp_port __user *)argp);
		goto exit;

	case ODP_VPORT_STATS_GET:
		err = vport_user_stats_get((struct odp_vport_stats_req __user *)argp);
		goto exit;

	case ODP_VPORT_STATS_SET:
		err = vport_user_stats_set((struct odp_vport_stats_req __user *)argp);
		goto exit;

	case ODP_VPORT_ETHER_GET:
		err = vport_user_ether_get((struct odp_vport_ether __user *)argp);
		goto exit;

	case ODP_VPORT_ETHER_SET:
		err = vport_user_ether_set((struct odp_vport_ether __user *)argp);
		goto exit;

	case ODP_VPORT_MTU_GET:
		err = vport_user_mtu_get((struct odp_vport_mtu __user *)argp);
		goto exit;

	case ODP_VPORT_MTU_SET:
		err = vport_user_mtu_set((struct odp_vport_mtu __user *)argp);
		goto exit;
	}

	/* Remaining commands operate on an existing datapath with its
	 * mutex held. */
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto exit;

	switch (cmd) {
	case ODP_DP_STATS:
		err = get_dp_stats(dp, (struct odp_stats __user *)argp);
		break;

	case ODP_GET_DROP_FRAGS:
		err = put_user(dp->drop_frags, (int __user *)argp);
		break;

	case ODP_SET_DROP_FRAGS:
		err = get_user(drop_frags, (int __user *)argp);
		if (err)
			break;
		err = -EINVAL;
		/* Only a strict boolean is accepted. */
		if (drop_frags != 0 && drop_frags != 1)
			break;
		dp->drop_frags = drop_frags;
		err = 0;
		break;

	case ODP_GET_LISTEN_MASK:
		err = put_user(get_listen_mask(f), (int __user *)argp);
		break;

	case ODP_SET_LISTEN_MASK:
		err = get_user(listeners, (int __user *)argp);
		if (err)
			break;
		err = -EINVAL;
		/* Reject bits outside the defined ODPL_* set. */
		if (listeners & ~ODPL_ALL)
			break;
		err = 0;
		set_listen_mask(f, listeners);
		break;

	case ODP_GET_SFLOW_PROBABILITY:
		err = put_user(dp->sflow_probability, (unsigned int __user *)argp);
		break;

	case ODP_SET_SFLOW_PROBABILITY:
		err = get_user(sflow_probability, (unsigned int __user *)argp);
		if (!err)
			dp->sflow_probability = sflow_probability;
		break;

	case ODP_VPORT_QUERY:
		err = query_port(dp, (struct odp_port __user *)argp);
		break;

	case ODP_VPORT_LIST:
		err = list_ports(dp, (struct odp_portvec __user *)argp);
		break;

	case ODP_FLOW_FLUSH:
		err = flush_flows(dp);
		break;

	case ODP_FLOW_PUT:
		err = put_flow(dp, (struct odp_flow_put __user *)argp);
		break;

	case ODP_FLOW_DEL:
		err = del_flow(dp, (struct odp_flow __user *)argp);
		break;

	case ODP_FLOW_GET:
		err = do_flowvec_ioctl(dp, argp, do_query_flows);
		break;

	case ODP_FLOW_LIST:
		err = do_flowvec_ioctl(dp, argp, do_list_flows);
		break;

	case ODP_EXECUTE:
		err = execute_packet(dp, (struct odp_execute __user *)argp);
		break;

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	mutex_unlock(&dp->mutex);
exit:
	return err;
}
1489
1490static int dp_has_packet_of_interest(struct datapath *dp, int listeners)
1491{
1492 int i;
1493 for (i = 0; i < DP_N_QUEUES; i++) {
1494 if (listeners & (1 << i) && !skb_queue_empty(&dp->queues[i]))
1495 return 1;
1496 }
1497 return 0;
1498}
1499
3fbd517a
BP
1500#ifdef CONFIG_COMPAT
1501static int compat_list_ports(struct datapath *dp, struct compat_odp_portvec __user *upv)
1502{
1503 struct compat_odp_portvec pv;
1504 int retval;
1505
1506 if (copy_from_user(&pv, upv, sizeof pv))
1507 return -EFAULT;
1508
1509 retval = do_list_ports(dp, compat_ptr(pv.ports), pv.n_ports);
1510 if (retval < 0)
1511 return retval;
1512
1513 return put_user(retval, &upv->n_ports);
1514}
1515
3fbd517a
BP
/* Copies a 32-bit userspace compat_odp_flow into the native odp_flow
 * '*flow', widening the compat actions pointer with compat_ptr().
 *
 * Returns 0 on success or -EFAULT on any userspace fault. */
static int compat_get_flow(struct odp_flow *flow, const struct compat_odp_flow __user *compat)
{
	compat_uptr_t actions;

	/* A single access_ok() over the whole struct permits the cheaper
	 * __copy_from_user()/__get_user() variants below. */
	if (!access_ok(VERIFY_READ, compat, sizeof(struct compat_odp_flow)) ||
	    __copy_from_user(&flow->stats, &compat->stats, sizeof(struct odp_flow_stats)) ||
	    __copy_from_user(&flow->key, &compat->key, sizeof(struct odp_flow_key)) ||
	    __get_user(actions, &compat->actions) ||
	    __get_user(flow->actions_len, &compat->actions_len) ||
	    __get_user(flow->flags, &compat->flags))
		return -EFAULT;

	flow->actions = compat_ptr(actions);
	return 0;
}
1531
1532static int compat_put_flow(struct datapath *dp, struct compat_odp_flow_put __user *ufp)
1533{
1534 struct odp_flow_stats stats;
1535 struct odp_flow_put fp;
1536 int error;
1537
1538 if (compat_get_flow(&fp.flow, &ufp->flow) ||
1539 get_user(fp.flags, &ufp->flags))
1540 return -EFAULT;
1541
1542 error = do_put_flow(dp, &fp, &stats);
1543 if (error)
1544 return error;
1545
1546 if (copy_to_user(&ufp->flow.stats, &stats,
1547 sizeof(struct odp_flow_stats)))
1548 return -EFAULT;
1549
1550 return 0;
1551}
1552
1553static int compat_answer_query(struct sw_flow *flow, u32 query_flags,
1554 struct compat_odp_flow __user *ufp)
1555{
1556 compat_uptr_t actions;
1557
1558 if (get_user(actions, &ufp->actions))
1559 return -EFAULT;
1560
65d042a1 1561 return do_answer_query(flow, query_flags, &ufp->stats,
cdee00fd 1562 compat_ptr(actions), &ufp->actions_len);
3fbd517a
BP
1563}
1564
1565static int compat_del_flow(struct datapath *dp, struct compat_odp_flow __user *ufp)
1566{
1567 struct sw_flow *flow;
1568 struct odp_flow uf;
1569 int error;
1570
1571 if (compat_get_flow(&uf, ufp))
1572 return -EFAULT;
1573
1574 flow = do_del_flow(dp, &uf.key);
1575 if (IS_ERR(flow))
1576 return PTR_ERR(flow);
1577
65d042a1 1578 error = compat_answer_query(flow, 0, ufp);
3fbd517a
BP
1579 flow_deferred_free(flow);
1580 return error;
1581}
1582
/* 32-bit compat ODP_FLOW_GET backend: looks up each of 'n_flows' requested
 * flows by key and answers the query in place.
 *
 * A missing flow is reported in-band by storing positive ENOENT in that
 * entry's stats.error rather than failing the whole call.  Any per-entry
 * failure is collapsed to -EFAULT.  Returns n_flows on success. */
static int compat_query_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows)
{
	struct tbl *table = rcu_dereference(dp->table);
	u32 i;

	/* NOTE(review): 'flows' is dereferenced only through the __user
	 * alias 'ufp' below; the parameter itself lacks the __user
	 * annotation — sparse would flag this. */
	for (i = 0; i < n_flows; i++) {
		struct compat_odp_flow __user *ufp = &flows[i];
		struct odp_flow uf;
		struct tbl_node *flow_node;
		int error;

		if (compat_get_flow(&uf, ufp))
			return -EFAULT;

		flow_node = tbl_lookup(table, &uf.key, flow_hash(&uf.key), flow_cmp);
		if (!flow_node)
			error = put_user(ENOENT, &ufp->stats.error);
		else
			error = compat_answer_query(flow_cast(flow_node), uf.flags, ufp);
		if (error)
			return -EFAULT;
	}
	return n_flows;
}
1607
/* Callback state for compat_list_flow(): 32-bit layout counterpart of
 * struct list_flows_cbdata. */
struct compat_list_flows_cbdata {
	struct compat_odp_flow __user *uflows;	/* Userspace destination array. */
	u32 n_flows;				/* Capacity of 'uflows'. */
	u32 listed_flows;			/* Entries written so far. */
};
1613
/* tbl_foreach() callback for the 32-bit compat flow dump; mirrors
 * list_flow() but writes compat-layout entries.
 *
 * Returns 0 to continue, negative errno on fault, or the positive count of
 * listed flows to stop once the user buffer is full. */
static int compat_list_flow(struct tbl_node *node, void *cbdata_)
{
	struct sw_flow *flow = flow_cast(node);
	struct compat_list_flows_cbdata *cbdata = cbdata_;
	struct compat_odp_flow __user *ufp = &cbdata->uflows[cbdata->listed_flows++];
	int error;

	if (copy_to_user(&ufp->key, &flow->key, sizeof flow->key))
		return -EFAULT;
	error = compat_answer_query(flow, 0, ufp);
	if (error)
		return error;

	/* Stop iterating once the buffer is full; this guarantees the
	 * post-increment above never indexes past the end. */
	if (cbdata->listed_flows >= cbdata->n_flows)
		return cbdata->listed_flows;
	return 0;
}
1631
1632static int compat_list_flows(struct datapath *dp, struct compat_odp_flow *flows, u32 n_flows)
1633{
1634 struct compat_list_flows_cbdata cbdata;
1635 int error;
1636
1637 if (!n_flows)
1638 return 0;
1639
1640 cbdata.uflows = flows;
1641 cbdata.n_flows = n_flows;
1642 cbdata.listed_flows = 0;
6bfafa55 1643
3fbd517a
BP
1644 error = tbl_foreach(rcu_dereference(dp->table), compat_list_flow, &cbdata);
1645 return error ? error : cbdata.listed_flows;
1646}
1647
/* 32-bit compat counterpart of do_flowvec_ioctl(): copies the compat
 * flowvec header, validates the element count and the writability of the
 * flow array, then invokes 'function'.
 *
 * Returns 0 when every requested flow was handled; otherwise writes the
 * count handled back into the user's n_flows.  Negative errno on error. */
static int compat_flowvec_ioctl(struct datapath *dp, unsigned long argp,
				int (*function)(struct datapath *,
						struct compat_odp_flow *,
						u32 n_flows))
{
	struct compat_odp_flowvec __user *uflowvec;
	struct compat_odp_flow __user *flows;
	struct compat_odp_flowvec flowvec;
	int retval;

	uflowvec = compat_ptr(argp);
	if (!access_ok(VERIFY_WRITE, uflowvec, sizeof *uflowvec) ||
	    copy_from_user(&flowvec, uflowvec, sizeof flowvec))
		return -EFAULT;

	/* Reject counts whose byte size would overflow an int. */
	if (flowvec.n_flows > INT_MAX / sizeof(struct compat_odp_flow))
		return -EINVAL;

	/* Check the whole flow array up front so callees may use the
	 * unchecked __copy/__get_user access variants. */
	flows = compat_ptr(flowvec.flows);
	if (!access_ok(VERIFY_WRITE, flows,
		       flowvec.n_flows * sizeof(struct compat_odp_flow)))
		return -EFAULT;

	retval = function(dp, flows, flowvec.n_flows);
	return (retval < 0 ? retval
		: retval == flowvec.n_flows ? 0
		: put_user(retval, &uflowvec->n_flows));
}
1676
/* 32-bit compat ODP_EXECUTE entry point: widens the compat actions/data
 * pointers in the userspace request into a native odp_execute and forwards
 * it to do_execute(). */
static int compat_execute(struct datapath *dp, const struct compat_odp_execute __user *uexecute)
{
	struct odp_execute execute;
	compat_uptr_t actions;
	compat_uptr_t data;

	/* One access_ok() over the struct permits the unchecked __get_user
	 * calls that follow. */
	if (!access_ok(VERIFY_READ, uexecute, sizeof(struct compat_odp_execute)) ||
	    __get_user(actions, &uexecute->actions) ||
	    __get_user(execute.actions_len, &uexecute->actions_len) ||
	    __get_user(data, &uexecute->data) ||
	    __get_user(execute.length, &uexecute->length))
		return -EFAULT;

	execute.actions = compat_ptr(actions);
	execute.data = compat_ptr(data);

	return do_execute(dp, &execute);
}
1695
/* compat_ioctl handler for 32-bit userspace on a 64-bit kernel.
 *
 * Three classes of commands: those needing no translation, those whose
 * only argument is a pointer that merely needs compat_ptr() widening (both
 * forwarded straight to openvswitch_ioctl()), and the *32 commands whose
 * argument structs differ in layout and get dedicated compat handlers run
 * with dp->mutex held. */
static long openvswitch_compat_ioctl(struct file *f, unsigned int cmd, unsigned long argp)
{
	int dp_idx = iminor(f->f_dentry->d_inode);
	struct datapath *dp;
	int err;

	switch (cmd) {
	case ODP_DP_DESTROY:
	case ODP_FLOW_FLUSH:
		/* Ioctls that don't need any translation at all. */
		return openvswitch_ioctl(f, cmd, argp);

	case ODP_DP_CREATE:
	case ODP_VPORT_ATTACH:
	case ODP_VPORT_DETACH:
	case ODP_VPORT_MOD:
	case ODP_VPORT_MTU_SET:
	case ODP_VPORT_MTU_GET:
	case ODP_VPORT_ETHER_SET:
	case ODP_VPORT_ETHER_GET:
	case ODP_VPORT_STATS_SET:
	case ODP_VPORT_STATS_GET:
	case ODP_DP_STATS:
	case ODP_GET_DROP_FRAGS:
	case ODP_SET_DROP_FRAGS:
	case ODP_SET_LISTEN_MASK:
	case ODP_GET_LISTEN_MASK:
	case ODP_SET_SFLOW_PROBABILITY:
	case ODP_GET_SFLOW_PROBABILITY:
	case ODP_VPORT_QUERY:
		/* Ioctls that just need their pointer argument extended. */
		return openvswitch_ioctl(f, cmd, (unsigned long)compat_ptr(argp));
	}

	/* Layout-incompatible commands: run under dp->mutex, which
	 * get_dp_locked() takes and we release below. */
	dp = get_dp_locked(dp_idx);
	err = -ENODEV;
	if (!dp)
		goto exit;

	switch (cmd) {
	case ODP_VPORT_LIST32:
		err = compat_list_ports(dp, compat_ptr(argp));
		break;

	case ODP_FLOW_PUT32:
		err = compat_put_flow(dp, compat_ptr(argp));
		break;

	case ODP_FLOW_DEL32:
		err = compat_del_flow(dp, compat_ptr(argp));
		break;

	case ODP_FLOW_GET32:
		err = compat_flowvec_ioctl(dp, argp, compat_query_flows);
		break;

	case ODP_FLOW_LIST32:
		err = compat_flowvec_ioctl(dp, argp, compat_list_flows);
		break;

	case ODP_EXECUTE32:
		err = compat_execute(dp, compat_ptr(argp));
		break;

	default:
		err = -ENOIOCTLCMD;
		break;
	}
	mutex_unlock(&dp->mutex);
exit:
	return err;
}
1768#endif
1769
/* Unfortunately this function is not exported so this is a verbatim copy
 * from net/core/datagram.c in 2.6.30.
 *
 * Copies 'len' bytes of 'skb' starting at 'offset' to the userspace buffer
 * 'to', accumulating an Internet checksum of the copied data into '*csump'.
 * Walks the linear header, then paged frags (kmap'd), then the frag_list,
 * recursing for chained skbs.  Returns 0 on success or -EFAULT on a
 * userspace fault.  Being a verbatim kernel copy, the code itself is left
 * untouched. */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int pos = 0;
	int i, copy = start - offset;

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	/* Paged fragments: each page is temporarily kmap'd for the copy. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8 *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
							frag->page_offset +
							offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	/* Chained skbs: recurse, folding each sub-checksum in at 'pos'. */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list; list=list->next) {
			int end;

			WARN_ON(start > offset + len);

			end = start + list->len;
			if ((copy = end - offset) > 0) {
				__wsum csum2 = 0;
				if (copy > len)
					copy = len;
				if (skb_copy_and_csum_datagram(list,
							       offset - start,
							       to, copy,
							       &csum2))
					goto fault;
				*csump = csum_block_add(*csump, csum2, pos);
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
1863
33b38b63
JG
1864static ssize_t openvswitch_read(struct file *f, char __user *buf,
1865 size_t nbytes, loff_t *ppos)
064af421 1866{
7c40efc9 1867 int listeners = get_listen_mask(f);
064af421 1868 int dp_idx = iminor(f->f_dentry->d_inode);
e22d4953 1869 struct datapath *dp = get_dp_locked(dp_idx);
064af421 1870 struct sk_buff *skb;
9cc8b4e4 1871 size_t copy_bytes, tot_copy_bytes;
064af421
BP
1872 int retval;
1873
1874 if (!dp)
1875 return -ENODEV;
1876
1877 if (nbytes == 0 || !listeners)
1878 return 0;
1879
1880 for (;;) {
1881 int i;
1882
1883 for (i = 0; i < DP_N_QUEUES; i++) {
1884 if (listeners & (1 << i)) {
1885 skb = skb_dequeue(&dp->queues[i]);
1886 if (skb)
1887 goto success;
1888 }
1889 }
1890
1891 if (f->f_flags & O_NONBLOCK) {
1892 retval = -EAGAIN;
1893 goto error;
1894 }
1895
1896 wait_event_interruptible(dp->waitqueue,
1897 dp_has_packet_of_interest(dp,
1898 listeners));
1899
1900 if (signal_pending(current)) {
1901 retval = -ERESTARTSYS;
1902 goto error;
1903 }
1904 }
1905success:
e22d4953
JG
1906 mutex_unlock(&dp->mutex);
1907
9cc8b4e4 1908 copy_bytes = tot_copy_bytes = min_t(size_t, skb->len, nbytes);
d295e8e9 1909
9cc8b4e4
JG
1910 retval = 0;
1911 if (skb->ip_summed == CHECKSUM_PARTIAL) {
9fc10ed9
JG
1912 if (copy_bytes == skb->len) {
1913 __wsum csum = 0;
dd8d6b8c
JG
1914 u16 csum_start, csum_offset;
1915
1916 get_skb_csum_pointers(skb, &csum_start, &csum_offset);
f057cdda 1917 BUG_ON(csum_start >= skb_headlen(skb));
9cc8b4e4
JG
1918 retval = skb_copy_and_csum_datagram(skb, csum_start, buf + csum_start,
1919 copy_bytes - csum_start, &csum);
9cc8b4e4
JG
1920 if (!retval) {
1921 __sum16 __user *csump;
1922
1923 copy_bytes = csum_start;
1924 csump = (__sum16 __user *)(buf + csum_start + csum_offset);
f057cdda
JG
1925
1926 BUG_ON((char *)csump + sizeof(__sum16) > buf + nbytes);
9cc8b4e4
JG
1927 put_user(csum_fold(csum), csump);
1928 }
9fc10ed9
JG
1929 } else
1930 retval = skb_checksum_help(skb);
9cc8b4e4
JG
1931 }
1932
1933 if (!retval) {
1934 struct iovec __user iov;
1935
1936 iov.iov_base = buf;
1937 iov.iov_len = copy_bytes;
1938 retval = skb_copy_datagram_iovec(skb, 0, &iov, iov.iov_len);
1939 }
1940
064af421 1941 if (!retval)
9cc8b4e4
JG
1942 retval = tot_copy_bytes;
1943
064af421 1944 kfree_skb(skb);
e22d4953 1945 return retval;
064af421
BP
1946
1947error:
e22d4953 1948 mutex_unlock(&dp->mutex);
064af421
BP
1949 return retval;
1950}
1951
1952static unsigned int openvswitch_poll(struct file *file, poll_table *wait)
1953{
1954 int dp_idx = iminor(file->f_dentry->d_inode);
e22d4953 1955 struct datapath *dp = get_dp_locked(dp_idx);
064af421
BP
1956 unsigned int mask;
1957
1958 if (dp) {
1959 mask = 0;
1960 poll_wait(file, &dp->waitqueue, wait);
7c40efc9 1961 if (dp_has_packet_of_interest(dp, get_listen_mask(file)))
064af421 1962 mask |= POLLIN | POLLRDNORM;
e22d4953 1963 mutex_unlock(&dp->mutex);
064af421
BP
1964 } else {
1965 mask = POLLIN | POLLRDNORM | POLLHUP;
1966 }
1967 return mask;
1968}
1969
33b38b63 1970static struct file_operations openvswitch_fops = {
064af421
BP
1971 .read = openvswitch_read,
1972 .poll = openvswitch_poll,
1973 .unlocked_ioctl = openvswitch_ioctl,
3fbd517a
BP
1974#ifdef CONFIG_COMPAT
1975 .compat_ioctl = openvswitch_compat_ioctl,
1976#endif
064af421
BP
1977};
1978
static int major;	/* Chrdev major number allocated in dp_init(). */
22d24ebf 1980
22d24ebf
BP
1981static int __init dp_init(void)
1982{
f2459fe7 1983 struct sk_buff *dummy_skb;
22d24ebf
BP
1984 int err;
1985
f2459fe7 1986 BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
22d24ebf 1987
f2459fe7 1988 printk("Open vSwitch %s, built "__DATE__" "__TIME__"\n", VERSION BUILDNR);
064af421
BP
1989
1990 err = flow_init();
1991 if (err)
1992 goto error;
1993
f2459fe7 1994 err = vport_init();
064af421
BP
1995 if (err)
1996 goto error_flow_exit;
1997
f2459fe7
JG
1998 err = register_netdevice_notifier(&dp_device_notifier);
1999 if (err)
2000 goto error_vport_exit;
2001
064af421
BP
2002 major = register_chrdev(0, "openvswitch", &openvswitch_fops);
2003 if (err < 0)
2004 goto error_unreg_notifier;
2005
064af421
BP
2006 return 0;
2007
2008error_unreg_notifier:
2009 unregister_netdevice_notifier(&dp_device_notifier);
f2459fe7
JG
2010error_vport_exit:
2011 vport_exit();
064af421
BP
2012error_flow_exit:
2013 flow_exit();
2014error:
2015 return err;
2016}
2017
/* Module unload: tears down in reverse order of dp_init().  The initial
 * rcu_barrier() waits for all outstanding RCU callbacks (e.g. deferred
 * flow frees) to finish before the code implementing them goes away. */
static void dp_cleanup(void)
{
	rcu_barrier();
	unregister_chrdev(major, "openvswitch");
	unregister_netdevice_notifier(&dp_device_notifier);
	vport_exit();
	flow_exit();
}
2026
/* Module entry/exit hooks and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");