]> git.proxmox.com Git - ovs.git/blame - datapath/vport.c
datapath: backport: ovs: set name assign type of internal port
[ovs.git] / datapath / vport.c
CommitLineData
f2459fe7 1/*
e23775f2 2 * Copyright (c) 2007-2015 Nicira, Inc.
f2459fe7 3 *
a9a29d22
JG
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
f2459fe7
JG
17 */
18
f2459fe7
JG
19#include <linux/etherdevice.h>
20#include <linux/if.h>
38aeef15 21#include <linux/if_vlan.h>
2a4999f3 22#include <linux/jhash.h>
f2459fe7
JG
23#include <linux/kernel.h>
24#include <linux/list.h>
25#include <linux/mutex.h>
26#include <linux/percpu.h>
057dd6d2 27#include <linux/rcupdate.h>
f2459fe7 28#include <linux/rtnetlink.h>
3fbd517a 29#include <linux/compat.h>
e23775f2
PS
30#include <linux/module.h>
31#include <linux/if_link.h>
2a4999f3 32#include <net/net_namespace.h>
e23775f2
PS
33#include <net/lisp.h>
34#include <net/gre.h>
35#include <net/geneve.h>
e23775f2 36#include <net/stt.h>
8063e095 37#include <net/vxlan.h>
f2459fe7 38
2a4999f3 39#include "datapath.h"
c450371e 40#include "gso.h"
f2459fe7 41#include "vport.h"
b19e8815 42#include "vport-internal_dev.h"
f2459fe7 43
/* List of registered vport implementations.  Entries are added and removed
 * by the ops register/unregister helpers in this file while holding
 * ovs_lock().
 */
static LIST_HEAD(vport_ops_list);

/* Hash table of vports, indexed by hash_bucket().
 * Protected by RCU read lock for reading, ovs_mutex for writing.
 */
static struct hlist_head *dev_table;
/* Number of buckets in dev_table.  hash_bucket() masks the hash with
 * (VPORT_HASH_BUCKETS - 1), so this must stay a power of two.
 */
#define VPORT_HASH_BUCKETS 1024
49
f2459fe7 50/**
850b6b3b 51 * ovs_vport_init - initialize vport subsystem
f2459fe7 52 *
806b46ef 53 * Called at module load time to initialize the vport subsystem.
f2459fe7 54 */
850b6b3b 55int ovs_vport_init(void)
f2459fe7 56{
e23775f2
PS
57 int err;
58
f2459fe7
JG
59 dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
60 GFP_KERNEL);
806b46ef
PS
61 if (!dev_table)
62 return -ENOMEM;
f2459fe7 63
e23775f2
PS
64 err = lisp_init_module();
65 if (err)
66 goto err_lisp;
67 err = ipgre_init();
68 if (err)
69 goto err_gre;
70 err = geneve_init_module();
71 if (err)
72 goto err_geneve;
73
74 err = vxlan_init_module();
75 if (err)
76 goto err_vxlan;
77 err = ovs_stt_init_module();
78 if (err)
79 goto err_stt;
f2459fe7 80 return 0;
e23775f2
PS
81
82err_stt:
83 vxlan_cleanup_module();
84err_vxlan:
85 geneve_cleanup_module();
86err_geneve:
87 ipgre_fini();
88err_gre:
89 lisp_cleanup_module();
90err_lisp:
91 kfree(dev_table);
92 return err;
f2459fe7
JG
93}
94
f2459fe7 95/**
850b6b3b 96 * ovs_vport_exit - shutdown vport subsystem
f2459fe7 97 *
806b46ef 98 * Called at module exit time to shutdown the vport subsystem.
f2459fe7 99 */
850b6b3b 100void ovs_vport_exit(void)
f2459fe7 101{
e23775f2
PS
102 ovs_stt_cleanup_module();
103 vxlan_cleanup_module();
104 geneve_cleanup_module();
105 ipgre_fini();
106 lisp_cleanup_module();
f2459fe7
JG
107 kfree(dev_table);
108}
109
f1f60b85 110static struct hlist_head *hash_bucket(const struct net *net, const char *name)
f2459fe7 111{
2a4999f3 112 unsigned int hash = jhash(name, strlen(name), (unsigned long) net);
f2459fe7
JG
113 return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
114}
115
f07995ef 116int __ovs_vport_ops_register(struct vport_ops *ops)
5a38795f
TG
117{
118 int err = -EEXIST;
119 struct vport_ops *o;
120
121 ovs_lock();
122 list_for_each_entry(o, &vport_ops_list, list)
e23775f2
PS
123 if (ops->type == o->type)
124 goto errout;
5a38795f
TG
125
126 list_add_tail(&ops->list, &vport_ops_list);
127 err = 0;
128errout:
129 ovs_unlock();
130 return err;
131}
f07995ef 132EXPORT_SYMBOL_GPL(__ovs_vport_ops_register);
5a38795f
TG
133
134void ovs_vport_ops_unregister(struct vport_ops *ops)
135{
136 ovs_lock();
137 list_del(&ops->list);
138 ovs_unlock();
139}
140EXPORT_SYMBOL_GPL(ovs_vport_ops_unregister);
141
f2459fe7 142/**
850b6b3b 143 * ovs_vport_locate - find a port that has already been created
f2459fe7
JG
144 *
145 * @name: name of port to find
146 *
cd2a59e9 147 * Must be called with ovs or RCU read lock.
f2459fe7 148 */
f1f60b85 149struct vport *ovs_vport_locate(const struct net *net, const char *name)
f2459fe7 150{
2a4999f3 151 struct hlist_head *bucket = hash_bucket(net, name);
f2459fe7 152 struct vport *vport;
f2459fe7 153
f8dfbcb7 154 hlist_for_each_entry_rcu(vport, bucket, hash_node)
e23775f2 155 if (!strcmp(name, ovs_vport_name(vport)) &&
2a4999f3 156 net_eq(ovs_dp_get_net(vport->dp), net))
057dd6d2 157 return vport;
f2459fe7 158
057dd6d2 159 return NULL;
f2459fe7
JG
160}
161
162/**
850b6b3b 163 * ovs_vport_alloc - allocate and initialize new vport
f2459fe7
JG
164 *
165 * @priv_size: Size of private data area to allocate.
166 * @ops: vport device ops
167 *
168 * Allocate and initialize a new vport defined by @ops. The vport will contain
169 * a private data area of size @priv_size that can be accessed using
170 * vport_priv(). vports that are no longer needed should be released with
e23775f2 171 * vport_free().
f2459fe7 172 */
850b6b3b 173struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
e23775f2 174 const struct vport_parms *parms)
f2459fe7
JG
175{
176 struct vport *vport;
177 size_t alloc_size;
178
179 alloc_size = sizeof(struct vport);
180 if (priv_size) {
181 alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
182 alloc_size += priv_size;
183 }
184
185 vport = kzalloc(alloc_size, GFP_KERNEL);
186 if (!vport)
187 return ERR_PTR(-ENOMEM);
188
e779d8d9
BP
189 vport->dp = parms->dp;
190 vport->port_no = parms->port_no;
f2459fe7 191 vport->ops = ops;
95b1d73a 192 INIT_HLIST_NODE(&vport->dp_hash_node);
f2459fe7 193
d5797f6d
CJ
194 if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
195 kfree(vport);
beb1c69a 196 return ERR_PTR(-EINVAL);
d5797f6d 197 }
beb1c69a 198
f2459fe7
JG
199 return vport;
200}
5a38795f
TG
201EXPORT_SYMBOL_GPL(ovs_vport_alloc);
202
f2459fe7 203/**
850b6b3b 204 * ovs_vport_free - uninitialize and free vport
f2459fe7
JG
205 *
206 * @vport: vport to free
207 *
e23775f2 208 * Frees a vport allocated with vport_alloc() when it is no longer needed.
057dd6d2
BP
209 *
210 * The caller must ensure that an RCU grace period has passed since the last
211 * time @vport was in a datapath.
f2459fe7 212 */
850b6b3b 213void ovs_vport_free(struct vport *vport)
f2459fe7 214{
e23775f2
PS
215 /* vport is freed from RCU callback or error path, Therefore
216 * it is safe to use raw dereference.
217 */
46051cf8 218 kfree(rcu_dereference_raw(vport->upcall_portids));
5ca1ba48 219 kfree(vport);
f2459fe7 220}
5a38795f 221EXPORT_SYMBOL_GPL(ovs_vport_free);
f2459fe7 222
e23775f2
PS
223static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
224{
225 struct vport_ops *ops;
226
227 list_for_each_entry(ops, &vport_ops_list, list)
228 if (ops->type == parms->type)
229 return ops;
230
231 return NULL;
232}
233
f2459fe7 234/**
850b6b3b 235 * ovs_vport_add - add vport device (for kernel callers)
f2459fe7 236 *
94903c98 237 * @parms: Information about new vport.
f2459fe7 238 *
7237e4f4 239 * Creates a new vport with the specified configuration (which is dependent on
cd2a59e9 240 * device type). ovs_mutex must be held.
f2459fe7 241 */
850b6b3b 242struct vport *ovs_vport_add(const struct vport_parms *parms)
f2459fe7 243{
5a38795f 244 struct vport_ops *ops;
f2459fe7 245 struct vport *vport;
f2459fe7 246
5a38795f
TG
247 ops = ovs_vport_lookup(parms);
248 if (ops) {
249 struct hlist_head *bucket;
2a4999f3 250
5a38795f
TG
251 if (!try_module_get(ops->owner))
252 return ERR_PTR(-EAFNOSUPPORT);
f2459fe7 253
5a38795f
TG
254 vport = ops->create(parms);
255 if (IS_ERR(vport)) {
256 module_put(ops->owner);
f2459fe7
JG
257 return vport;
258 }
5a38795f
TG
259
260 bucket = hash_bucket(ovs_dp_get_net(vport->dp),
e23775f2 261 ovs_vport_name(vport));
5a38795f
TG
262 hlist_add_head_rcu(&vport->hash_node, bucket);
263 return vport;
f2459fe7
JG
264 }
265
5a38795f
TG
266 /* Unlock to attempt module load and return -EAGAIN if load
267 * was successful as we need to restart the port addition
268 * workflow.
269 */
270 ovs_unlock();
271 request_module("vport-type-%d", parms->type);
272 ovs_lock();
f2459fe7 273
5a38795f
TG
274 if (!ovs_vport_lookup(parms))
275 return ERR_PTR(-EAFNOSUPPORT);
276 else
277 return ERR_PTR(-EAGAIN);
f2459fe7
JG
278}
279
280/**
850b6b3b 281 * ovs_vport_set_options - modify existing vport device (for kernel callers)
f2459fe7
JG
282 *
283 * @vport: vport to modify.
323301f2 284 * @options: New configuration.
f2459fe7
JG
285 *
286 * Modifies an existing device with the specified configuration (which is
cd2a59e9 287 * dependent on device type). ovs_mutex must be held.
f2459fe7 288 */
850b6b3b 289int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
f2459fe7 290{
c19e6535 291 if (!vport->ops->set_options)
f2459fe7 292 return -EOPNOTSUPP;
c19e6535 293 return vport->ops->set_options(vport, options);
f2459fe7
JG
294}
295
296/**
850b6b3b 297 * ovs_vport_del - delete existing vport device
f2459fe7
JG
298 *
299 * @vport: vport to delete.
300 *
554c64b0
AC
301 * Detaches @vport from its datapath and destroys it. ovs_mutex must be
302 * held.
f2459fe7 303 */
850b6b3b 304void ovs_vport_del(struct vport *vport)
f2459fe7 305{
cd2a59e9 306 ASSERT_OVSL();
f2459fe7 307
057dd6d2 308 hlist_del_rcu(&vport->hash_node);
5a38795f 309 module_put(vport->ops->owner);
3544358a 310 vport->ops->destroy(vport);
f2459fe7
JG
311}
312
780e6207 313/**
850b6b3b 314 * ovs_vport_get_stats - retrieve device stats
780e6207
JG
315 *
316 * @vport: vport from which to retrieve the stats
317 * @stats: location to store stats
318 *
319 * Retrieves transmit, receive, and error stats for the given device.
ed099e92 320 *
cd2a59e9 321 * Must be called with ovs_mutex or rcu_read_lock.
780e6207 322 */
850b6b3b 323void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
780e6207 324{
e23775f2
PS
325 const struct rtnl_link_stats64 *dev_stats;
326 struct rtnl_link_stats64 temp;
327
328 dev_stats = dev_get_stats(vport->dev, &temp);
329 stats->rx_errors = dev_stats->rx_errors;
330 stats->tx_errors = dev_stats->tx_errors;
331 stats->tx_dropped = dev_stats->tx_dropped;
332 stats->rx_dropped = dev_stats->rx_dropped;
333
334 stats->rx_bytes = dev_stats->rx_bytes;
335 stats->rx_packets = dev_stats->rx_packets;
336 stats->tx_bytes = dev_stats->tx_bytes;
337 stats->tx_packets = dev_stats->tx_packets;
780e6207
JG
338}
339
dd851cbb 340/**
850b6b3b 341 * ovs_vport_get_options - retrieve device options
dd851cbb 342 *
c19e6535
BP
343 * @vport: vport from which to retrieve the options.
344 * @skb: sk_buff where options should be appended.
dd851cbb 345 *
c19e6535 346 * Retrieves the configuration of the given device, appending an
df2c07f4 347 * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
ed099e92 348 * vport-specific attributes to @skb.
c19e6535
BP
349 *
350 * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
f0fef760
BP
351 * negative error code if a real error occurred. If an error occurs, @skb is
352 * left unmodified.
ed099e92 353 *
cd2a59e9 354 * Must be called with ovs_mutex or rcu_read_lock.
dd851cbb 355 */
850b6b3b 356int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
dd851cbb 357{
c19e6535 358 struct nlattr *nla;
778ea0a4
TG
359 int err;
360
361 if (!vport->ops->get_options)
362 return 0;
c19e6535 363
df2c07f4 364 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
c19e6535
BP
365 if (!nla)
366 return -EMSGSIZE;
367
778ea0a4
TG
368 err = vport->ops->get_options(vport, skb);
369 if (err) {
370 nla_nest_cancel(skb, nla);
371 return err;
c19e6535
BP
372 }
373
374 nla_nest_end(skb, nla);
375 return 0;
dd851cbb
JP
376}
377
beb1c69a
AW
378/**
379 * ovs_vport_set_upcall_portids - set upcall portids of @vport.
380 *
381 * @vport: vport to modify.
382 * @ids: new configuration, an array of port ids.
383 *
384 * Sets the vport's upcall_portids to @ids.
385 *
386 * Returns 0 if successful, -EINVAL if @ids is zero length or cannot be parsed
387 * as an array of U32.
388 *
389 * Must be called with ovs_mutex.
390 */
f1f60b85 391int ovs_vport_set_upcall_portids(struct vport *vport, const struct nlattr *ids)
beb1c69a
AW
392{
393 struct vport_portids *old, *vport_portids;
394
395 if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
396 return -EINVAL;
397
398 old = ovsl_dereference(vport->upcall_portids);
399
e23775f2 400 vport_portids = kmalloc(sizeof(*vport_portids) + nla_len(ids),
beb1c69a 401 GFP_KERNEL);
c78c16a1
PS
402 if (!vport_portids)
403 return -ENOMEM;
404
beb1c69a
AW
405 vport_portids->n_ids = nla_len(ids) / sizeof(u32);
406 vport_portids->rn_ids = reciprocal_value(vport_portids->n_ids);
407 nla_memcpy(vport_portids->ids, ids, nla_len(ids));
408
409 rcu_assign_pointer(vport->upcall_portids, vport_portids);
410
411 if (old)
8063e095 412 kfree_rcu(old, rcu);
beb1c69a
AW
413 return 0;
414}
415
416/**
417 * ovs_vport_get_upcall_portids - get the upcall_portids of @vport.
418 *
419 * @vport: vport from which to retrieve the portids.
420 * @skb: sk_buff where portids should be appended.
421 *
422 * Retrieves the configuration of the given vport, appending the
423 * %OVS_VPORT_ATTR_UPCALL_PID attribute which is the array of upcall
424 * portids to @skb.
425 *
426 * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room.
427 * If an error occurs, @skb is left unmodified. Must be called with
428 * ovs_mutex or rcu_read_lock.
429 */
430int ovs_vport_get_upcall_portids(const struct vport *vport,
431 struct sk_buff *skb)
432{
433 struct vport_portids *ids;
434
435 ids = rcu_dereference_ovsl(vport->upcall_portids);
436
437 if (vport->dp->user_features & OVS_DP_F_VPORT_PIDS)
438 return nla_put(skb, OVS_VPORT_ATTR_UPCALL_PID,
e23775f2 439 ids->n_ids * sizeof(u32), (void *)ids->ids);
beb1c69a
AW
440 else
441 return nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, ids->ids[0]);
442}
443
444/**
445 * ovs_vport_find_upcall_portid - find the upcall portid to send upcall.
446 *
447 * @vport: vport from which the missed packet is received.
448 * @skb: skb that the missed packet was received.
449 *
e2f3178f 450 * Uses the skb_get_hash() to select the upcall portid to send the
beb1c69a
AW
451 * upcall.
452 *
453 * Returns the portid of the target socket. Must be called with rcu_read_lock.
454 */
6bb842f6 455u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
beb1c69a
AW
456{
457 struct vport_portids *ids;
e23775f2 458 u32 ids_index;
beb1c69a
AW
459 u32 hash;
460
6bb842f6 461 ids = rcu_dereference(vport->upcall_portids);
beb1c69a
AW
462
463 if (ids->n_ids == 1 && ids->ids[0] == 0)
464 return 0;
465
e2f3178f 466 hash = skb_get_hash(skb);
e23775f2
PS
467 ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
468 return ids->ids[ids_index];
beb1c69a
AW
469}
470
f2459fe7 471/**
850b6b3b 472 * ovs_vport_receive - pass up received packet to the datapath for processing
f2459fe7
JG
473 *
474 * @vport: vport that received the packet
475 * @skb: skb that was received
e23775f2 476 * @tun_key: tunnel (if any) that carried packet
f2459fe7 477 *
8819fac7 478 * Must be called with rcu_read_lock. The packet cannot be shared and
e23775f2 479 * skb->data should point to the Ethernet header.
f2459fe7 480 */
e23775f2
PS
481int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
482 const struct ip_tunnel_info *tun_info)
f2459fe7 483{
fb66fbd1
PS
484 struct sw_flow_key key;
485 int error;
f2459fe7 486
c450371e 487 OVS_CB(skb)->input_vport = vport;
a94ebc39 488 OVS_CB(skb)->mru = 0;
4c7804f1 489 OVS_CB(skb)->cutlen = 0;
5516c277
JS
490 if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
491 u32 mark;
492
493 mark = skb->mark;
494 skb_scrub_packet(skb, true);
495 skb->mark = mark;
496 tun_info = NULL;
497 }
498
e23775f2
PS
499 ovs_skb_init_inner_protocol(skb);
500 skb_clear_ovs_gso_cb(skb);
501 /* Extract flow from 'skb' into 'key'. */
fb66fbd1
PS
502 error = ovs_flow_key_extract(tun_info, skb, &key);
503 if (unlikely(error)) {
504 kfree_skb(skb);
e23775f2 505 return error;
fb66fbd1 506 }
e74d4817 507 ovs_dp_process_packet(skb, &key);
e23775f2 508 return 0;
f2459fe7 509}
5a38795f 510EXPORT_SYMBOL_GPL(ovs_vport_receive);
f2459fe7 511
c405d282
PS
512static void free_vport_rcu(struct rcu_head *rcu)
513{
514 struct vport *vport = container_of(rcu, struct vport, rcu);
515
516 ovs_vport_free(vport);
517}
518
519void ovs_vport_deferred_free(struct vport *vport)
520{
521 if (!vport)
522 return;
523
524 call_rcu(&vport->rcu, free_vport_rcu);
525}
5a38795f 526EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
8b7ea2d4 527
e23775f2
PS
528static unsigned int packet_length(const struct sk_buff *skb)
529{
530 unsigned int length = skb->len - ETH_HLEN;
531
532 if (skb->protocol == htons(ETH_P_8021Q))
533 length -= VLAN_HLEN;
534
535 return length;
536}
537
538void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
539{
540 int mtu = vport->dev->mtu;
541
542 if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
543 net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
544 vport->dev->name,
545 packet_length(skb), mtu);
546 vport->dev->stats.tx_errors++;
547 goto drop;
548 }
549
550 skb->dev = vport->dev;
551 vport->ops->send(skb);
552 return;
553
554drop:
555 kfree_skb(skb);
8b7ea2d4 556}