]> git.proxmox.com Git - mirror_ovs.git/blame - lib/mac-learning.c
netdev-offload-tc: Use single 'once' variable for probing tc features
[mirror_ovs.git] / lib / mac-learning.c
CommitLineData
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <config.h>
18#include "mac-learning.h"
19
064af421
BP
20#include <inttypes.h>
21#include <stdlib.h>
22
f2d7fd66 23#include "bitmap.h"
064af421
BP
24#include "coverage.h"
25#include "hash.h"
b19bab5b 26#include "openvswitch/list.h"
fd016ae3 27#include "openvswitch/poll-loop.h"
064af421 28#include "timeval.h"
18e89129 29#include "unaligned.h"
064af421 30#include "util.h"
0fb7b915 31#include "vlan-bitmap.h"
5136ce49 32
/* Coverage counters bumped by this file: one each for entries that are
 * learned, expired, evicted, and moved between ports. */
COVERAGE_DEFINE(mac_learning_learned);
COVERAGE_DEFINE(mac_learning_expired);
COVERAGE_DEFINE(mac_learning_evicted);
COVERAGE_DEFINE(mac_learning_moved);
d76f09ea 37
e764773c 38/* Returns the number of seconds since 'e' (within 'ml') was last learned. */
321943f7 39int
e764773c 40mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
321943f7
BP
41{
42 time_t remaining = e->expires - time_now();
e764773c 43 return ml->idle_time - remaining;
321943f7
BP
44}
45
064af421 46static uint32_t
74ff3298 47mac_table_hash(const struct mac_learning *ml, const struct eth_addr mac,
8e8d5966 48 uint16_t vlan)
064af421 49{
7e36ac42 50 return hash_mac(mac, vlan, ml->secret);
064af421
BP
51}
52
53static struct mac_entry *
ca6ba700 54mac_entry_from_lru_node(struct ovs_list *list)
064af421
BP
55{
56 return CONTAINER_OF(list, struct mac_entry, lru_node);
57}
58
064af421 59static struct mac_entry *
8ea45fdc 60mac_entry_lookup(const struct mac_learning *ml,
74ff3298 61 const struct eth_addr mac, uint16_t vlan)
064af421
BP
62{
63 struct mac_entry *e;
8ea45fdc 64
8e8d5966 65 HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan),
8ea45fdc
EJ
66 &ml->table) {
67 if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) {
064af421
BP
68 return e;
69 }
70 }
71 return NULL;
72}
73
9d078ec2
BP
74static struct mac_learning_port *
75mac_learning_port_lookup(struct mac_learning *ml, void *port)
76{
77 struct mac_learning_port *mlport;
78
79 HMAP_FOR_EACH_IN_BUCKET (mlport, hmap_node, hash_pointer(port, ml->secret),
80 &ml->ports_by_ptr) {
81 if (mlport->port == port) {
82 return mlport;
83 }
84 }
85 return NULL;
86}
87
88/* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'. The
89 * pointer can be retrieved with mac_entry_get_port().
90 *
91 * The MAC-learning implementation treats the data that 'port' points to as
92 * opaque and never tries to dereference it. However, when a MAC learning
93 * table becomes overfull, so that eviction is required, the implementation
94 * does first evict MAC entries for the most common 'port's values in 'ml', so
95 * that there is a degree of fairness, that is, each port is entitled to its
96 * fair share of MAC entries. */
97void
98mac_entry_set_port(struct mac_learning *ml, struct mac_entry *e, void *port)
99 OVS_REQ_WRLOCK(ml->rwlock)
100{
101 if (mac_entry_get_port(ml, e) != port) {
102 ml->need_revalidate = true;
103
104 if (e->mlport) {
105 struct mac_learning_port *mlport = e->mlport;
417e7e66 106 ovs_list_remove(&e->port_lru_node);
9d078ec2 107
417e7e66 108 if (ovs_list_is_empty(&mlport->port_lrus)) {
9d078ec2
BP
109 ovs_assert(mlport->heap_node.priority == 1);
110 hmap_remove(&ml->ports_by_ptr, &mlport->hmap_node);
111 heap_remove(&ml->ports_by_usage, &mlport->heap_node);
112 free(mlport);
113 } else {
114 ovs_assert(mlport->heap_node.priority > 1);
115 heap_change(&ml->ports_by_usage, &mlport->heap_node,
116 mlport->heap_node.priority - 1);
117 }
118 e->mlport = NULL;
119 }
120
121 if (port) {
122 struct mac_learning_port *mlport;
123
124 mlport = mac_learning_port_lookup(ml, port);
125 if (!mlport) {
126 mlport = xzalloc(sizeof *mlport);
127 hmap_insert(&ml->ports_by_ptr, &mlport->hmap_node,
128 hash_pointer(port, ml->secret));
129 heap_insert(&ml->ports_by_usage, &mlport->heap_node, 1);
130 mlport->port = port;
417e7e66 131 ovs_list_init(&mlport->port_lrus);
9d078ec2
BP
132 } else {
133 heap_change(&ml->ports_by_usage, &mlport->heap_node,
134 mlport->heap_node.priority + 1);
135 }
417e7e66 136 ovs_list_push_back(&mlport->port_lrus, &e->port_lru_node);
9d078ec2
BP
137 e->mlport = mlport;
138 }
139 }
140}
141
142/* Finds one of the ports with the most MAC entries and evicts its least
143 * recently used entry. */
144static void
145evict_mac_entry_fairly(struct mac_learning *ml)
146 OVS_REQ_WRLOCK(ml->rwlock)
147{
148 struct mac_learning_port *mlport;
149 struct mac_entry *e;
150
151 mlport = CONTAINER_OF(heap_max(&ml->ports_by_usage),
152 struct mac_learning_port, heap_node);
417e7e66 153 e = CONTAINER_OF(ovs_list_front(&mlport->port_lrus),
9d078ec2 154 struct mac_entry, port_lru_node);
5f3818f1 155 COVERAGE_INC(mac_learning_evicted);
f34a5d40 156 ml->total_evicted++;
9d078ec2
BP
157 mac_learning_expire(ml, e);
158}
159
064af421
BP
160/* If the LRU list is not empty, stores the least-recently-used entry in '*e'
161 * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e'
162 * and return false. */
163static bool
164get_lru(struct mac_learning *ml, struct mac_entry **e)
509c0149 165 OVS_REQ_RDLOCK(ml->rwlock)
064af421 166{
417e7e66 167 if (!ovs_list_is_empty(&ml->lrus)) {
064af421
BP
168 *e = mac_entry_from_lru_node(ml->lrus.next);
169 return true;
170 } else {
171 *e = NULL;
172 return false;
173 }
174}
175
/* Clamps 'idle_time', in seconds, to the supported range of 15 to 3600
 * inclusive, and returns the clamped value. */
static unsigned int
normalize_idle_time(unsigned int idle_time)
{
    if (idle_time < 15) {
        return 15;
    }
    if (idle_time > 3600) {
        return 3600;
    }
    return idle_time;
}
183
f34a5d40 184/* Clear all the mac_learning statistics */
95dcecc5 185void
f34a5d40
EC
186mac_learning_clear_statistics(struct mac_learning *ml)
187{
188 if (ml != NULL) {
189 ml->total_learned = 0;
190 ml->total_expired = 0;
191 ml->total_evicted = 0;
192 ml->total_moved = 0;
193 }
194}
195
e764773c 196/* Creates and returns a new MAC learning table with an initial MAC aging
c4069512
BP
197 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
198 * entries. */
064af421 199struct mac_learning *
e764773c 200mac_learning_create(unsigned int idle_time)
064af421
BP
201{
202 struct mac_learning *ml;
064af421
BP
203
204 ml = xmalloc(sizeof *ml);
417e7e66 205 ovs_list_init(&ml->lrus);
8ea45fdc 206 hmap_init(&ml->table);
064af421 207 ml->secret = random_uint32();
8f30d09a 208 ml->flood_vlans = NULL;
e764773c 209 ml->idle_time = normalize_idle_time(idle_time);
c4069512 210 ml->max_entries = MAC_DEFAULT_MAX;
30618594 211 ml->need_revalidate = false;
9d078ec2
BP
212 hmap_init(&ml->ports_by_ptr);
213 heap_init(&ml->ports_by_usage);
37bec3d3 214 ovs_refcount_init(&ml->ref_cnt);
509c0149 215 ovs_rwlock_init(&ml->rwlock);
f34a5d40 216 mac_learning_clear_statistics(ml);
064af421
BP
217 return ml;
218}
219
5d989517
EJ
220struct mac_learning *
221mac_learning_ref(const struct mac_learning *ml_)
064af421 222{
5d989517 223 struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_);
f2d7fd66 224 if (ml) {
37bec3d3 225 ovs_refcount_ref(&ml->ref_cnt);
5d989517
EJ
226 }
227 return ml;
228}
229
230/* Unreferences (and possibly destroys) MAC learning table 'ml'. */
231void
232mac_learning_unref(struct mac_learning *ml)
233{
37bec3d3 234 if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) {
16a5d1e4
EJ
235 struct mac_entry *e, *next;
236
9d078ec2 237 ovs_rwlock_wrlock(&ml->rwlock);
16a5d1e4 238 HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) {
9d078ec2 239 mac_learning_expire(ml, e);
16a5d1e4 240 }
8ea45fdc 241 hmap_destroy(&ml->table);
9d078ec2
BP
242 hmap_destroy(&ml->ports_by_ptr);
243 heap_destroy(&ml->ports_by_usage);
16a5d1e4 244
8f30d09a 245 bitmap_free(ml->flood_vlans);
9d078ec2 246 ovs_rwlock_unlock(&ml->rwlock);
509c0149 247 ovs_rwlock_destroy(&ml->rwlock);
8e2e7a5d 248 free(ml);
f2d7fd66 249 }
064af421
BP
250}
251
8f30d09a 252/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
2a4ae635
BP
253 * which all packets are flooded. Returns true if the set has changed from the
254 * previous value. */
f2d7fd66 255bool
2a4ae635
BP
256mac_learning_set_flood_vlans(struct mac_learning *ml,
257 const unsigned long *bitmap)
f2d7fd66 258{
2a4ae635
BP
259 if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) {
260 return false;
261 } else {
262 bitmap_free(ml->flood_vlans);
263 ml->flood_vlans = vlan_bitmap_clone(bitmap);
264 return true;
265 }
f2d7fd66
JG
266}
267
e764773c
BP
268/* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
269void
270mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time)
271{
272 idle_time = normalize_idle_time(idle_time);
273 if (idle_time != ml->idle_time) {
274 struct mac_entry *e;
275 int delta;
276
277 delta = (int) idle_time - (int) ml->idle_time;
278 LIST_FOR_EACH (e, lru_node, &ml->lrus) {
279 e->expires += delta;
280 }
281 ml->idle_time = idle_time;
282 }
283}
284
c4069512
BP
285/* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
286 * to be within a reasonable range. */
287void
288mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
289{
290 ml->max_entries = (max_entries < 10 ? 10
291 : max_entries > 1000 * 1000 ? 1000 * 1000
292 : max_entries);
293}
294
f2d7fd66
JG
295static bool
296is_learning_vlan(const struct mac_learning *ml, uint16_t vlan)
297{
82062a20 298 return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan);
f2d7fd66
JG
299}
300
db8077c3
BP
301/* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
302 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
303 * 'vlan' is configured on 'ml' to flood all packets. */
304bool
305mac_learning_may_learn(const struct mac_learning *ml,
74ff3298 306 const struct eth_addr src_mac, uint16_t vlan)
db8077c3
BP
307{
308 return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac);
309}
310
311/* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
312 * inserting a new entry if necessary. The caller must have already verified,
313 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
314 * learnable.
7febb910 315 *
9d078ec2
BP
316 * If the returned MAC entry is new (that is, if it has a NULL client-provided
317 * port, as returned by mac_entry_get_port()), then the caller must initialize
318 * the new entry's port to a nonnull value with mac_entry_set_port(). */
db8077c3
BP
319struct mac_entry *
320mac_learning_insert(struct mac_learning *ml,
74ff3298 321 const struct eth_addr src_mac, uint16_t vlan)
064af421
BP
322{
323 struct mac_entry *e;
064af421 324
8ea45fdc 325 e = mac_entry_lookup(ml, src_mac, vlan);
064af421 326 if (!e) {
8e8d5966
EJ
327 uint32_t hash = mac_table_hash(ml, src_mac, vlan);
328
c4069512 329 if (hmap_count(&ml->table) >= ml->max_entries) {
9d078ec2 330 evict_mac_entry_fairly(ml);
064af421 331 }
8e8d5966 332
16a5d1e4 333 e = xmalloc(sizeof *e);
8e8d5966 334 hmap_insert(&ml->table, &e->hmap_node, hash);
74ff3298 335 e->mac = src_mac;
064af421 336 e->vlan = vlan;
7febb910 337 e->grat_arp_lock = TIME_MIN;
9d078ec2
BP
338 e->mlport = NULL;
339 COVERAGE_INC(mac_learning_learned);
f34a5d40 340 ml->total_learned++;
16a5d1e4 341 } else {
417e7e66 342 ovs_list_remove(&e->lru_node);
064af421
BP
343 }
344
db8077c3 345 /* Mark 'e' as recently used. */
417e7e66 346 ovs_list_push_back(&ml->lrus, &e->lru_node);
9d078ec2 347 if (e->mlport) {
417e7e66
BW
348 ovs_list_remove(&e->port_lru_node);
349 ovs_list_push_back(&e->mlport->port_lrus, &e->port_lru_node);
9d078ec2 350 }
e764773c 351 e->expires = time_now() + ml->idle_time;
7febb910 352
db8077c3 353 return e;
064af421
BP
354}
355
064799a1
JR
356/* Checks whether a MAC learning update is necessary for MAC learning table
357 * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan',
358 * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is
359 * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'.
360 *
361 * Most packets processed through the MAC learning table do not actually
362 * change it in any way. This function requires only a read lock on the MAC
363 * learning table, so it is much cheaper in this common case.
364 *
365 * Keep the code here synchronized with that in update_learning_table__()
366 * below. */
367static bool
368is_mac_learning_update_needed(const struct mac_learning *ml,
369 struct eth_addr src, int vlan,
370 bool is_gratuitous_arp, bool is_bond,
371 void *in_port)
372 OVS_REQ_RDLOCK(ml->rwlock)
373{
374 struct mac_entry *mac;
375
376 if (!mac_learning_may_learn(ml, src, vlan)) {
377 return false;
378 }
379
380 mac = mac_learning_lookup(ml, src, vlan);
381 if (!mac || mac_entry_age(ml, mac)) {
382 return true;
383 }
384
385 if (is_gratuitous_arp) {
386 /* We don't want to learn from gratuitous ARP packets that are
91fc374a 387 * reflected back over bond members so we lock the learning table. For
064799a1
JR
388 * more detail, see the bigger comment in update_learning_table__(). */
389 if (!is_bond) {
390 return true; /* Need to set the gratuitous ARP lock. */
391 } else if (mac_entry_is_grat_arp_locked(mac)) {
392 return false;
393 }
394 }
395
396 return mac_entry_get_port(ml, mac) != in_port /* ofbundle */;
397}
398
399/* Updates MAC learning table 'ml' given that a packet matching 'src' was
400 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
401 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
402 * 'is_bond' is 'true'.
403 *
404 * This code repeats all the checks in is_mac_learning_update_needed() because
405 * the lock was released between there and here and thus the MAC learning state
406 * could have changed.
407 *
408 * Returns 'true' if 'ml' was updated, 'false' otherwise.
409 *
410 * Keep the code here synchronized with that in is_mac_learning_update_needed()
411 * above. */
412static bool
413update_learning_table__(struct mac_learning *ml, struct eth_addr src,
414 int vlan, bool is_gratuitous_arp, bool is_bond,
415 void *in_port)
416 OVS_REQ_WRLOCK(ml->rwlock)
417{
418 struct mac_entry *mac;
419
420 if (!mac_learning_may_learn(ml, src, vlan)) {
421 return false;
422 }
423
424 mac = mac_learning_insert(ml, src, vlan);
425 if (is_gratuitous_arp) {
426 /* Gratuitous ARP packets received over non-bond interfaces could be
91fc374a 427 * reflected back over bond members. We don't want to learn from these
064799a1
JR
428 * reflected packets, so we lock each entry for which a gratuitous ARP
429 * packet was received over a non-bond interface and refrain from
430 * learning from gratuitous ARP packets that arrive over bond
7c9afefd
SF
431 * interfaces for this entry while the lock is in effect. Refer to the
432 * 'ovs-vswitch Internals' document for more in-depth discussion on
433 * this topic. */
064799a1
JR
434 if (!is_bond) {
435 mac_entry_set_grat_arp_lock(mac);
436 } else if (mac_entry_is_grat_arp_locked(mac)) {
437 return false;
438 }
439 }
440
441 if (mac_entry_get_port(ml, mac) != in_port) {
5f3818f1
EC
442 if (mac_entry_get_port(ml, mac) != NULL) {
443 COVERAGE_INC(mac_learning_moved);
f34a5d40 444 ml->total_moved++;
5f3818f1 445 }
064799a1
JR
446 mac_entry_set_port(ml, mac, in_port);
447 return true;
448 }
449 return false;
450}
451
452/* Updates MAC learning table 'ml' given that a packet matching 'src' was
453 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
454 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
455 * 'is_bond' is 'true'.
456 *
457 * Returns 'true' if 'ml' was updated, 'false' otherwise. */
458bool
459mac_learning_update(struct mac_learning *ml, struct eth_addr src,
460 int vlan, bool is_gratuitous_arp, bool is_bond,
461 void *in_port)
462 OVS_EXCLUDED(ml->rwlock)
463{
464 bool need_update;
465 bool updated = false;
466
467 /* Don't learn the OFPP_NONE port. */
468 if (in_port != NULL) {
469 /* First try the common case: no change to MAC learning table. */
470 ovs_rwlock_rdlock(&ml->rwlock);
471 need_update = is_mac_learning_update_needed(ml, src, vlan,
472 is_gratuitous_arp, is_bond,
473 in_port);
474 ovs_rwlock_unlock(&ml->rwlock);
475
476 if (need_update) {
477 /* Slow path: MAC learning table might need an update. */
478 ovs_rwlock_wrlock(&ml->rwlock);
479 updated = update_learning_table__(ml, src, vlan, is_gratuitous_arp,
480 is_bond, in_port);
481 ovs_rwlock_unlock(&ml->rwlock);
482 }
483 }
484 return updated;
485}
486
db8077c3 487/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
9d078ec2 488 * learning entry, if any. */
db8077c3
BP
489struct mac_entry *
490mac_learning_lookup(const struct mac_learning *ml,
74ff3298 491 const struct eth_addr dst, uint16_t vlan)
064af421 492{
db8077c3
BP
493 if (eth_addr_is_multicast(dst)) {
494 /* No tag because the treatment of multicast destinations never
495 * changes. */
496 return NULL;
497 } else if (!is_learning_vlan(ml, vlan)) {
498 /* We don't tag this property. The set of learning VLANs changes so
499 * rarely that we revalidate every flow when it changes. */
500 return NULL;
064af421 501 } else {
8ea45fdc
EJ
502 struct mac_entry *e = mac_entry_lookup(ml, dst, vlan);
503
9d078ec2 504 ovs_assert(e == NULL || mac_entry_get_port(ml, e) != NULL);
db8077c3 505 return e;
064af421
BP
506 }
507}
508
16a5d1e4 509/* Expires 'e' from the 'ml' hash table. */
356180a8
BP
510void
511mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
512{
9d078ec2
BP
513 ml->need_revalidate = true;
514 mac_entry_set_port(ml, e, NULL);
8ea45fdc 515 hmap_remove(&ml->table, &e->hmap_node);
417e7e66 516 ovs_list_remove(&e->lru_node);
16a5d1e4 517 free(e);
356180a8
BP
518}
519
9d078ec2 520/* Expires all the mac-learning entries in 'ml'. */
064af421 521void
30618594 522mac_learning_flush(struct mac_learning *ml)
064af421
BP
523{
524 struct mac_entry *e;
525 while (get_lru(ml, &e)){
356180a8 526 mac_learning_expire(ml, e);
064af421 527 }
16a5d1e4 528 hmap_shrink(&ml->table);
064af421
BP
529}
530
30618594
EJ
531/* Does periodic work required by 'ml'. Returns true if something changed that
532 * may require flow revalidation. */
533bool
534mac_learning_run(struct mac_learning *ml)
064af421 535{
30618594 536 bool need_revalidate;
064af421 537 struct mac_entry *e;
ae1736c0 538
c4069512
BP
539 while (get_lru(ml, &e)
540 && (hmap_count(&ml->table) > ml->max_entries
541 || time_now() >= e->expires)) {
064af421 542 COVERAGE_INC(mac_learning_expired);
f34a5d40 543 ml->total_expired++;
356180a8 544 mac_learning_expire(ml, e);
064af421 545 }
30618594
EJ
546
547 need_revalidate = ml->need_revalidate;
548 ml->need_revalidate = false;
549 return need_revalidate;
064af421
BP
550}
551
552void
553mac_learning_wait(struct mac_learning *ml)
554{
ae1736c0 555 if (hmap_count(&ml->table) > ml->max_entries
30618594 556 || ml->need_revalidate) {
c4069512 557 poll_immediate_wake();
417e7e66 558 } else if (!ovs_list_is_empty(&ml->lrus)) {
064af421 559 struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
7cf8b266 560 poll_timer_wait_until(e->expires * 1000LL);
064af421
BP
561 }
562}