]> git.proxmox.com Git - mirror_ovs.git/blame - lib/mac-learning.c
stopwatch: Remove tabs from output.
[mirror_ovs.git] / lib / mac-learning.c
CommitLineData
064af421 1/*
9d078ec2 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
18#include "mac-learning.h"
19
064af421
BP
20#include <inttypes.h>
21#include <stdlib.h>
22
f2d7fd66 23#include "bitmap.h"
064af421
BP
24#include "coverage.h"
25#include "hash.h"
b19bab5b 26#include "openvswitch/list.h"
fd016ae3 27#include "openvswitch/poll-loop.h"
064af421 28#include "timeval.h"
18e89129 29#include "unaligned.h"
064af421 30#include "util.h"
0fb7b915 31#include "vlan-bitmap.h"
5136ce49 32
d76f09ea
BP
33COVERAGE_DEFINE(mac_learning_learned);
34COVERAGE_DEFINE(mac_learning_expired);
35
e764773c 36/* Returns the number of seconds since 'e' (within 'ml') was last learned. */
321943f7 37int
e764773c 38mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
321943f7
BP
39{
40 time_t remaining = e->expires - time_now();
e764773c 41 return ml->idle_time - remaining;
321943f7
BP
42}
43
064af421 44static uint32_t
74ff3298 45mac_table_hash(const struct mac_learning *ml, const struct eth_addr mac,
8e8d5966 46 uint16_t vlan)
064af421 47{
7e36ac42 48 return hash_mac(mac, vlan, ml->secret);
064af421
BP
49}
50
51static struct mac_entry *
ca6ba700 52mac_entry_from_lru_node(struct ovs_list *list)
064af421
BP
53{
54 return CONTAINER_OF(list, struct mac_entry, lru_node);
55}
56
064af421 57static struct mac_entry *
8ea45fdc 58mac_entry_lookup(const struct mac_learning *ml,
74ff3298 59 const struct eth_addr mac, uint16_t vlan)
064af421
BP
60{
61 struct mac_entry *e;
8ea45fdc 62
8e8d5966 63 HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan),
8ea45fdc
EJ
64 &ml->table) {
65 if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) {
064af421
BP
66 return e;
67 }
68 }
69 return NULL;
70}
71
9d078ec2
BP
72static struct mac_learning_port *
73mac_learning_port_lookup(struct mac_learning *ml, void *port)
74{
75 struct mac_learning_port *mlport;
76
77 HMAP_FOR_EACH_IN_BUCKET (mlport, hmap_node, hash_pointer(port, ml->secret),
78 &ml->ports_by_ptr) {
79 if (mlport->port == port) {
80 return mlport;
81 }
82 }
83 return NULL;
84}
85
86/* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'. The
87 * pointer can be retrieved with mac_entry_get_port().
88 *
89 * The MAC-learning implementation treats the data that 'port' points to as
90 * opaque and never tries to dereference it. However, when a MAC learning
91 * table becomes overfull, so that eviction is required, the implementation
92 * does first evict MAC entries for the most common 'port's values in 'ml', so
93 * that there is a degree of fairness, that is, each port is entitled to its
94 * fair share of MAC entries. */
95void
96mac_entry_set_port(struct mac_learning *ml, struct mac_entry *e, void *port)
97 OVS_REQ_WRLOCK(ml->rwlock)
98{
99 if (mac_entry_get_port(ml, e) != port) {
100 ml->need_revalidate = true;
101
102 if (e->mlport) {
103 struct mac_learning_port *mlport = e->mlport;
417e7e66 104 ovs_list_remove(&e->port_lru_node);
9d078ec2 105
417e7e66 106 if (ovs_list_is_empty(&mlport->port_lrus)) {
9d078ec2
BP
107 ovs_assert(mlport->heap_node.priority == 1);
108 hmap_remove(&ml->ports_by_ptr, &mlport->hmap_node);
109 heap_remove(&ml->ports_by_usage, &mlport->heap_node);
110 free(mlport);
111 } else {
112 ovs_assert(mlport->heap_node.priority > 1);
113 heap_change(&ml->ports_by_usage, &mlport->heap_node,
114 mlport->heap_node.priority - 1);
115 }
116 e->mlport = NULL;
117 }
118
119 if (port) {
120 struct mac_learning_port *mlport;
121
122 mlport = mac_learning_port_lookup(ml, port);
123 if (!mlport) {
124 mlport = xzalloc(sizeof *mlport);
125 hmap_insert(&ml->ports_by_ptr, &mlport->hmap_node,
126 hash_pointer(port, ml->secret));
127 heap_insert(&ml->ports_by_usage, &mlport->heap_node, 1);
128 mlport->port = port;
417e7e66 129 ovs_list_init(&mlport->port_lrus);
9d078ec2
BP
130 } else {
131 heap_change(&ml->ports_by_usage, &mlport->heap_node,
132 mlport->heap_node.priority + 1);
133 }
417e7e66 134 ovs_list_push_back(&mlport->port_lrus, &e->port_lru_node);
9d078ec2
BP
135 e->mlport = mlport;
136 }
137 }
138}
139
140/* Finds one of the ports with the most MAC entries and evicts its least
141 * recently used entry. */
142static void
143evict_mac_entry_fairly(struct mac_learning *ml)
144 OVS_REQ_WRLOCK(ml->rwlock)
145{
146 struct mac_learning_port *mlport;
147 struct mac_entry *e;
148
149 mlport = CONTAINER_OF(heap_max(&ml->ports_by_usage),
150 struct mac_learning_port, heap_node);
417e7e66 151 e = CONTAINER_OF(ovs_list_front(&mlport->port_lrus),
9d078ec2
BP
152 struct mac_entry, port_lru_node);
153 mac_learning_expire(ml, e);
154}
155
064af421
BP
156/* If the LRU list is not empty, stores the least-recently-used entry in '*e'
157 * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e'
158 * and return false. */
159static bool
160get_lru(struct mac_learning *ml, struct mac_entry **e)
509c0149 161 OVS_REQ_RDLOCK(ml->rwlock)
064af421 162{
417e7e66 163 if (!ovs_list_is_empty(&ml->lrus)) {
064af421
BP
164 *e = mac_entry_from_lru_node(ml->lrus.next);
165 return true;
166 } else {
167 *e = NULL;
168 return false;
169 }
170}
171
e764773c
BP
172static unsigned int
173normalize_idle_time(unsigned int idle_time)
174{
175 return (idle_time < 15 ? 15
176 : idle_time > 3600 ? 3600
177 : idle_time);
178}
179
180/* Creates and returns a new MAC learning table with an initial MAC aging
c4069512
BP
181 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
182 * entries. */
064af421 183struct mac_learning *
e764773c 184mac_learning_create(unsigned int idle_time)
064af421
BP
185{
186 struct mac_learning *ml;
064af421
BP
187
188 ml = xmalloc(sizeof *ml);
417e7e66 189 ovs_list_init(&ml->lrus);
8ea45fdc 190 hmap_init(&ml->table);
064af421 191 ml->secret = random_uint32();
8f30d09a 192 ml->flood_vlans = NULL;
e764773c 193 ml->idle_time = normalize_idle_time(idle_time);
c4069512 194 ml->max_entries = MAC_DEFAULT_MAX;
30618594 195 ml->need_revalidate = false;
9d078ec2
BP
196 hmap_init(&ml->ports_by_ptr);
197 heap_init(&ml->ports_by_usage);
37bec3d3 198 ovs_refcount_init(&ml->ref_cnt);
509c0149 199 ovs_rwlock_init(&ml->rwlock);
064af421
BP
200 return ml;
201}
202
5d989517
EJ
203struct mac_learning *
204mac_learning_ref(const struct mac_learning *ml_)
064af421 205{
5d989517 206 struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_);
f2d7fd66 207 if (ml) {
37bec3d3 208 ovs_refcount_ref(&ml->ref_cnt);
5d989517
EJ
209 }
210 return ml;
211}
212
213/* Unreferences (and possibly destroys) MAC learning table 'ml'. */
214void
215mac_learning_unref(struct mac_learning *ml)
216{
37bec3d3 217 if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) {
16a5d1e4
EJ
218 struct mac_entry *e, *next;
219
9d078ec2 220 ovs_rwlock_wrlock(&ml->rwlock);
16a5d1e4 221 HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) {
9d078ec2 222 mac_learning_expire(ml, e);
16a5d1e4 223 }
8ea45fdc 224 hmap_destroy(&ml->table);
9d078ec2
BP
225 hmap_destroy(&ml->ports_by_ptr);
226 heap_destroy(&ml->ports_by_usage);
16a5d1e4 227
8f30d09a 228 bitmap_free(ml->flood_vlans);
9d078ec2 229 ovs_rwlock_unlock(&ml->rwlock);
509c0149 230 ovs_rwlock_destroy(&ml->rwlock);
8e2e7a5d 231 free(ml);
f2d7fd66 232 }
064af421
BP
233}
234
8f30d09a 235/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
2a4ae635
BP
236 * which all packets are flooded. Returns true if the set has changed from the
237 * previous value. */
f2d7fd66 238bool
2a4ae635
BP
239mac_learning_set_flood_vlans(struct mac_learning *ml,
240 const unsigned long *bitmap)
f2d7fd66 241{
2a4ae635
BP
242 if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) {
243 return false;
244 } else {
245 bitmap_free(ml->flood_vlans);
246 ml->flood_vlans = vlan_bitmap_clone(bitmap);
247 return true;
248 }
f2d7fd66
JG
249}
250
e764773c
BP
251/* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
252void
253mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time)
254{
255 idle_time = normalize_idle_time(idle_time);
256 if (idle_time != ml->idle_time) {
257 struct mac_entry *e;
258 int delta;
259
260 delta = (int) idle_time - (int) ml->idle_time;
261 LIST_FOR_EACH (e, lru_node, &ml->lrus) {
262 e->expires += delta;
263 }
264 ml->idle_time = idle_time;
265 }
266}
267
c4069512
BP
268/* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
269 * to be within a reasonable range. */
270void
271mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
272{
273 ml->max_entries = (max_entries < 10 ? 10
274 : max_entries > 1000 * 1000 ? 1000 * 1000
275 : max_entries);
276}
277
f2d7fd66
JG
278static bool
279is_learning_vlan(const struct mac_learning *ml, uint16_t vlan)
280{
82062a20 281 return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan);
f2d7fd66
JG
282}
283
db8077c3
BP
284/* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
285 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
286 * 'vlan' is configured on 'ml' to flood all packets. */
287bool
288mac_learning_may_learn(const struct mac_learning *ml,
74ff3298 289 const struct eth_addr src_mac, uint16_t vlan)
db8077c3
BP
290{
291 return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac);
292}
293
294/* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
295 * inserting a new entry if necessary. The caller must have already verified,
296 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
297 * learnable.
7febb910 298 *
9d078ec2
BP
299 * If the returned MAC entry is new (that is, if it has a NULL client-provided
300 * port, as returned by mac_entry_get_port()), then the caller must initialize
301 * the new entry's port to a nonnull value with mac_entry_set_port(). */
db8077c3
BP
302struct mac_entry *
303mac_learning_insert(struct mac_learning *ml,
74ff3298 304 const struct eth_addr src_mac, uint16_t vlan)
064af421
BP
305{
306 struct mac_entry *e;
064af421 307
8ea45fdc 308 e = mac_entry_lookup(ml, src_mac, vlan);
064af421 309 if (!e) {
8e8d5966
EJ
310 uint32_t hash = mac_table_hash(ml, src_mac, vlan);
311
c4069512 312 if (hmap_count(&ml->table) >= ml->max_entries) {
9d078ec2 313 evict_mac_entry_fairly(ml);
064af421 314 }
8e8d5966 315
16a5d1e4 316 e = xmalloc(sizeof *e);
8e8d5966 317 hmap_insert(&ml->table, &e->hmap_node, hash);
74ff3298 318 e->mac = src_mac;
064af421 319 e->vlan = vlan;
7febb910 320 e->grat_arp_lock = TIME_MIN;
9d078ec2
BP
321 e->mlport = NULL;
322 COVERAGE_INC(mac_learning_learned);
16a5d1e4 323 } else {
417e7e66 324 ovs_list_remove(&e->lru_node);
064af421
BP
325 }
326
db8077c3 327 /* Mark 'e' as recently used. */
417e7e66 328 ovs_list_push_back(&ml->lrus, &e->lru_node);
9d078ec2 329 if (e->mlport) {
417e7e66
BW
330 ovs_list_remove(&e->port_lru_node);
331 ovs_list_push_back(&e->mlport->port_lrus, &e->port_lru_node);
9d078ec2 332 }
e764773c 333 e->expires = time_now() + ml->idle_time;
7febb910 334
db8077c3 335 return e;
064af421
BP
336}
337
064799a1
JR
338/* Checks whether a MAC learning update is necessary for MAC learning table
339 * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan',
340 * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is
341 * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'.
342 *
343 * Most packets processed through the MAC learning table do not actually
344 * change it in any way. This function requires only a read lock on the MAC
345 * learning table, so it is much cheaper in this common case.
346 *
347 * Keep the code here synchronized with that in update_learning_table__()
348 * below. */
349static bool
350is_mac_learning_update_needed(const struct mac_learning *ml,
351 struct eth_addr src, int vlan,
352 bool is_gratuitous_arp, bool is_bond,
353 void *in_port)
354 OVS_REQ_RDLOCK(ml->rwlock)
355{
356 struct mac_entry *mac;
357
358 if (!mac_learning_may_learn(ml, src, vlan)) {
359 return false;
360 }
361
362 mac = mac_learning_lookup(ml, src, vlan);
363 if (!mac || mac_entry_age(ml, mac)) {
364 return true;
365 }
366
367 if (is_gratuitous_arp) {
368 /* We don't want to learn from gratuitous ARP packets that are
369 * reflected back over bond slaves so we lock the learning table. For
370 * more detail, see the bigger comment in update_learning_table__(). */
371 if (!is_bond) {
372 return true; /* Need to set the gratuitous ARP lock. */
373 } else if (mac_entry_is_grat_arp_locked(mac)) {
374 return false;
375 }
376 }
377
378 return mac_entry_get_port(ml, mac) != in_port /* ofbundle */;
379}
380
381/* Updates MAC learning table 'ml' given that a packet matching 'src' was
382 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
383 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
384 * 'is_bond' is 'true'.
385 *
386 * This code repeats all the checks in is_mac_learning_update_needed() because
387 * the lock was released between there and here and thus the MAC learning state
388 * could have changed.
389 *
390 * Returns 'true' if 'ml' was updated, 'false' otherwise.
391 *
392 * Keep the code here synchronized with that in is_mac_learning_update_needed()
393 * above. */
394static bool
395update_learning_table__(struct mac_learning *ml, struct eth_addr src,
396 int vlan, bool is_gratuitous_arp, bool is_bond,
397 void *in_port)
398 OVS_REQ_WRLOCK(ml->rwlock)
399{
400 struct mac_entry *mac;
401
402 if (!mac_learning_may_learn(ml, src, vlan)) {
403 return false;
404 }
405
406 mac = mac_learning_insert(ml, src, vlan);
407 if (is_gratuitous_arp) {
408 /* Gratuitous ARP packets received over non-bond interfaces could be
409 * reflected back over bond slaves. We don't want to learn from these
410 * reflected packets, so we lock each entry for which a gratuitous ARP
411 * packet was received over a non-bond interface and refrain from
412 * learning from gratuitous ARP packets that arrive over bond
7c9afefd
SF
413 * interfaces for this entry while the lock is in effect. Refer to the
414 * 'ovs-vswitch Internals' document for more in-depth discussion on
415 * this topic. */
064799a1
JR
416 if (!is_bond) {
417 mac_entry_set_grat_arp_lock(mac);
418 } else if (mac_entry_is_grat_arp_locked(mac)) {
419 return false;
420 }
421 }
422
423 if (mac_entry_get_port(ml, mac) != in_port) {
424 mac_entry_set_port(ml, mac, in_port);
425 return true;
426 }
427 return false;
428}
429
430/* Updates MAC learning table 'ml' given that a packet matching 'src' was
431 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
432 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
433 * 'is_bond' is 'true'.
434 *
435 * Returns 'true' if 'ml' was updated, 'false' otherwise. */
436bool
437mac_learning_update(struct mac_learning *ml, struct eth_addr src,
438 int vlan, bool is_gratuitous_arp, bool is_bond,
439 void *in_port)
440 OVS_EXCLUDED(ml->rwlock)
441{
442 bool need_update;
443 bool updated = false;
444
445 /* Don't learn the OFPP_NONE port. */
446 if (in_port != NULL) {
447 /* First try the common case: no change to MAC learning table. */
448 ovs_rwlock_rdlock(&ml->rwlock);
449 need_update = is_mac_learning_update_needed(ml, src, vlan,
450 is_gratuitous_arp, is_bond,
451 in_port);
452 ovs_rwlock_unlock(&ml->rwlock);
453
454 if (need_update) {
455 /* Slow path: MAC learning table might need an update. */
456 ovs_rwlock_wrlock(&ml->rwlock);
457 updated = update_learning_table__(ml, src, vlan, is_gratuitous_arp,
458 is_bond, in_port);
459 ovs_rwlock_unlock(&ml->rwlock);
460 }
461 }
462 return updated;
463}
464
db8077c3 465/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
9d078ec2 466 * learning entry, if any. */
db8077c3
BP
467struct mac_entry *
468mac_learning_lookup(const struct mac_learning *ml,
74ff3298 469 const struct eth_addr dst, uint16_t vlan)
064af421 470{
db8077c3
BP
471 if (eth_addr_is_multicast(dst)) {
472 /* No tag because the treatment of multicast destinations never
473 * changes. */
474 return NULL;
475 } else if (!is_learning_vlan(ml, vlan)) {
476 /* We don't tag this property. The set of learning VLANs changes so
477 * rarely that we revalidate every flow when it changes. */
478 return NULL;
064af421 479 } else {
8ea45fdc
EJ
480 struct mac_entry *e = mac_entry_lookup(ml, dst, vlan);
481
9d078ec2 482 ovs_assert(e == NULL || mac_entry_get_port(ml, e) != NULL);
db8077c3 483 return e;
064af421
BP
484 }
485}
486
16a5d1e4 487/* Expires 'e' from the 'ml' hash table. */
356180a8
BP
488void
489mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
490{
9d078ec2
BP
491 ml->need_revalidate = true;
492 mac_entry_set_port(ml, e, NULL);
8ea45fdc 493 hmap_remove(&ml->table, &e->hmap_node);
417e7e66 494 ovs_list_remove(&e->lru_node);
16a5d1e4 495 free(e);
356180a8
BP
496}
497
9d078ec2 498/* Expires all the mac-learning entries in 'ml'. */
064af421 499void
30618594 500mac_learning_flush(struct mac_learning *ml)
064af421
BP
501{
502 struct mac_entry *e;
503 while (get_lru(ml, &e)){
356180a8 504 mac_learning_expire(ml, e);
064af421 505 }
16a5d1e4 506 hmap_shrink(&ml->table);
064af421
BP
507}
508
30618594
EJ
509/* Does periodic work required by 'ml'. Returns true if something changed that
510 * may require flow revalidation. */
511bool
512mac_learning_run(struct mac_learning *ml)
064af421 513{
30618594 514 bool need_revalidate;
064af421 515 struct mac_entry *e;
ae1736c0 516
c4069512
BP
517 while (get_lru(ml, &e)
518 && (hmap_count(&ml->table) > ml->max_entries
519 || time_now() >= e->expires)) {
064af421 520 COVERAGE_INC(mac_learning_expired);
356180a8 521 mac_learning_expire(ml, e);
064af421 522 }
30618594
EJ
523
524 need_revalidate = ml->need_revalidate;
525 ml->need_revalidate = false;
526 return need_revalidate;
064af421
BP
527}
528
529void
530mac_learning_wait(struct mac_learning *ml)
531{
ae1736c0 532 if (hmap_count(&ml->table) > ml->max_entries
30618594 533 || ml->need_revalidate) {
c4069512 534 poll_immediate_wake();
417e7e66 535 } else if (!ovs_list_is_empty(&ml->lrus)) {
064af421 536 struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
7cf8b266 537 poll_timer_wait_until(e->expires * 1000LL);
064af421
BP
538 }
539}