/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include "mac-learning.h"

#include <inttypes.h>
#include <stdlib.h>

#include "bitmap.h"
#include "coverage.h"
#include "hash.h"
#include "openvswitch/list.h"
#include "openvswitch/poll-loop.h"
#include "timeval.h"
#include "unaligned.h"
#include "util.h"
#include "vlan-bitmap.h"

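/* Coverage counters: MACs learned, entries removed during mac_learning_run()
 * (idle timeout or table overflow), entries evicted to make room for new
 * entries, and MACs that moved to a different port. */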
COVERAGE_DEFINE(mac_learning_learned);
COVERAGE_DEFINE(mac_learning_expired);
COVERAGE_DEFINE(mac_learning_evicted);
COVERAGE_DEFINE(mac_learning_moved);

/* Returns the number of seconds since 'e' (within 'ml') was last learned. */
int
mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
{
    time_t remaining = e->expires - time_now();
    return ml->idle_time - remaining;
}

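/* Returns the hash for 'mac' and 'vlan' within 'ml', mixing in 'ml->secret'
 * so that hash values differ from one table to another. */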
static uint32_t
mac_table_hash(const struct mac_learning *ml, const struct eth_addr mac,
               uint16_t vlan)
{
    return hash_mac(mac, vlan, ml->secret);
}

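/* Converts 'list', which must be the 'lru_node' member of a mac_entry, into
 * the mac_entry that contains it. */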
static struct mac_entry *
mac_entry_from_lru_node(struct ovs_list *list)
{
    return CONTAINER_OF(list, struct mac_entry, lru_node);
}

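/* Searches 'ml' for an entry for 'mac' on 'vlan'.  Returns the entry if
 * found, otherwise NULL. */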
static struct mac_entry *
mac_entry_lookup(const struct mac_learning *ml,
                 const struct eth_addr mac, uint16_t vlan)
{
    struct mac_entry *e;

    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan),
                             &ml->table) {
        if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) {
            return e;
        }
    }
    return NULL;
}

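/* Searches 'ml' for the mac_learning_port that wraps the client-owned
 * pointer 'port'.  Returns it if found, otherwise NULL. */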
static struct mac_learning_port *
mac_learning_port_lookup(struct mac_learning *ml, void *port)
{
    struct mac_learning_port *mlport;

    HMAP_FOR_EACH_IN_BUCKET (mlport, hmap_node, hash_pointer(port, ml->secret),
                             &ml->ports_by_ptr) {
        if (mlport->port == port) {
            return mlport;
        }
    }
    return NULL;
}

/* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'.  The
 * pointer can be retrieved with mac_entry_get_port().
 *
 * The MAC-learning implementation treats the data that 'port' points to as
 * opaque and never tries to dereference it.  However, when a MAC learning
 * table becomes overfull, so that eviction is required, the implementation
 * evicts MAC entries first from the 'port' values that have the most entries
 * in 'ml'.  This provides a degree of fairness: each port is entitled to its
 * fair share of MAC entries. */
void
mac_entry_set_port(struct mac_learning *ml, struct mac_entry *e, void *port)
    OVS_REQ_WRLOCK(ml->rwlock)
{
    if (mac_entry_get_port(ml, e) != port) {
        ml->need_revalidate = true;

        if (e->mlport) {
            struct mac_learning_port *mlport = e->mlport;
            ovs_list_remove(&e->port_lru_node);

            if (ovs_list_is_empty(&mlport->port_lrus)) {
                ovs_assert(mlport->heap_node.priority == 1);
                hmap_remove(&ml->ports_by_ptr, &mlport->hmap_node);
                heap_remove(&ml->ports_by_usage, &mlport->heap_node);
                free(mlport);
            } else {
                ovs_assert(mlport->heap_node.priority > 1);
                heap_change(&ml->ports_by_usage, &mlport->heap_node,
                            mlport->heap_node.priority - 1);
            }
            e->mlport = NULL;
        }

        if (port) {
            struct mac_learning_port *mlport;

            mlport = mac_learning_port_lookup(ml, port);
            if (!mlport) {
                mlport = xzalloc(sizeof *mlport);
                hmap_insert(&ml->ports_by_ptr, &mlport->hmap_node,
                            hash_pointer(port, ml->secret));
                heap_insert(&ml->ports_by_usage, &mlport->heap_node, 1);
                mlport->port = port;
                ovs_list_init(&mlport->port_lrus);
            } else {
                heap_change(&ml->ports_by_usage, &mlport->heap_node,
                            mlport->heap_node.priority + 1);
            }
            ovs_list_push_back(&mlport->port_lrus, &e->port_lru_node);
            e->mlport = mlport;
        }
    }
}

/* Finds one of the ports with the most MAC entries and evicts its least
 * recently used entry. */
static void
evict_mac_entry_fairly(struct mac_learning *ml)
    OVS_REQ_WRLOCK(ml->rwlock)
{
    struct mac_learning_port *mlport;
    struct mac_entry *e;

    mlport = CONTAINER_OF(heap_max(&ml->ports_by_usage),
                          struct mac_learning_port, heap_node);
    e = CONTAINER_OF(ovs_list_front(&mlport->port_lrus),
                     struct mac_entry, port_lru_node);
    COVERAGE_INC(mac_learning_evicted);
    mac_learning_expire(ml, e);
}

/* If the LRU list is not empty, stores the least-recently-used entry in '*e'
 * and returns true.  Otherwise, if the LRU list is empty, stores NULL in '*e'
 * and returns false. */
static bool
get_lru(struct mac_learning *ml, struct mac_entry **e)
    OVS_REQ_RDLOCK(ml->rwlock)
{
    if (!ovs_list_is_empty(&ml->lrus)) {
        *e = mac_entry_from_lru_node(ml->lrus.next);
        return true;
    } else {
        *e = NULL;
        return false;
    }
}

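/* Clamps 'idle_time' to the supported range of 15 to 3600 seconds. */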
static unsigned int
normalize_idle_time(unsigned int idle_time)
{
    return (idle_time < 15 ? 15
            : idle_time > 3600 ? 3600
            : idle_time);
}

/* Creates and returns a new MAC learning table with an initial MAC aging
 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
 * entries. */
struct mac_learning *
mac_learning_create(unsigned int idle_time)
{
    struct mac_learning *ml;

    ml = xmalloc(sizeof *ml);
    ovs_list_init(&ml->lrus);
    hmap_init(&ml->table);
    ml->secret = random_uint32();
    ml->flood_vlans = NULL;
    ml->idle_time = normalize_idle_time(idle_time);
    ml->max_entries = MAC_DEFAULT_MAX;
    ml->need_revalidate = false;
    hmap_init(&ml->ports_by_ptr);
    heap_init(&ml->ports_by_usage);
    ovs_refcount_init(&ml->ref_cnt);
    ovs_rwlock_init(&ml->rwlock);
    return ml;
}

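/* Takes a reference to 'ml'.  Returns 'ml', which may be NULL.  The caller
 * must release the reference with mac_learning_unref() when it is no longer
 * needed. */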
struct mac_learning *
mac_learning_ref(const struct mac_learning *ml_)
{
    struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_);
    if (ml) {
        ovs_refcount_ref(&ml->ref_cnt);
    }
    return ml;
}

/* Unreferences (and possibly destroys) MAC learning table 'ml'. */
void
mac_learning_unref(struct mac_learning *ml)
{
    if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) {
        struct mac_entry *e, *next;

        ovs_rwlock_wrlock(&ml->rwlock);
        HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) {
            mac_learning_expire(ml, e);
        }
        hmap_destroy(&ml->table);
        hmap_destroy(&ml->ports_by_ptr);
        heap_destroy(&ml->ports_by_usage);

        bitmap_free(ml->flood_vlans);
        ovs_rwlock_unlock(&ml->rwlock);
        ovs_rwlock_destroy(&ml->rwlock);
        free(ml);
    }
}

/* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
 * which all packets are flooded.  Returns true if the set has changed from
 * the previous value. */
bool
mac_learning_set_flood_vlans(struct mac_learning *ml,
                             const unsigned long *bitmap)
{
    if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) {
        return false;
    } else {
        bitmap_free(ml->flood_vlans);
        ml->flood_vlans = vlan_bitmap_clone(bitmap);
        return true;
    }
}

/* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
void
mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time)
{
    idle_time = normalize_idle_time(idle_time);
    if (idle_time != ml->idle_time) {
        struct mac_entry *e;
        int delta;

        delta = (int) idle_time - (int) ml->idle_time;
        LIST_FOR_EACH (e, lru_node, &ml->lrus) {
            e->expires += delta;
        }
        ml->idle_time = idle_time;
    }
}

/* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
 * to be within a reasonable range. */
void
mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
{
    ml->max_entries = (max_entries < 10 ? 10
                       : max_entries > 1000 * 1000 ? 1000 * 1000
                       : max_entries);
}

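/* Returns true if learning is enabled on 'vlan' within 'ml', that is, if
 * 'vlan' is not configured to flood all packets. */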
static bool
is_learning_vlan(const struct mac_learning *ml, uint16_t vlan)
{
    return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan);
}

/* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
 * 'vlan' is configured on 'ml' to flood all packets. */
bool
mac_learning_may_learn(const struct mac_learning *ml,
                       const struct eth_addr src_mac, uint16_t vlan)
{
    return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac);
}

/* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
 * inserting a new entry if necessary.  The caller must have already verified,
 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
 * learnable.
 *
 * If the returned MAC entry is new (that is, if it has a NULL client-provided
 * port, as returned by mac_entry_get_port()), then the caller must initialize
 * the new entry's port to a nonnull value with mac_entry_set_port(). */
struct mac_entry *
mac_learning_insert(struct mac_learning *ml,
                    const struct eth_addr src_mac, uint16_t vlan)
{
    struct mac_entry *e;

    e = mac_entry_lookup(ml, src_mac, vlan);
    if (!e) {
        uint32_t hash = mac_table_hash(ml, src_mac, vlan);

        if (hmap_count(&ml->table) >= ml->max_entries) {
            evict_mac_entry_fairly(ml);
        }

        e = xmalloc(sizeof *e);
        hmap_insert(&ml->table, &e->hmap_node, hash);
        e->mac = src_mac;
        e->vlan = vlan;
        e->grat_arp_lock = TIME_MIN;
        e->mlport = NULL;
        COVERAGE_INC(mac_learning_learned);
    } else {
        ovs_list_remove(&e->lru_node);
    }

    /* Mark 'e' as recently used. */
    ovs_list_push_back(&ml->lrus, &e->lru_node);
    if (e->mlport) {
        ovs_list_remove(&e->port_lru_node);
        ovs_list_push_back(&e->mlport->port_lrus, &e->port_lru_node);
    }
    e->expires = time_now() + ml->idle_time;

    return e;
}

/* Checks whether a MAC learning update is necessary for MAC learning table
 * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan',
 * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is
 * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'.
 *
 * Most packets processed through the MAC learning table do not actually
 * change it in any way.  This function requires only a read lock on the MAC
 * learning table, so it is much cheaper in this common case.
 *
 * Keep the code here synchronized with that in update_learning_table__()
 * below. */
static bool
is_mac_learning_update_needed(const struct mac_learning *ml,
                              struct eth_addr src, int vlan,
                              bool is_gratuitous_arp, bool is_bond,
                              void *in_port)
    OVS_REQ_RDLOCK(ml->rwlock)
{
    struct mac_entry *mac;

    if (!mac_learning_may_learn(ml, src, vlan)) {
        return false;
    }

    mac = mac_learning_lookup(ml, src, vlan);
    if (!mac || mac_entry_age(ml, mac)) {
        return true;
    }

    if (is_gratuitous_arp) {
        /* We don't want to learn from gratuitous ARP packets that are
         * reflected back over bond slaves so we lock the learning table.  For
         * more detail, see the bigger comment in update_learning_table__(). */
        if (!is_bond) {
            return true;  /* Need to set the gratuitous ARP lock. */
        } else if (mac_entry_is_grat_arp_locked(mac)) {
            return false;
        }
    }

    return mac_entry_get_port(ml, mac) != in_port /* ofbundle */;
}

/* Updates MAC learning table 'ml' given that a packet matching 'src' was
 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
 * 'is_bond' is 'true'.
 *
 * This code repeats all the checks in is_mac_learning_update_needed() because
 * the lock was released between there and here and thus the MAC learning state
 * could have changed.
 *
 * Returns 'true' if 'ml' was updated, 'false' otherwise.
 *
 * Keep the code here synchronized with that in is_mac_learning_update_needed()
 * above. */
static bool
update_learning_table__(struct mac_learning *ml, struct eth_addr src,
                        int vlan, bool is_gratuitous_arp, bool is_bond,
                        void *in_port)
    OVS_REQ_WRLOCK(ml->rwlock)
{
    struct mac_entry *mac;

    if (!mac_learning_may_learn(ml, src, vlan)) {
        return false;
    }

    mac = mac_learning_insert(ml, src, vlan);
    if (is_gratuitous_arp) {
        /* Gratuitous ARP packets received over non-bond interfaces could be
         * reflected back over bond slaves.  We don't want to learn from these
         * reflected packets, so we lock each entry for which a gratuitous ARP
         * packet was received over a non-bond interface and refrain from
         * learning from gratuitous ARP packets that arrive over bond
         * interfaces for this entry while the lock is in effect.  Refer to the
         * 'ovs-vswitch Internals' document for more in-depth discussion on
         * this topic. */
        if (!is_bond) {
            mac_entry_set_grat_arp_lock(mac);
        } else if (mac_entry_is_grat_arp_locked(mac)) {
            return false;
        }
    }

    if (mac_entry_get_port(ml, mac) != in_port) {
        if (mac_entry_get_port(ml, mac) != NULL) {
            COVERAGE_INC(mac_learning_moved);
        }
        mac_entry_set_port(ml, mac, in_port);
        return true;
    }
    return false;
}

/* Updates MAC learning table 'ml' given that a packet matching 'src' was
 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
 * 'is_bond' is 'true'.
 *
 * Returns 'true' if 'ml' was updated, 'false' otherwise. */
bool
mac_learning_update(struct mac_learning *ml, struct eth_addr src,
                    int vlan, bool is_gratuitous_arp, bool is_bond,
                    void *in_port)
    OVS_EXCLUDED(ml->rwlock)
{
    bool need_update;
    bool updated = false;

    /* Don't learn the OFPP_NONE port. */
    if (in_port != NULL) {
        /* First try the common case: no change to MAC learning table. */
        ovs_rwlock_rdlock(&ml->rwlock);
        need_update = is_mac_learning_update_needed(ml, src, vlan,
                                                    is_gratuitous_arp, is_bond,
                                                    in_port);
        ovs_rwlock_unlock(&ml->rwlock);

        if (need_update) {
            /* Slow path: MAC learning table might need an update. */
            ovs_rwlock_wrlock(&ml->rwlock);
            updated = update_learning_table__(ml, src, vlan, is_gratuitous_arp,
                                              is_bond, in_port);
            ovs_rwlock_unlock(&ml->rwlock);
        }
    }
    return updated;
}

/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
 * learning entry, if any. */
struct mac_entry *
mac_learning_lookup(const struct mac_learning *ml,
                    const struct eth_addr dst, uint16_t vlan)
{
    if (eth_addr_is_multicast(dst)) {
        /* No tag because the treatment of multicast destinations never
         * changes. */
        return NULL;
    } else if (!is_learning_vlan(ml, vlan)) {
        /* We don't tag this property.  The set of learning VLANs changes so
         * rarely that we revalidate every flow when it changes. */
        return NULL;
    } else {
        struct mac_entry *e = mac_entry_lookup(ml, dst, vlan);

        ovs_assert(e == NULL || mac_entry_get_port(ml, e) != NULL);
        return e;
    }
}

/* Expires 'e' from the 'ml' hash table. */
void
mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
{
    ml->need_revalidate = true;
    mac_entry_set_port(ml, e, NULL);
    hmap_remove(&ml->table, &e->hmap_node);
    ovs_list_remove(&e->lru_node);
    free(e);
}

/* Expires all the mac-learning entries in 'ml'. */
void
mac_learning_flush(struct mac_learning *ml)
{
    struct mac_entry *e;
    while (get_lru(ml, &e)) {
        mac_learning_expire(ml, e);
    }
    hmap_shrink(&ml->table);
}

/* Does periodic work required by 'ml'.  Returns true if something changed that
 * may require flow revalidation. */
bool
mac_learning_run(struct mac_learning *ml)
{
    bool need_revalidate;
    struct mac_entry *e;

    while (get_lru(ml, &e)
           && (hmap_count(&ml->table) > ml->max_entries
               || time_now() >= e->expires)) {
        COVERAGE_INC(mac_learning_expired);
        mac_learning_expire(ml, e);
    }

    need_revalidate = ml->need_revalidate;
    ml->need_revalidate = false;
    return need_revalidate;
}

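/* Arranges for the poll loop to wake up when mac_learning_run() has work to
 * do: immediately if the table is over capacity or revalidation is pending,
 * otherwise when the oldest MAC entry is due to expire. */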
void
mac_learning_wait(struct mac_learning *ml)
{
    if (hmap_count(&ml->table) > ml->max_entries
        || ml->need_revalidate) {
        poll_immediate_wake();
    } else if (!ovs_list_is_empty(&ml->lrus)) {
        struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
        poll_timer_wait_until(e->expires * 1000LL);
    }
}