/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 #include "mac-learning.h"
26 #include "openvswitch/list.h"
27 #include "openvswitch/poll-loop.h"
29 #include "unaligned.h"
31 #include "vlan-bitmap.h"
33 COVERAGE_DEFINE(mac_learning_learned
);
34 COVERAGE_DEFINE(mac_learning_expired
);
35 COVERAGE_DEFINE(mac_learning_evicted
);
36 COVERAGE_DEFINE(mac_learning_moved
);
38 /* Returns the number of seconds since 'e' (within 'ml') was last learned. */
40 mac_entry_age(const struct mac_learning
*ml
, const struct mac_entry
*e
)
42 time_t remaining
= e
->expires
- time_now();
43 return ml
->idle_time
- remaining
;
47 mac_table_hash(const struct mac_learning
*ml
, const struct eth_addr mac
,
50 return hash_mac(mac
, vlan
, ml
->secret
);
53 static struct mac_entry
*
54 mac_entry_from_lru_node(struct ovs_list
*list
)
56 return CONTAINER_OF(list
, struct mac_entry
, lru_node
);
59 static struct mac_entry
*
60 mac_entry_lookup(const struct mac_learning
*ml
,
61 const struct eth_addr mac
, uint16_t vlan
)
65 HMAP_FOR_EACH_WITH_HASH (e
, hmap_node
, mac_table_hash(ml
, mac
, vlan
),
67 if (e
->vlan
== vlan
&& eth_addr_equals(e
->mac
, mac
)) {
74 static struct mac_learning_port
*
75 mac_learning_port_lookup(struct mac_learning
*ml
, void *port
)
77 struct mac_learning_port
*mlport
;
79 HMAP_FOR_EACH_IN_BUCKET (mlport
, hmap_node
, hash_pointer(port
, ml
->secret
),
81 if (mlport
->port
== port
) {
88 /* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'. The
89 * pointer can be retrieved with mac_entry_get_port().
91 * The MAC-learning implementation treats the data that 'port' points to as
92 * opaque and never tries to dereference it. However, when a MAC learning
93 * table becomes overfull, so that eviction is required, the implementation
94 * does first evict MAC entries for the most common 'port's values in 'ml', so
95 * that there is a degree of fairness, that is, each port is entitled to its
96 * fair share of MAC entries. */
98 mac_entry_set_port(struct mac_learning
*ml
, struct mac_entry
*e
, void *port
)
99 OVS_REQ_WRLOCK(ml
->rwlock
)
101 if (mac_entry_get_port(ml
, e
) != port
) {
102 ml
->need_revalidate
= true;
105 struct mac_learning_port
*mlport
= e
->mlport
;
106 ovs_list_remove(&e
->port_lru_node
);
108 if (ovs_list_is_empty(&mlport
->port_lrus
)) {
109 ovs_assert(mlport
->heap_node
.priority
== 1);
110 hmap_remove(&ml
->ports_by_ptr
, &mlport
->hmap_node
);
111 heap_remove(&ml
->ports_by_usage
, &mlport
->heap_node
);
114 ovs_assert(mlport
->heap_node
.priority
> 1);
115 heap_change(&ml
->ports_by_usage
, &mlport
->heap_node
,
116 mlport
->heap_node
.priority
- 1);
122 struct mac_learning_port
*mlport
;
124 mlport
= mac_learning_port_lookup(ml
, port
);
126 mlport
= xzalloc(sizeof *mlport
);
127 hmap_insert(&ml
->ports_by_ptr
, &mlport
->hmap_node
,
128 hash_pointer(port
, ml
->secret
));
129 heap_insert(&ml
->ports_by_usage
, &mlport
->heap_node
, 1);
131 ovs_list_init(&mlport
->port_lrus
);
133 heap_change(&ml
->ports_by_usage
, &mlport
->heap_node
,
134 mlport
->heap_node
.priority
+ 1);
136 ovs_list_push_back(&mlport
->port_lrus
, &e
->port_lru_node
);
142 /* Finds one of the ports with the most MAC entries and evicts its least
143 * recently used entry. */
145 evict_mac_entry_fairly(struct mac_learning
*ml
)
146 OVS_REQ_WRLOCK(ml
->rwlock
)
148 struct mac_learning_port
*mlport
;
151 mlport
= CONTAINER_OF(heap_max(&ml
->ports_by_usage
),
152 struct mac_learning_port
, heap_node
);
153 e
= CONTAINER_OF(ovs_list_front(&mlport
->port_lrus
),
154 struct mac_entry
, port_lru_node
);
155 COVERAGE_INC(mac_learning_evicted
);
156 mac_learning_expire(ml
, e
);
159 /* If the LRU list is not empty, stores the least-recently-used entry in '*e'
160 * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e'
161 * and return false. */
163 get_lru(struct mac_learning
*ml
, struct mac_entry
**e
)
164 OVS_REQ_RDLOCK(ml
->rwlock
)
166 if (!ovs_list_is_empty(&ml
->lrus
)) {
167 *e
= mac_entry_from_lru_node(ml
->lrus
.next
);
176 normalize_idle_time(unsigned int idle_time
)
178 return (idle_time
< 15 ? 15
179 : idle_time
> 3600 ? 3600
183 /* Creates and returns a new MAC learning table with an initial MAC aging
184 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
186 struct mac_learning
*
187 mac_learning_create(unsigned int idle_time
)
189 struct mac_learning
*ml
;
191 ml
= xmalloc(sizeof *ml
);
192 ovs_list_init(&ml
->lrus
);
193 hmap_init(&ml
->table
);
194 ml
->secret
= random_uint32();
195 ml
->flood_vlans
= NULL
;
196 ml
->idle_time
= normalize_idle_time(idle_time
);
197 ml
->max_entries
= MAC_DEFAULT_MAX
;
198 ml
->need_revalidate
= false;
199 hmap_init(&ml
->ports_by_ptr
);
200 heap_init(&ml
->ports_by_usage
);
201 ovs_refcount_init(&ml
->ref_cnt
);
202 ovs_rwlock_init(&ml
->rwlock
);
206 struct mac_learning
*
207 mac_learning_ref(const struct mac_learning
*ml_
)
209 struct mac_learning
*ml
= CONST_CAST(struct mac_learning
*, ml_
);
211 ovs_refcount_ref(&ml
->ref_cnt
);
216 /* Unreferences (and possibly destroys) MAC learning table 'ml'. */
218 mac_learning_unref(struct mac_learning
*ml
)
220 if (ml
&& ovs_refcount_unref(&ml
->ref_cnt
) == 1) {
221 struct mac_entry
*e
, *next
;
223 ovs_rwlock_wrlock(&ml
->rwlock
);
224 HMAP_FOR_EACH_SAFE (e
, next
, hmap_node
, &ml
->table
) {
225 mac_learning_expire(ml
, e
);
227 hmap_destroy(&ml
->table
);
228 hmap_destroy(&ml
->ports_by_ptr
);
229 heap_destroy(&ml
->ports_by_usage
);
231 bitmap_free(ml
->flood_vlans
);
232 ovs_rwlock_unlock(&ml
->rwlock
);
233 ovs_rwlock_destroy(&ml
->rwlock
);
238 /* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
239 * which all packets are flooded. Returns true if the set has changed from the
242 mac_learning_set_flood_vlans(struct mac_learning
*ml
,
243 const unsigned long *bitmap
)
245 if (vlan_bitmap_equal(ml
->flood_vlans
, bitmap
)) {
248 bitmap_free(ml
->flood_vlans
);
249 ml
->flood_vlans
= vlan_bitmap_clone(bitmap
);
254 /* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
256 mac_learning_set_idle_time(struct mac_learning
*ml
, unsigned int idle_time
)
258 idle_time
= normalize_idle_time(idle_time
);
259 if (idle_time
!= ml
->idle_time
) {
263 delta
= (int) idle_time
- (int) ml
->idle_time
;
264 LIST_FOR_EACH (e
, lru_node
, &ml
->lrus
) {
267 ml
->idle_time
= idle_time
;
271 /* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
272 * to be within a reasonable range. */
274 mac_learning_set_max_entries(struct mac_learning
*ml
, size_t max_entries
)
276 ml
->max_entries
= (max_entries
< 10 ? 10
277 : max_entries
> 1000 * 1000 ? 1000 * 1000
282 is_learning_vlan(const struct mac_learning
*ml
, uint16_t vlan
)
284 return !ml
->flood_vlans
|| !bitmap_is_set(ml
->flood_vlans
, vlan
);
287 /* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
288 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
289 * 'vlan' is configured on 'ml' to flood all packets. */
291 mac_learning_may_learn(const struct mac_learning
*ml
,
292 const struct eth_addr src_mac
, uint16_t vlan
)
294 return ml
&& is_learning_vlan(ml
, vlan
) && !eth_addr_is_multicast(src_mac
);
297 /* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
298 * inserting a new entry if necessary. The caller must have already verified,
299 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
302 * If the returned MAC entry is new (that is, if it has a NULL client-provided
303 * port, as returned by mac_entry_get_port()), then the caller must initialize
304 * the new entry's port to a nonnull value with mac_entry_set_port(). */
306 mac_learning_insert(struct mac_learning
*ml
,
307 const struct eth_addr src_mac
, uint16_t vlan
)
311 e
= mac_entry_lookup(ml
, src_mac
, vlan
);
313 uint32_t hash
= mac_table_hash(ml
, src_mac
, vlan
);
315 if (hmap_count(&ml
->table
) >= ml
->max_entries
) {
316 evict_mac_entry_fairly(ml
);
319 e
= xmalloc(sizeof *e
);
320 hmap_insert(&ml
->table
, &e
->hmap_node
, hash
);
323 e
->grat_arp_lock
= TIME_MIN
;
325 COVERAGE_INC(mac_learning_learned
);
327 ovs_list_remove(&e
->lru_node
);
330 /* Mark 'e' as recently used. */
331 ovs_list_push_back(&ml
->lrus
, &e
->lru_node
);
333 ovs_list_remove(&e
->port_lru_node
);
334 ovs_list_push_back(&e
->mlport
->port_lrus
, &e
->port_lru_node
);
336 e
->expires
= time_now() + ml
->idle_time
;
341 /* Checks whether a MAC learning update is necessary for MAC learning table
342 * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan',
343 * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is
344 * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'.
346 * Most packets processed through the MAC learning table do not actually
347 * change it in any way. This function requires only a read lock on the MAC
348 * learning table, so it is much cheaper in this common case.
350 * Keep the code here synchronized with that in update_learning_table__()
353 is_mac_learning_update_needed(const struct mac_learning
*ml
,
354 struct eth_addr src
, int vlan
,
355 bool is_gratuitous_arp
, bool is_bond
,
357 OVS_REQ_RDLOCK(ml
->rwlock
)
359 struct mac_entry
*mac
;
361 if (!mac_learning_may_learn(ml
, src
, vlan
)) {
365 mac
= mac_learning_lookup(ml
, src
, vlan
);
366 if (!mac
|| mac_entry_age(ml
, mac
)) {
370 if (is_gratuitous_arp
) {
371 /* We don't want to learn from gratuitous ARP packets that are
372 * reflected back over bond slaves so we lock the learning table. For
373 * more detail, see the bigger comment in update_learning_table__(). */
375 return true; /* Need to set the gratuitous ARP lock. */
376 } else if (mac_entry_is_grat_arp_locked(mac
)) {
381 return mac_entry_get_port(ml
, mac
) != in_port
/* ofbundle */;
384 /* Updates MAC learning table 'ml' given that a packet matching 'src' was
385 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
386 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
387 * 'is_bond' is 'true'.
389 * This code repeats all the checks in is_mac_learning_update_needed() because
390 * the lock was released between there and here and thus the MAC learning state
391 * could have changed.
393 * Returns 'true' if 'ml' was updated, 'false' otherwise.
395 * Keep the code here synchronized with that in is_mac_learning_update_needed()
398 update_learning_table__(struct mac_learning
*ml
, struct eth_addr src
,
399 int vlan
, bool is_gratuitous_arp
, bool is_bond
,
401 OVS_REQ_WRLOCK(ml
->rwlock
)
403 struct mac_entry
*mac
;
405 if (!mac_learning_may_learn(ml
, src
, vlan
)) {
409 mac
= mac_learning_insert(ml
, src
, vlan
);
410 if (is_gratuitous_arp
) {
411 /* Gratuitous ARP packets received over non-bond interfaces could be
412 * reflected back over bond slaves. We don't want to learn from these
413 * reflected packets, so we lock each entry for which a gratuitous ARP
414 * packet was received over a non-bond interface and refrain from
415 * learning from gratuitous ARP packets that arrive over bond
416 * interfaces for this entry while the lock is in effect. Refer to the
417 * 'ovs-vswitch Internals' document for more in-depth discussion on
420 mac_entry_set_grat_arp_lock(mac
);
421 } else if (mac_entry_is_grat_arp_locked(mac
)) {
426 if (mac_entry_get_port(ml
, mac
) != in_port
) {
427 if (mac_entry_get_port(ml
, mac
) != NULL
) {
428 COVERAGE_INC(mac_learning_moved
);
430 mac_entry_set_port(ml
, mac
, in_port
);
436 /* Updates MAC learning table 'ml' given that a packet matching 'src' was
437 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
438 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
439 * 'is_bond' is 'true'.
441 * Returns 'true' if 'ml' was updated, 'false' otherwise. */
443 mac_learning_update(struct mac_learning
*ml
, struct eth_addr src
,
444 int vlan
, bool is_gratuitous_arp
, bool is_bond
,
446 OVS_EXCLUDED(ml
->rwlock
)
449 bool updated
= false;
451 /* Don't learn the OFPP_NONE port. */
452 if (in_port
!= NULL
) {
453 /* First try the common case: no change to MAC learning table. */
454 ovs_rwlock_rdlock(&ml
->rwlock
);
455 need_update
= is_mac_learning_update_needed(ml
, src
, vlan
,
456 is_gratuitous_arp
, is_bond
,
458 ovs_rwlock_unlock(&ml
->rwlock
);
461 /* Slow path: MAC learning table might need an update. */
462 ovs_rwlock_wrlock(&ml
->rwlock
);
463 updated
= update_learning_table__(ml
, src
, vlan
, is_gratuitous_arp
,
465 ovs_rwlock_unlock(&ml
->rwlock
);
471 /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
472 * learning entry, if any. */
474 mac_learning_lookup(const struct mac_learning
*ml
,
475 const struct eth_addr dst
, uint16_t vlan
)
477 if (eth_addr_is_multicast(dst
)) {
478 /* No tag because the treatment of multicast destinations never
481 } else if (!is_learning_vlan(ml
, vlan
)) {
482 /* We don't tag this property. The set of learning VLANs changes so
483 * rarely that we revalidate every flow when it changes. */
486 struct mac_entry
*e
= mac_entry_lookup(ml
, dst
, vlan
);
488 ovs_assert(e
== NULL
|| mac_entry_get_port(ml
, e
) != NULL
);
493 /* Expires 'e' from the 'ml' hash table. */
495 mac_learning_expire(struct mac_learning
*ml
, struct mac_entry
*e
)
497 ml
->need_revalidate
= true;
498 mac_entry_set_port(ml
, e
, NULL
);
499 hmap_remove(&ml
->table
, &e
->hmap_node
);
500 ovs_list_remove(&e
->lru_node
);
504 /* Expires all the mac-learning entries in 'ml'. */
506 mac_learning_flush(struct mac_learning
*ml
)
509 while (get_lru(ml
, &e
)){
510 mac_learning_expire(ml
, e
);
512 hmap_shrink(&ml
->table
);
515 /* Does periodic work required by 'ml'. Returns true if something changed that
516 * may require flow revalidation. */
518 mac_learning_run(struct mac_learning
*ml
)
520 bool need_revalidate
;
523 while (get_lru(ml
, &e
)
524 && (hmap_count(&ml
->table
) > ml
->max_entries
525 || time_now() >= e
->expires
)) {
526 COVERAGE_INC(mac_learning_expired
);
527 mac_learning_expire(ml
, e
);
530 need_revalidate
= ml
->need_revalidate
;
531 ml
->need_revalidate
= false;
532 return need_revalidate
;
536 mac_learning_wait(struct mac_learning
*ml
)
538 if (hmap_count(&ml
->table
) > ml
->max_entries
539 || ml
->need_revalidate
) {
540 poll_immediate_wake();
541 } else if (!ovs_list_is_empty(&ml
->lrus
)) {
542 struct mac_entry
*e
= mac_entry_from_lru_node(ml
->lrus
.next
);
543 poll_timer_wait_until(e
->expires
* 1000LL);