/* lib/mac-learning.c -- MAC learning table for Ethernet switching. */
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18 #include "mac-learning.h"
26 #include "openvswitch/list.h"
27 #include "openvswitch/poll-loop.h"
29 #include "unaligned.h"
31 #include "vlan-bitmap.h"
/* Coverage counters: bumped when a new MAC is learned and when an entry
 * expires, respectively.  Readable via "ovs-appctl coverage/show". */
COVERAGE_DEFINE(mac_learning_learned);
COVERAGE_DEFINE(mac_learning_expired);
36 /* Returns the number of seconds since 'e' (within 'ml') was last learned. */
38 mac_entry_age(const struct mac_learning
*ml
, const struct mac_entry
*e
)
40 time_t remaining
= e
->expires
- time_now();
41 return ml
->idle_time
- remaining
;
45 mac_table_hash(const struct mac_learning
*ml
, const struct eth_addr mac
,
48 return hash_mac(mac
, vlan
, ml
->secret
);
51 static struct mac_entry
*
52 mac_entry_from_lru_node(struct ovs_list
*list
)
54 return CONTAINER_OF(list
, struct mac_entry
, lru_node
);
57 static struct mac_entry
*
58 mac_entry_lookup(const struct mac_learning
*ml
,
59 const struct eth_addr mac
, uint16_t vlan
)
63 HMAP_FOR_EACH_WITH_HASH (e
, hmap_node
, mac_table_hash(ml
, mac
, vlan
),
65 if (e
->vlan
== vlan
&& eth_addr_equals(e
->mac
, mac
)) {
72 static struct mac_learning_port
*
73 mac_learning_port_lookup(struct mac_learning
*ml
, void *port
)
75 struct mac_learning_port
*mlport
;
77 HMAP_FOR_EACH_IN_BUCKET (mlport
, hmap_node
, hash_pointer(port
, ml
->secret
),
79 if (mlport
->port
== port
) {
86 /* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'. The
87 * pointer can be retrieved with mac_entry_get_port().
89 * The MAC-learning implementation treats the data that 'port' points to as
90 * opaque and never tries to dereference it. However, when a MAC learning
91 * table becomes overfull, so that eviction is required, the implementation
92 * does first evict MAC entries for the most common 'port's values in 'ml', so
93 * that there is a degree of fairness, that is, each port is entitled to its
94 * fair share of MAC entries. */
96 mac_entry_set_port(struct mac_learning
*ml
, struct mac_entry
*e
, void *port
)
97 OVS_REQ_WRLOCK(ml
->rwlock
)
99 if (mac_entry_get_port(ml
, e
) != port
) {
100 ml
->need_revalidate
= true;
103 struct mac_learning_port
*mlport
= e
->mlport
;
104 ovs_list_remove(&e
->port_lru_node
);
106 if (ovs_list_is_empty(&mlport
->port_lrus
)) {
107 ovs_assert(mlport
->heap_node
.priority
== 1);
108 hmap_remove(&ml
->ports_by_ptr
, &mlport
->hmap_node
);
109 heap_remove(&ml
->ports_by_usage
, &mlport
->heap_node
);
112 ovs_assert(mlport
->heap_node
.priority
> 1);
113 heap_change(&ml
->ports_by_usage
, &mlport
->heap_node
,
114 mlport
->heap_node
.priority
- 1);
120 struct mac_learning_port
*mlport
;
122 mlport
= mac_learning_port_lookup(ml
, port
);
124 mlport
= xzalloc(sizeof *mlport
);
125 hmap_insert(&ml
->ports_by_ptr
, &mlport
->hmap_node
,
126 hash_pointer(port
, ml
->secret
));
127 heap_insert(&ml
->ports_by_usage
, &mlport
->heap_node
, 1);
129 ovs_list_init(&mlport
->port_lrus
);
131 heap_change(&ml
->ports_by_usage
, &mlport
->heap_node
,
132 mlport
->heap_node
.priority
+ 1);
134 ovs_list_push_back(&mlport
->port_lrus
, &e
->port_lru_node
);
140 /* Finds one of the ports with the most MAC entries and evicts its least
141 * recently used entry. */
143 evict_mac_entry_fairly(struct mac_learning
*ml
)
144 OVS_REQ_WRLOCK(ml
->rwlock
)
146 struct mac_learning_port
*mlport
;
149 mlport
= CONTAINER_OF(heap_max(&ml
->ports_by_usage
),
150 struct mac_learning_port
, heap_node
);
151 e
= CONTAINER_OF(ovs_list_front(&mlport
->port_lrus
),
152 struct mac_entry
, port_lru_node
);
153 mac_learning_expire(ml
, e
);
156 /* If the LRU list is not empty, stores the least-recently-used entry in '*e'
157 * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e'
158 * and return false. */
160 get_lru(struct mac_learning
*ml
, struct mac_entry
**e
)
161 OVS_REQ_RDLOCK(ml
->rwlock
)
163 if (!ovs_list_is_empty(&ml
->lrus
)) {
164 *e
= mac_entry_from_lru_node(ml
->lrus
.next
);
/* Clamps 'idle_time' to the supported MAC aging range of [15, 3600]
 * seconds. */
static unsigned int
normalize_idle_time(unsigned int idle_time)
{
    return (idle_time < 15 ? 15
            : idle_time > 3600 ? 3600
            : idle_time);
}
180 /* Creates and returns a new MAC learning table with an initial MAC aging
181 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
183 struct mac_learning
*
184 mac_learning_create(unsigned int idle_time
)
186 struct mac_learning
*ml
;
188 ml
= xmalloc(sizeof *ml
);
189 ovs_list_init(&ml
->lrus
);
190 hmap_init(&ml
->table
);
191 ml
->secret
= random_uint32();
192 ml
->flood_vlans
= NULL
;
193 ml
->idle_time
= normalize_idle_time(idle_time
);
194 ml
->max_entries
= MAC_DEFAULT_MAX
;
195 ml
->need_revalidate
= false;
196 hmap_init(&ml
->ports_by_ptr
);
197 heap_init(&ml
->ports_by_usage
);
198 ovs_refcount_init(&ml
->ref_cnt
);
199 ovs_rwlock_init(&ml
->rwlock
);
203 struct mac_learning
*
204 mac_learning_ref(const struct mac_learning
*ml_
)
206 struct mac_learning
*ml
= CONST_CAST(struct mac_learning
*, ml_
);
208 ovs_refcount_ref(&ml
->ref_cnt
);
213 /* Unreferences (and possibly destroys) MAC learning table 'ml'. */
215 mac_learning_unref(struct mac_learning
*ml
)
217 if (ml
&& ovs_refcount_unref(&ml
->ref_cnt
) == 1) {
218 struct mac_entry
*e
, *next
;
220 ovs_rwlock_wrlock(&ml
->rwlock
);
221 HMAP_FOR_EACH_SAFE (e
, next
, hmap_node
, &ml
->table
) {
222 mac_learning_expire(ml
, e
);
224 hmap_destroy(&ml
->table
);
225 hmap_destroy(&ml
->ports_by_ptr
);
226 heap_destroy(&ml
->ports_by_usage
);
228 bitmap_free(ml
->flood_vlans
);
229 ovs_rwlock_unlock(&ml
->rwlock
);
230 ovs_rwlock_destroy(&ml
->rwlock
);
235 /* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
236 * which all packets are flooded. Returns true if the set has changed from the
239 mac_learning_set_flood_vlans(struct mac_learning
*ml
,
240 const unsigned long *bitmap
)
242 if (vlan_bitmap_equal(ml
->flood_vlans
, bitmap
)) {
245 bitmap_free(ml
->flood_vlans
);
246 ml
->flood_vlans
= vlan_bitmap_clone(bitmap
);
251 /* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
253 mac_learning_set_idle_time(struct mac_learning
*ml
, unsigned int idle_time
)
255 idle_time
= normalize_idle_time(idle_time
);
256 if (idle_time
!= ml
->idle_time
) {
260 delta
= (int) idle_time
- (int) ml
->idle_time
;
261 LIST_FOR_EACH (e
, lru_node
, &ml
->lrus
) {
264 ml
->idle_time
= idle_time
;
268 /* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
269 * to be within a reasonable range. */
271 mac_learning_set_max_entries(struct mac_learning
*ml
, size_t max_entries
)
273 ml
->max_entries
= (max_entries
< 10 ? 10
274 : max_entries
> 1000 * 1000 ? 1000 * 1000
279 is_learning_vlan(const struct mac_learning
*ml
, uint16_t vlan
)
281 return !ml
->flood_vlans
|| !bitmap_is_set(ml
->flood_vlans
, vlan
);
284 /* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
285 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
286 * 'vlan' is configured on 'ml' to flood all packets. */
288 mac_learning_may_learn(const struct mac_learning
*ml
,
289 const struct eth_addr src_mac
, uint16_t vlan
)
291 return ml
&& is_learning_vlan(ml
, vlan
) && !eth_addr_is_multicast(src_mac
);
294 /* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
295 * inserting a new entry if necessary. The caller must have already verified,
296 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
299 * If the returned MAC entry is new (that is, if it has a NULL client-provided
300 * port, as returned by mac_entry_get_port()), then the caller must initialize
301 * the new entry's port to a nonnull value with mac_entry_set_port(). */
303 mac_learning_insert(struct mac_learning
*ml
,
304 const struct eth_addr src_mac
, uint16_t vlan
)
308 e
= mac_entry_lookup(ml
, src_mac
, vlan
);
310 uint32_t hash
= mac_table_hash(ml
, src_mac
, vlan
);
312 if (hmap_count(&ml
->table
) >= ml
->max_entries
) {
313 evict_mac_entry_fairly(ml
);
316 e
= xmalloc(sizeof *e
);
317 hmap_insert(&ml
->table
, &e
->hmap_node
, hash
);
320 e
->grat_arp_lock
= TIME_MIN
;
322 COVERAGE_INC(mac_learning_learned
);
324 ovs_list_remove(&e
->lru_node
);
327 /* Mark 'e' as recently used. */
328 ovs_list_push_back(&ml
->lrus
, &e
->lru_node
);
330 ovs_list_remove(&e
->port_lru_node
);
331 ovs_list_push_back(&e
->mlport
->port_lrus
, &e
->port_lru_node
);
333 e
->expires
= time_now() + ml
->idle_time
;
338 /* Checks whether a MAC learning update is necessary for MAC learning table
339 * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan',
340 * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is
341 * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'.
343 * Most packets processed through the MAC learning table do not actually
344 * change it in any way. This function requires only a read lock on the MAC
345 * learning table, so it is much cheaper in this common case.
347 * Keep the code here synchronized with that in update_learning_table__()
350 is_mac_learning_update_needed(const struct mac_learning
*ml
,
351 struct eth_addr src
, int vlan
,
352 bool is_gratuitous_arp
, bool is_bond
,
354 OVS_REQ_RDLOCK(ml
->rwlock
)
356 struct mac_entry
*mac
;
358 if (!mac_learning_may_learn(ml
, src
, vlan
)) {
362 mac
= mac_learning_lookup(ml
, src
, vlan
);
363 if (!mac
|| mac_entry_age(ml
, mac
)) {
367 if (is_gratuitous_arp
) {
368 /* We don't want to learn from gratuitous ARP packets that are
369 * reflected back over bond slaves so we lock the learning table. For
370 * more detail, see the bigger comment in update_learning_table__(). */
372 return true; /* Need to set the gratuitous ARP lock. */
373 } else if (mac_entry_is_grat_arp_locked(mac
)) {
378 return mac_entry_get_port(ml
, mac
) != in_port
/* ofbundle */;
381 /* Updates MAC learning table 'ml' given that a packet matching 'src' was
382 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
383 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
384 * 'is_bond' is 'true'.
386 * This code repeats all the checks in is_mac_learning_update_needed() because
387 * the lock was released between there and here and thus the MAC learning state
388 * could have changed.
390 * Returns 'true' if 'ml' was updated, 'false' otherwise.
392 * Keep the code here synchronized with that in is_mac_learning_update_needed()
395 update_learning_table__(struct mac_learning
*ml
, struct eth_addr src
,
396 int vlan
, bool is_gratuitous_arp
, bool is_bond
,
398 OVS_REQ_WRLOCK(ml
->rwlock
)
400 struct mac_entry
*mac
;
402 if (!mac_learning_may_learn(ml
, src
, vlan
)) {
406 mac
= mac_learning_insert(ml
, src
, vlan
);
407 if (is_gratuitous_arp
) {
408 /* Gratuitous ARP packets received over non-bond interfaces could be
409 * reflected back over bond slaves. We don't want to learn from these
410 * reflected packets, so we lock each entry for which a gratuitous ARP
411 * packet was received over a non-bond interface and refrain from
412 * learning from gratuitous ARP packets that arrive over bond
413 * interfaces for this entry while the lock is in effect. Refer to the
414 * 'ovs-vswitch Internals' document for more in-depth discussion on
417 mac_entry_set_grat_arp_lock(mac
);
418 } else if (mac_entry_is_grat_arp_locked(mac
)) {
423 if (mac_entry_get_port(ml
, mac
) != in_port
) {
424 mac_entry_set_port(ml
, mac
, in_port
);
430 /* Updates MAC learning table 'ml' given that a packet matching 'src' was
431 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
432 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
433 * 'is_bond' is 'true'.
435 * Returns 'true' if 'ml' was updated, 'false' otherwise. */
437 mac_learning_update(struct mac_learning
*ml
, struct eth_addr src
,
438 int vlan
, bool is_gratuitous_arp
, bool is_bond
,
440 OVS_EXCLUDED(ml
->rwlock
)
443 bool updated
= false;
445 /* Don't learn the OFPP_NONE port. */
446 if (in_port
!= NULL
) {
447 /* First try the common case: no change to MAC learning table. */
448 ovs_rwlock_rdlock(&ml
->rwlock
);
449 need_update
= is_mac_learning_update_needed(ml
, src
, vlan
,
450 is_gratuitous_arp
, is_bond
,
452 ovs_rwlock_unlock(&ml
->rwlock
);
455 /* Slow path: MAC learning table might need an update. */
456 ovs_rwlock_wrlock(&ml
->rwlock
);
457 updated
= update_learning_table__(ml
, src
, vlan
, is_gratuitous_arp
,
459 ovs_rwlock_unlock(&ml
->rwlock
);
465 /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
466 * learning entry, if any. */
468 mac_learning_lookup(const struct mac_learning
*ml
,
469 const struct eth_addr dst
, uint16_t vlan
)
471 if (eth_addr_is_multicast(dst
)) {
472 /* No tag because the treatment of multicast destinations never
475 } else if (!is_learning_vlan(ml
, vlan
)) {
476 /* We don't tag this property. The set of learning VLANs changes so
477 * rarely that we revalidate every flow when it changes. */
480 struct mac_entry
*e
= mac_entry_lookup(ml
, dst
, vlan
);
482 ovs_assert(e
== NULL
|| mac_entry_get_port(ml
, e
) != NULL
);
487 /* Expires 'e' from the 'ml' hash table. */
489 mac_learning_expire(struct mac_learning
*ml
, struct mac_entry
*e
)
491 ml
->need_revalidate
= true;
492 mac_entry_set_port(ml
, e
, NULL
);
493 hmap_remove(&ml
->table
, &e
->hmap_node
);
494 ovs_list_remove(&e
->lru_node
);
498 /* Expires all the mac-learning entries in 'ml'. */
500 mac_learning_flush(struct mac_learning
*ml
)
503 while (get_lru(ml
, &e
)){
504 mac_learning_expire(ml
, e
);
506 hmap_shrink(&ml
->table
);
509 /* Does periodic work required by 'ml'. Returns true if something changed that
510 * may require flow revalidation. */
512 mac_learning_run(struct mac_learning
*ml
)
514 bool need_revalidate
;
517 while (get_lru(ml
, &e
)
518 && (hmap_count(&ml
->table
) > ml
->max_entries
519 || time_now() >= e
->expires
)) {
520 COVERAGE_INC(mac_learning_expired
);
521 mac_learning_expire(ml
, e
);
524 need_revalidate
= ml
->need_revalidate
;
525 ml
->need_revalidate
= false;
526 return need_revalidate
;
530 mac_learning_wait(struct mac_learning
*ml
)
532 if (hmap_count(&ml
->table
) > ml
->max_entries
533 || ml
->need_revalidate
) {
534 poll_immediate_wake();
535 } else if (!ovs_list_is_empty(&ml
->lrus
)) {
536 struct mac_entry
*e
= mac_entry_from_lru_node(ml
->lrus
.next
);
537 poll_timer_wait_until(e
->expires
* 1000LL);