]>
git.proxmox.com Git - ovs.git/blob - lib/mac-learning.c
2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "mac-learning.h"
26 #include "openvswitch/list.h"
27 #include "openvswitch/poll-loop.h"
29 #include "unaligned.h"
31 #include "vlan-bitmap.h"
/* Coverage counters bumped by this file: one each for entries that are
 * learned, expired by mac_learning_run(), evicted to make room, and moved
 * from one port to another. */
COVERAGE_DEFINE(mac_learning_learned);
COVERAGE_DEFINE(mac_learning_expired);
COVERAGE_DEFINE(mac_learning_evicted);
COVERAGE_DEFINE(mac_learning_moved);
38 /* Returns the number of seconds since 'e' (within 'ml') was last learned. */
40 mac_entry_age(const struct mac_learning
*ml
, const struct mac_entry
*e
)
42 time_t remaining
= e
->expires
- time_now();
43 return ml
->idle_time
- remaining
;
47 mac_table_hash(const struct mac_learning
*ml
, const struct eth_addr mac
,
50 return hash_mac(mac
, vlan
, ml
->secret
);
53 static struct mac_entry
*
54 mac_entry_from_lru_node(struct ovs_list
*list
)
56 return CONTAINER_OF(list
, struct mac_entry
, lru_node
);
59 static struct mac_entry
*
60 mac_entry_lookup(const struct mac_learning
*ml
,
61 const struct eth_addr mac
, uint16_t vlan
)
65 HMAP_FOR_EACH_WITH_HASH (e
, hmap_node
, mac_table_hash(ml
, mac
, vlan
),
67 if (e
->vlan
== vlan
&& eth_addr_equals(e
->mac
, mac
)) {
74 static struct mac_learning_port
*
75 mac_learning_port_lookup(struct mac_learning
*ml
, void *port
)
77 struct mac_learning_port
*mlport
;
79 HMAP_FOR_EACH_IN_BUCKET (mlport
, hmap_node
, hash_pointer(port
, ml
->secret
),
81 if (mlport
->port
== port
) {
88 /* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'. The
89 * pointer can be retrieved with mac_entry_get_port().
91 * The MAC-learning implementation treats the data that 'port' points to as
92 * opaque and never tries to dereference it. However, when a MAC learning
93 * table becomes overfull, so that eviction is required, the implementation
94 * does first evict MAC entries for the most common 'port's values in 'ml', so
95 * that there is a degree of fairness, that is, each port is entitled to its
96 * fair share of MAC entries. */
98 mac_entry_set_port(struct mac_learning
*ml
, struct mac_entry
*e
, void *port
)
99 OVS_REQ_WRLOCK(ml
->rwlock
)
101 if (mac_entry_get_port(ml
, e
) != port
) {
102 ml
->need_revalidate
= true;
105 struct mac_learning_port
*mlport
= e
->mlport
;
106 ovs_list_remove(&e
->port_lru_node
);
108 if (ovs_list_is_empty(&mlport
->port_lrus
)) {
109 ovs_assert(mlport
->heap_node
.priority
== 1);
110 hmap_remove(&ml
->ports_by_ptr
, &mlport
->hmap_node
);
111 heap_remove(&ml
->ports_by_usage
, &mlport
->heap_node
);
114 ovs_assert(mlport
->heap_node
.priority
> 1);
115 heap_change(&ml
->ports_by_usage
, &mlport
->heap_node
,
116 mlport
->heap_node
.priority
- 1);
122 struct mac_learning_port
*mlport
;
124 mlport
= mac_learning_port_lookup(ml
, port
);
126 mlport
= xzalloc(sizeof *mlport
);
127 hmap_insert(&ml
->ports_by_ptr
, &mlport
->hmap_node
,
128 hash_pointer(port
, ml
->secret
));
129 heap_insert(&ml
->ports_by_usage
, &mlport
->heap_node
, 1);
131 ovs_list_init(&mlport
->port_lrus
);
133 heap_change(&ml
->ports_by_usage
, &mlport
->heap_node
,
134 mlport
->heap_node
.priority
+ 1);
136 ovs_list_push_back(&mlport
->port_lrus
, &e
->port_lru_node
);
142 /* Finds one of the ports with the most MAC entries and evicts its least
143 * recently used entry. */
145 evict_mac_entry_fairly(struct mac_learning
*ml
)
146 OVS_REQ_WRLOCK(ml
->rwlock
)
148 struct mac_learning_port
*mlport
;
151 mlport
= CONTAINER_OF(heap_max(&ml
->ports_by_usage
),
152 struct mac_learning_port
, heap_node
);
153 e
= CONTAINER_OF(ovs_list_front(&mlport
->port_lrus
),
154 struct mac_entry
, port_lru_node
);
155 COVERAGE_INC(mac_learning_evicted
);
157 mac_learning_expire(ml
, e
);
160 /* If the LRU list is not empty, stores the least-recently-used entry in '*e'
161 * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e'
162 * and return false. */
164 get_lru(struct mac_learning
*ml
, struct mac_entry
**e
)
165 OVS_REQ_RDLOCK(ml
->rwlock
)
167 if (!ovs_list_is_empty(&ml
->lrus
)) {
168 *e
= mac_entry_from_lru_node(ml
->lrus
.next
);
/* Bounds, in seconds, that normalize_idle_time() clamps an idle time to. */
enum {
    MAC_IDLE_TIME_MIN = 15,
    MAC_IDLE_TIME_MAX = 3600
};

/* Returns 'idle_time' clamped to the range
 * [MAC_IDLE_TIME_MIN, MAC_IDLE_TIME_MAX] seconds: values below the minimum
 * become the minimum, values above the maximum become the maximum, and
 * anything in between is returned unchanged. */
static unsigned int
normalize_idle_time(unsigned int idle_time)
{
    if (idle_time < MAC_IDLE_TIME_MIN) {
        return MAC_IDLE_TIME_MIN;
    }
    if (idle_time > MAC_IDLE_TIME_MAX) {
        return MAC_IDLE_TIME_MAX;
    }
    return idle_time;
}
184 /* Clear all the mac_learning statistics */
186 mac_learning_clear_statistics(struct mac_learning
*ml
)
189 ml
->total_learned
= 0;
190 ml
->total_expired
= 0;
191 ml
->total_evicted
= 0;
196 /* Creates and returns a new MAC learning table with an initial MAC aging
197 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
199 struct mac_learning
*
200 mac_learning_create(unsigned int idle_time
)
202 struct mac_learning
*ml
;
204 ml
= xmalloc(sizeof *ml
);
205 ovs_list_init(&ml
->lrus
);
206 hmap_init(&ml
->table
);
207 ml
->secret
= random_uint32();
208 ml
->flood_vlans
= NULL
;
209 ml
->idle_time
= normalize_idle_time(idle_time
);
210 ml
->max_entries
= MAC_DEFAULT_MAX
;
211 ml
->need_revalidate
= false;
212 hmap_init(&ml
->ports_by_ptr
);
213 heap_init(&ml
->ports_by_usage
);
214 ovs_refcount_init(&ml
->ref_cnt
);
215 ovs_rwlock_init(&ml
->rwlock
);
216 mac_learning_clear_statistics(ml
);
220 struct mac_learning
*
221 mac_learning_ref(const struct mac_learning
*ml_
)
223 struct mac_learning
*ml
= CONST_CAST(struct mac_learning
*, ml_
);
225 ovs_refcount_ref(&ml
->ref_cnt
);
230 /* Unreferences (and possibly destroys) MAC learning table 'ml'. */
232 mac_learning_unref(struct mac_learning
*ml
)
234 if (ml
&& ovs_refcount_unref(&ml
->ref_cnt
) == 1) {
235 struct mac_entry
*e
, *next
;
237 ovs_rwlock_wrlock(&ml
->rwlock
);
238 HMAP_FOR_EACH_SAFE (e
, next
, hmap_node
, &ml
->table
) {
239 mac_learning_expire(ml
, e
);
241 hmap_destroy(&ml
->table
);
242 hmap_destroy(&ml
->ports_by_ptr
);
243 heap_destroy(&ml
->ports_by_usage
);
245 bitmap_free(ml
->flood_vlans
);
246 ovs_rwlock_unlock(&ml
->rwlock
);
247 ovs_rwlock_destroy(&ml
->rwlock
);
252 /* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
253 * which all packets are flooded. Returns true if the set has changed from the
256 mac_learning_set_flood_vlans(struct mac_learning
*ml
,
257 const unsigned long *bitmap
)
259 if (vlan_bitmap_equal(ml
->flood_vlans
, bitmap
)) {
262 bitmap_free(ml
->flood_vlans
);
263 ml
->flood_vlans
= vlan_bitmap_clone(bitmap
);
268 /* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
270 mac_learning_set_idle_time(struct mac_learning
*ml
, unsigned int idle_time
)
272 idle_time
= normalize_idle_time(idle_time
);
273 if (idle_time
!= ml
->idle_time
) {
277 delta
= (int) idle_time
- (int) ml
->idle_time
;
278 LIST_FOR_EACH (e
, lru_node
, &ml
->lrus
) {
281 ml
->idle_time
= idle_time
;
285 /* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
286 * to be within a reasonable range. */
288 mac_learning_set_max_entries(struct mac_learning
*ml
, size_t max_entries
)
290 ml
->max_entries
= (max_entries
< 10 ? 10
291 : max_entries
> 1000 * 1000 ? 1000 * 1000
296 is_learning_vlan(const struct mac_learning
*ml
, uint16_t vlan
)
298 return !ml
->flood_vlans
|| !bitmap_is_set(ml
->flood_vlans
, vlan
);
301 /* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
302 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
303 * 'vlan' is configured on 'ml' to flood all packets. */
305 mac_learning_may_learn(const struct mac_learning
*ml
,
306 const struct eth_addr src_mac
, uint16_t vlan
)
308 return ml
&& is_learning_vlan(ml
, vlan
) && !eth_addr_is_multicast(src_mac
);
311 /* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
312 * inserting a new entry if necessary. The caller must have already verified,
313 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
316 * If the returned MAC entry is new (that is, if it has a NULL client-provided
317 * port, as returned by mac_entry_get_port()), then the caller must initialize
318 * the new entry's port to a nonnull value with mac_entry_set_port(). */
320 mac_learning_insert(struct mac_learning
*ml
,
321 const struct eth_addr src_mac
, uint16_t vlan
)
325 e
= mac_entry_lookup(ml
, src_mac
, vlan
);
327 uint32_t hash
= mac_table_hash(ml
, src_mac
, vlan
);
329 if (hmap_count(&ml
->table
) >= ml
->max_entries
) {
330 evict_mac_entry_fairly(ml
);
333 e
= xmalloc(sizeof *e
);
334 hmap_insert(&ml
->table
, &e
->hmap_node
, hash
);
337 e
->grat_arp_lock
= TIME_MIN
;
339 COVERAGE_INC(mac_learning_learned
);
342 ovs_list_remove(&e
->lru_node
);
345 /* Mark 'e' as recently used. */
346 ovs_list_push_back(&ml
->lrus
, &e
->lru_node
);
348 ovs_list_remove(&e
->port_lru_node
);
349 ovs_list_push_back(&e
->mlport
->port_lrus
, &e
->port_lru_node
);
351 e
->expires
= time_now() + ml
->idle_time
;
356 /* Checks whether a MAC learning update is necessary for MAC learning table
357 * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan',
358 * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is
359 * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'.
361 * Most packets processed through the MAC learning table do not actually
362 * change it in any way. This function requires only a read lock on the MAC
363 * learning table, so it is much cheaper in this common case.
365 * Keep the code here synchronized with that in update_learning_table__()
368 is_mac_learning_update_needed(const struct mac_learning
*ml
,
369 struct eth_addr src
, int vlan
,
370 bool is_gratuitous_arp
, bool is_bond
,
372 OVS_REQ_RDLOCK(ml
->rwlock
)
374 struct mac_entry
*mac
;
376 if (!mac_learning_may_learn(ml
, src
, vlan
)) {
380 mac
= mac_learning_lookup(ml
, src
, vlan
);
381 if (!mac
|| mac_entry_age(ml
, mac
)) {
385 if (is_gratuitous_arp
) {
386 /* We don't want to learn from gratuitous ARP packets that are
387 * reflected back over bond members so we lock the learning table. For
388 * more detail, see the bigger comment in update_learning_table__(). */
390 return true; /* Need to set the gratuitous ARP lock. */
391 } else if (mac_entry_is_grat_arp_locked(mac
)) {
396 return mac_entry_get_port(ml
, mac
) != in_port
/* ofbundle */;
399 /* Updates MAC learning table 'ml' given that a packet matching 'src' was
400 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
401 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
402 * 'is_bond' is 'true'.
404 * This code repeats all the checks in is_mac_learning_update_needed() because
405 * the lock was released between there and here and thus the MAC learning state
406 * could have changed.
408 * Returns 'true' if 'ml' was updated, 'false' otherwise.
410 * Keep the code here synchronized with that in is_mac_learning_update_needed()
413 update_learning_table__(struct mac_learning
*ml
, struct eth_addr src
,
414 int vlan
, bool is_gratuitous_arp
, bool is_bond
,
416 OVS_REQ_WRLOCK(ml
->rwlock
)
418 struct mac_entry
*mac
;
420 if (!mac_learning_may_learn(ml
, src
, vlan
)) {
424 mac
= mac_learning_insert(ml
, src
, vlan
);
425 if (is_gratuitous_arp
) {
426 /* Gratuitous ARP packets received over non-bond interfaces could be
427 * reflected back over bond members. We don't want to learn from these
428 * reflected packets, so we lock each entry for which a gratuitous ARP
429 * packet was received over a non-bond interface and refrain from
430 * learning from gratuitous ARP packets that arrive over bond
431 * interfaces for this entry while the lock is in effect. Refer to the
432 * 'ovs-vswitch Internals' document for more in-depth discussion on
435 mac_entry_set_grat_arp_lock(mac
);
436 } else if (mac_entry_is_grat_arp_locked(mac
)) {
441 if (mac_entry_get_port(ml
, mac
) != in_port
) {
442 if (mac_entry_get_port(ml
, mac
) != NULL
) {
443 COVERAGE_INC(mac_learning_moved
);
446 mac_entry_set_port(ml
, mac
, in_port
);
452 /* Updates MAC learning table 'ml' given that a packet matching 'src' was
453 * received on 'in_port' in 'vlan', and given that the packet was gratuitous
454 * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if
455 * 'is_bond' is 'true'.
457 * Returns 'true' if 'ml' was updated, 'false' otherwise. */
459 mac_learning_update(struct mac_learning
*ml
, struct eth_addr src
,
460 int vlan
, bool is_gratuitous_arp
, bool is_bond
,
462 OVS_EXCLUDED(ml
->rwlock
)
465 bool updated
= false;
467 /* Don't learn the OFPP_NONE port. */
468 if (in_port
!= NULL
) {
469 /* First try the common case: no change to MAC learning table. */
470 ovs_rwlock_rdlock(&ml
->rwlock
);
471 need_update
= is_mac_learning_update_needed(ml
, src
, vlan
,
472 is_gratuitous_arp
, is_bond
,
474 ovs_rwlock_unlock(&ml
->rwlock
);
477 /* Slow path: MAC learning table might need an update. */
478 ovs_rwlock_wrlock(&ml
->rwlock
);
479 updated
= update_learning_table__(ml
, src
, vlan
, is_gratuitous_arp
,
481 ovs_rwlock_unlock(&ml
->rwlock
);
487 /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
488 * learning entry, if any. */
490 mac_learning_lookup(const struct mac_learning
*ml
,
491 const struct eth_addr dst
, uint16_t vlan
)
493 if (eth_addr_is_multicast(dst
)) {
494 /* No tag because the treatment of multicast destinations never
497 } else if (!is_learning_vlan(ml
, vlan
)) {
498 /* We don't tag this property. The set of learning VLANs changes so
499 * rarely that we revalidate every flow when it changes. */
502 struct mac_entry
*e
= mac_entry_lookup(ml
, dst
, vlan
);
504 ovs_assert(e
== NULL
|| mac_entry_get_port(ml
, e
) != NULL
);
509 /* Expires 'e' from the 'ml' hash table. */
511 mac_learning_expire(struct mac_learning
*ml
, struct mac_entry
*e
)
513 ml
->need_revalidate
= true;
514 mac_entry_set_port(ml
, e
, NULL
);
515 hmap_remove(&ml
->table
, &e
->hmap_node
);
516 ovs_list_remove(&e
->lru_node
);
520 /* Expires all the mac-learning entries in 'ml'. */
522 mac_learning_flush(struct mac_learning
*ml
)
525 while (get_lru(ml
, &e
)){
526 mac_learning_expire(ml
, e
);
528 hmap_shrink(&ml
->table
);
531 /* Does periodic work required by 'ml'. Returns true if something changed that
532 * may require flow revalidation. */
534 mac_learning_run(struct mac_learning
*ml
)
536 bool need_revalidate
;
539 while (get_lru(ml
, &e
)
540 && (hmap_count(&ml
->table
) > ml
->max_entries
541 || time_now() >= e
->expires
)) {
542 COVERAGE_INC(mac_learning_expired
);
544 mac_learning_expire(ml
, e
);
547 need_revalidate
= ml
->need_revalidate
;
548 ml
->need_revalidate
= false;
549 return need_revalidate
;
553 mac_learning_wait(struct mac_learning
*ml
)
555 if (hmap_count(&ml
->table
) > ml
->max_entries
556 || ml
->need_revalidate
) {
557 poll_immediate_wake();
558 } else if (!ovs_list_is_empty(&ml
->lrus
)) {
559 struct mac_entry
*e
= mac_entry_from_lru_node(ml
->lrus
.next
);
560 poll_timer_wait_until(e
->expires
* 1000LL);