]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "mac-learning.h" | |
19 | ||
20 | #include <inttypes.h> | |
21 | #include <stdlib.h> | |
22 | ||
23 | #include "bitmap.h" | |
24 | #include "coverage.h" | |
25 | #include "hash.h" | |
26 | #include "openvswitch/list.h" | |
27 | #include "openvswitch/poll-loop.h" | |
28 | #include "timeval.h" | |
29 | #include "unaligned.h" | |
30 | #include "util.h" | |
31 | #include "vlan-bitmap.h" | |
32 | ||
33 | COVERAGE_DEFINE(mac_learning_learned); | |
34 | COVERAGE_DEFINE(mac_learning_expired); | |
35 | ||
36 | /* Returns the number of seconds since 'e' (within 'ml') was last learned. */ | |
37 | int | |
38 | mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e) | |
39 | { | |
40 | time_t remaining = e->expires - time_now(); | |
41 | return ml->idle_time - remaining; | |
42 | } | |
43 | ||
44 | static uint32_t | |
45 | mac_table_hash(const struct mac_learning *ml, const struct eth_addr mac, | |
46 | uint16_t vlan) | |
47 | { | |
48 | return hash_mac(mac, vlan, ml->secret); | |
49 | } | |
50 | ||
51 | static struct mac_entry * | |
52 | mac_entry_from_lru_node(struct ovs_list *list) | |
53 | { | |
54 | return CONTAINER_OF(list, struct mac_entry, lru_node); | |
55 | } | |
56 | ||
57 | static struct mac_entry * | |
58 | mac_entry_lookup(const struct mac_learning *ml, | |
59 | const struct eth_addr mac, uint16_t vlan) | |
60 | { | |
61 | struct mac_entry *e; | |
62 | ||
63 | HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan), | |
64 | &ml->table) { | |
65 | if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) { | |
66 | return e; | |
67 | } | |
68 | } | |
69 | return NULL; | |
70 | } | |
71 | ||
72 | static struct mac_learning_port * | |
73 | mac_learning_port_lookup(struct mac_learning *ml, void *port) | |
74 | { | |
75 | struct mac_learning_port *mlport; | |
76 | ||
77 | HMAP_FOR_EACH_IN_BUCKET (mlport, hmap_node, hash_pointer(port, ml->secret), | |
78 | &ml->ports_by_ptr) { | |
79 | if (mlport->port == port) { | |
80 | return mlport; | |
81 | } | |
82 | } | |
83 | return NULL; | |
84 | } | |
85 | ||
86 | /* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'. The | |
87 | * pointer can be retrieved with mac_entry_get_port(). | |
88 | * | |
89 | * The MAC-learning implementation treats the data that 'port' points to as | |
90 | * opaque and never tries to dereference it. However, when a MAC learning | |
91 | * table becomes overfull, so that eviction is required, the implementation | |
92 | * does first evict MAC entries for the most common 'port's values in 'ml', so | |
93 | * that there is a degree of fairness, that is, each port is entitled to its | |
94 | * fair share of MAC entries. */ | |
95 | void | |
96 | mac_entry_set_port(struct mac_learning *ml, struct mac_entry *e, void *port) | |
97 | OVS_REQ_WRLOCK(ml->rwlock) | |
98 | { | |
99 | if (mac_entry_get_port(ml, e) != port) { | |
100 | ml->need_revalidate = true; | |
101 | ||
102 | if (e->mlport) { | |
103 | struct mac_learning_port *mlport = e->mlport; | |
104 | ovs_list_remove(&e->port_lru_node); | |
105 | ||
106 | if (ovs_list_is_empty(&mlport->port_lrus)) { | |
107 | ovs_assert(mlport->heap_node.priority == 1); | |
108 | hmap_remove(&ml->ports_by_ptr, &mlport->hmap_node); | |
109 | heap_remove(&ml->ports_by_usage, &mlport->heap_node); | |
110 | free(mlport); | |
111 | } else { | |
112 | ovs_assert(mlport->heap_node.priority > 1); | |
113 | heap_change(&ml->ports_by_usage, &mlport->heap_node, | |
114 | mlport->heap_node.priority - 1); | |
115 | } | |
116 | e->mlport = NULL; | |
117 | } | |
118 | ||
119 | if (port) { | |
120 | struct mac_learning_port *mlport; | |
121 | ||
122 | mlport = mac_learning_port_lookup(ml, port); | |
123 | if (!mlport) { | |
124 | mlport = xzalloc(sizeof *mlport); | |
125 | hmap_insert(&ml->ports_by_ptr, &mlport->hmap_node, | |
126 | hash_pointer(port, ml->secret)); | |
127 | heap_insert(&ml->ports_by_usage, &mlport->heap_node, 1); | |
128 | mlport->port = port; | |
129 | ovs_list_init(&mlport->port_lrus); | |
130 | } else { | |
131 | heap_change(&ml->ports_by_usage, &mlport->heap_node, | |
132 | mlport->heap_node.priority + 1); | |
133 | } | |
134 | ovs_list_push_back(&mlport->port_lrus, &e->port_lru_node); | |
135 | e->mlport = mlport; | |
136 | } | |
137 | } | |
138 | } | |
139 | ||
140 | /* Finds one of the ports with the most MAC entries and evicts its least | |
141 | * recently used entry. */ | |
142 | static void | |
143 | evict_mac_entry_fairly(struct mac_learning *ml) | |
144 | OVS_REQ_WRLOCK(ml->rwlock) | |
145 | { | |
146 | struct mac_learning_port *mlport; | |
147 | struct mac_entry *e; | |
148 | ||
149 | mlport = CONTAINER_OF(heap_max(&ml->ports_by_usage), | |
150 | struct mac_learning_port, heap_node); | |
151 | e = CONTAINER_OF(ovs_list_front(&mlport->port_lrus), | |
152 | struct mac_entry, port_lru_node); | |
153 | mac_learning_expire(ml, e); | |
154 | } | |
155 | ||
156 | /* If the LRU list is not empty, stores the least-recently-used entry in '*e' | |
157 | * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e' | |
158 | * and return false. */ | |
159 | static bool | |
160 | get_lru(struct mac_learning *ml, struct mac_entry **e) | |
161 | OVS_REQ_RDLOCK(ml->rwlock) | |
162 | { | |
163 | if (!ovs_list_is_empty(&ml->lrus)) { | |
164 | *e = mac_entry_from_lru_node(ml->lrus.next); | |
165 | return true; | |
166 | } else { | |
167 | *e = NULL; | |
168 | return false; | |
169 | } | |
170 | } | |
171 | ||
172 | static unsigned int | |
173 | normalize_idle_time(unsigned int idle_time) | |
174 | { | |
175 | return (idle_time < 15 ? 15 | |
176 | : idle_time > 3600 ? 3600 | |
177 | : idle_time); | |
178 | } | |
179 | ||
180 | /* Creates and returns a new MAC learning table with an initial MAC aging | |
181 | * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX | |
182 | * entries. */ | |
183 | struct mac_learning * | |
184 | mac_learning_create(unsigned int idle_time) | |
185 | { | |
186 | struct mac_learning *ml; | |
187 | ||
188 | ml = xmalloc(sizeof *ml); | |
189 | ovs_list_init(&ml->lrus); | |
190 | hmap_init(&ml->table); | |
191 | ml->secret = random_uint32(); | |
192 | ml->flood_vlans = NULL; | |
193 | ml->idle_time = normalize_idle_time(idle_time); | |
194 | ml->max_entries = MAC_DEFAULT_MAX; | |
195 | ml->need_revalidate = false; | |
196 | hmap_init(&ml->ports_by_ptr); | |
197 | heap_init(&ml->ports_by_usage); | |
198 | ovs_refcount_init(&ml->ref_cnt); | |
199 | ovs_rwlock_init(&ml->rwlock); | |
200 | return ml; | |
201 | } | |
202 | ||
203 | struct mac_learning * | |
204 | mac_learning_ref(const struct mac_learning *ml_) | |
205 | { | |
206 | struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_); | |
207 | if (ml) { | |
208 | ovs_refcount_ref(&ml->ref_cnt); | |
209 | } | |
210 | return ml; | |
211 | } | |
212 | ||
213 | /* Unreferences (and possibly destroys) MAC learning table 'ml'. */ | |
214 | void | |
215 | mac_learning_unref(struct mac_learning *ml) | |
216 | { | |
217 | if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) { | |
218 | struct mac_entry *e, *next; | |
219 | ||
220 | ovs_rwlock_wrlock(&ml->rwlock); | |
221 | HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) { | |
222 | mac_learning_expire(ml, e); | |
223 | } | |
224 | hmap_destroy(&ml->table); | |
225 | hmap_destroy(&ml->ports_by_ptr); | |
226 | heap_destroy(&ml->ports_by_usage); | |
227 | ||
228 | bitmap_free(ml->flood_vlans); | |
229 | ovs_rwlock_unlock(&ml->rwlock); | |
230 | ovs_rwlock_destroy(&ml->rwlock); | |
231 | free(ml); | |
232 | } | |
233 | } | |
234 | ||
235 | /* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on | |
236 | * which all packets are flooded. Returns true if the set has changed from the | |
237 | * previous value. */ | |
238 | bool | |
239 | mac_learning_set_flood_vlans(struct mac_learning *ml, | |
240 | const unsigned long *bitmap) | |
241 | { | |
242 | if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) { | |
243 | return false; | |
244 | } else { | |
245 | bitmap_free(ml->flood_vlans); | |
246 | ml->flood_vlans = vlan_bitmap_clone(bitmap); | |
247 | return true; | |
248 | } | |
249 | } | |
250 | ||
251 | /* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */ | |
252 | void | |
253 | mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time) | |
254 | { | |
255 | idle_time = normalize_idle_time(idle_time); | |
256 | if (idle_time != ml->idle_time) { | |
257 | struct mac_entry *e; | |
258 | int delta; | |
259 | ||
260 | delta = (int) idle_time - (int) ml->idle_time; | |
261 | LIST_FOR_EACH (e, lru_node, &ml->lrus) { | |
262 | e->expires += delta; | |
263 | } | |
264 | ml->idle_time = idle_time; | |
265 | } | |
266 | } | |
267 | ||
268 | /* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it | |
269 | * to be within a reasonable range. */ | |
270 | void | |
271 | mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries) | |
272 | { | |
273 | ml->max_entries = (max_entries < 10 ? 10 | |
274 | : max_entries > 1000 * 1000 ? 1000 * 1000 | |
275 | : max_entries); | |
276 | } | |
277 | ||
278 | static bool | |
279 | is_learning_vlan(const struct mac_learning *ml, uint16_t vlan) | |
280 | { | |
281 | return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan); | |
282 | } | |
283 | ||
284 | /* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'. | |
285 | * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if | |
286 | * 'vlan' is configured on 'ml' to flood all packets. */ | |
287 | bool | |
288 | mac_learning_may_learn(const struct mac_learning *ml, | |
289 | const struct eth_addr src_mac, uint16_t vlan) | |
290 | { | |
291 | return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac); | |
292 | } | |
293 | ||
294 | /* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan', | |
295 | * inserting a new entry if necessary. The caller must have already verified, | |
296 | * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are | |
297 | * learnable. | |
298 | * | |
299 | * If the returned MAC entry is new (that is, if it has a NULL client-provided | |
300 | * port, as returned by mac_entry_get_port()), then the caller must initialize | |
301 | * the new entry's port to a nonnull value with mac_entry_set_port(). */ | |
302 | struct mac_entry * | |
303 | mac_learning_insert(struct mac_learning *ml, | |
304 | const struct eth_addr src_mac, uint16_t vlan) | |
305 | { | |
306 | struct mac_entry *e; | |
307 | ||
308 | e = mac_entry_lookup(ml, src_mac, vlan); | |
309 | if (!e) { | |
310 | uint32_t hash = mac_table_hash(ml, src_mac, vlan); | |
311 | ||
312 | if (hmap_count(&ml->table) >= ml->max_entries) { | |
313 | evict_mac_entry_fairly(ml); | |
314 | } | |
315 | ||
316 | e = xmalloc(sizeof *e); | |
317 | hmap_insert(&ml->table, &e->hmap_node, hash); | |
318 | e->mac = src_mac; | |
319 | e->vlan = vlan; | |
320 | e->grat_arp_lock = TIME_MIN; | |
321 | e->mlport = NULL; | |
322 | COVERAGE_INC(mac_learning_learned); | |
323 | } else { | |
324 | ovs_list_remove(&e->lru_node); | |
325 | } | |
326 | ||
327 | /* Mark 'e' as recently used. */ | |
328 | ovs_list_push_back(&ml->lrus, &e->lru_node); | |
329 | if (e->mlport) { | |
330 | ovs_list_remove(&e->port_lru_node); | |
331 | ovs_list_push_back(&e->mlport->port_lrus, &e->port_lru_node); | |
332 | } | |
333 | e->expires = time_now() + ml->idle_time; | |
334 | ||
335 | return e; | |
336 | } | |
337 | ||
338 | /* Checks whether a MAC learning update is necessary for MAC learning table | |
339 | * 'ml' given that a packet matching 'src' was received on 'in_port' in 'vlan', | |
340 | * and given that the packet was gratuitous ARP if 'is_gratuitous_arp' is | |
341 | * 'true' and 'in_port' is a bond port if 'is_bond' is 'true'. | |
342 | * | |
343 | * Most packets processed through the MAC learning table do not actually | |
344 | * change it in any way. This function requires only a read lock on the MAC | |
345 | * learning table, so it is much cheaper in this common case. | |
346 | * | |
347 | * Keep the code here synchronized with that in update_learning_table__() | |
348 | * below. */ | |
349 | static bool | |
350 | is_mac_learning_update_needed(const struct mac_learning *ml, | |
351 | struct eth_addr src, int vlan, | |
352 | bool is_gratuitous_arp, bool is_bond, | |
353 | void *in_port) | |
354 | OVS_REQ_RDLOCK(ml->rwlock) | |
355 | { | |
356 | struct mac_entry *mac; | |
357 | ||
358 | if (!mac_learning_may_learn(ml, src, vlan)) { | |
359 | return false; | |
360 | } | |
361 | ||
362 | mac = mac_learning_lookup(ml, src, vlan); | |
363 | if (!mac || mac_entry_age(ml, mac)) { | |
364 | return true; | |
365 | } | |
366 | ||
367 | if (is_gratuitous_arp) { | |
368 | /* We don't want to learn from gratuitous ARP packets that are | |
369 | * reflected back over bond slaves so we lock the learning table. For | |
370 | * more detail, see the bigger comment in update_learning_table__(). */ | |
371 | if (!is_bond) { | |
372 | return true; /* Need to set the gratuitous ARP lock. */ | |
373 | } else if (mac_entry_is_grat_arp_locked(mac)) { | |
374 | return false; | |
375 | } | |
376 | } | |
377 | ||
378 | return mac_entry_get_port(ml, mac) != in_port /* ofbundle */; | |
379 | } | |
380 | ||
381 | /* Updates MAC learning table 'ml' given that a packet matching 'src' was | |
382 | * received on 'in_port' in 'vlan', and given that the packet was gratuitous | |
383 | * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if | |
384 | * 'is_bond' is 'true'. | |
385 | * | |
386 | * This code repeats all the checks in is_mac_learning_update_needed() because | |
387 | * the lock was released between there and here and thus the MAC learning state | |
388 | * could have changed. | |
389 | * | |
390 | * Returns 'true' if 'ml' was updated, 'false' otherwise. | |
391 | * | |
392 | * Keep the code here synchronized with that in is_mac_learning_update_needed() | |
393 | * above. */ | |
394 | static bool | |
395 | update_learning_table__(struct mac_learning *ml, struct eth_addr src, | |
396 | int vlan, bool is_gratuitous_arp, bool is_bond, | |
397 | void *in_port) | |
398 | OVS_REQ_WRLOCK(ml->rwlock) | |
399 | { | |
400 | struct mac_entry *mac; | |
401 | ||
402 | if (!mac_learning_may_learn(ml, src, vlan)) { | |
403 | return false; | |
404 | } | |
405 | ||
406 | mac = mac_learning_insert(ml, src, vlan); | |
407 | if (is_gratuitous_arp) { | |
408 | /* Gratuitous ARP packets received over non-bond interfaces could be | |
409 | * reflected back over bond slaves. We don't want to learn from these | |
410 | * reflected packets, so we lock each entry for which a gratuitous ARP | |
411 | * packet was received over a non-bond interface and refrain from | |
412 | * learning from gratuitous ARP packets that arrive over bond | |
413 | * interfaces for this entry while the lock is in effect. Refer to the | |
414 | * 'ovs-vswitch Internals' document for more in-depth discussion on | |
415 | * this topic. */ | |
416 | if (!is_bond) { | |
417 | mac_entry_set_grat_arp_lock(mac); | |
418 | } else if (mac_entry_is_grat_arp_locked(mac)) { | |
419 | return false; | |
420 | } | |
421 | } | |
422 | ||
423 | if (mac_entry_get_port(ml, mac) != in_port) { | |
424 | mac_entry_set_port(ml, mac, in_port); | |
425 | return true; | |
426 | } | |
427 | return false; | |
428 | } | |
429 | ||
430 | /* Updates MAC learning table 'ml' given that a packet matching 'src' was | |
431 | * received on 'in_port' in 'vlan', and given that the packet was gratuitous | |
432 | * ARP if 'is_gratuitous_arp' is 'true' and 'in_port' is a bond port if | |
433 | * 'is_bond' is 'true'. | |
434 | * | |
435 | * Returns 'true' if 'ml' was updated, 'false' otherwise. */ | |
436 | bool | |
437 | mac_learning_update(struct mac_learning *ml, struct eth_addr src, | |
438 | int vlan, bool is_gratuitous_arp, bool is_bond, | |
439 | void *in_port) | |
440 | OVS_EXCLUDED(ml->rwlock) | |
441 | { | |
442 | bool need_update; | |
443 | bool updated = false; | |
444 | ||
445 | /* Don't learn the OFPP_NONE port. */ | |
446 | if (in_port != NULL) { | |
447 | /* First try the common case: no change to MAC learning table. */ | |
448 | ovs_rwlock_rdlock(&ml->rwlock); | |
449 | need_update = is_mac_learning_update_needed(ml, src, vlan, | |
450 | is_gratuitous_arp, is_bond, | |
451 | in_port); | |
452 | ovs_rwlock_unlock(&ml->rwlock); | |
453 | ||
454 | if (need_update) { | |
455 | /* Slow path: MAC learning table might need an update. */ | |
456 | ovs_rwlock_wrlock(&ml->rwlock); | |
457 | updated = update_learning_table__(ml, src, vlan, is_gratuitous_arp, | |
458 | is_bond, in_port); | |
459 | ovs_rwlock_unlock(&ml->rwlock); | |
460 | } | |
461 | } | |
462 | return updated; | |
463 | } | |
464 | ||
465 | /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC | |
466 | * learning entry, if any. */ | |
467 | struct mac_entry * | |
468 | mac_learning_lookup(const struct mac_learning *ml, | |
469 | const struct eth_addr dst, uint16_t vlan) | |
470 | { | |
471 | if (eth_addr_is_multicast(dst)) { | |
472 | /* No tag because the treatment of multicast destinations never | |
473 | * changes. */ | |
474 | return NULL; | |
475 | } else if (!is_learning_vlan(ml, vlan)) { | |
476 | /* We don't tag this property. The set of learning VLANs changes so | |
477 | * rarely that we revalidate every flow when it changes. */ | |
478 | return NULL; | |
479 | } else { | |
480 | struct mac_entry *e = mac_entry_lookup(ml, dst, vlan); | |
481 | ||
482 | ovs_assert(e == NULL || mac_entry_get_port(ml, e) != NULL); | |
483 | return e; | |
484 | } | |
485 | } | |
486 | ||
487 | /* Expires 'e' from the 'ml' hash table. */ | |
488 | void | |
489 | mac_learning_expire(struct mac_learning *ml, struct mac_entry *e) | |
490 | { | |
491 | ml->need_revalidate = true; | |
492 | mac_entry_set_port(ml, e, NULL); | |
493 | hmap_remove(&ml->table, &e->hmap_node); | |
494 | ovs_list_remove(&e->lru_node); | |
495 | free(e); | |
496 | } | |
497 | ||
498 | /* Expires all the mac-learning entries in 'ml'. */ | |
499 | void | |
500 | mac_learning_flush(struct mac_learning *ml) | |
501 | { | |
502 | struct mac_entry *e; | |
503 | while (get_lru(ml, &e)){ | |
504 | mac_learning_expire(ml, e); | |
505 | } | |
506 | hmap_shrink(&ml->table); | |
507 | } | |
508 | ||
509 | /* Does periodic work required by 'ml'. Returns true if something changed that | |
510 | * may require flow revalidation. */ | |
511 | bool | |
512 | mac_learning_run(struct mac_learning *ml) | |
513 | { | |
514 | bool need_revalidate; | |
515 | struct mac_entry *e; | |
516 | ||
517 | while (get_lru(ml, &e) | |
518 | && (hmap_count(&ml->table) > ml->max_entries | |
519 | || time_now() >= e->expires)) { | |
520 | COVERAGE_INC(mac_learning_expired); | |
521 | mac_learning_expire(ml, e); | |
522 | } | |
523 | ||
524 | need_revalidate = ml->need_revalidate; | |
525 | ml->need_revalidate = false; | |
526 | return need_revalidate; | |
527 | } | |
528 | ||
529 | void | |
530 | mac_learning_wait(struct mac_learning *ml) | |
531 | { | |
532 | if (hmap_count(&ml->table) > ml->max_entries | |
533 | || ml->need_revalidate) { | |
534 | poll_immediate_wake(); | |
535 | } else if (!ovs_list_is_empty(&ml->lrus)) { | |
536 | struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next); | |
537 | poll_timer_wait_until(e->expires * 1000LL); | |
538 | } | |
539 | } |