/* lib/mac-learning.c (mirror_ovs)
 * mac-learning: Implement per-port MAC learning fairness. */
1 /*
2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18 #include "mac-learning.h"
19
20 #include <inttypes.h>
21 #include <stdlib.h>
22
23 #include "bitmap.h"
24 #include "coverage.h"
25 #include "hash.h"
26 #include "list.h"
27 #include "poll-loop.h"
28 #include "timeval.h"
29 #include "unaligned.h"
30 #include "util.h"
31 #include "vlan-bitmap.h"
32
33 COVERAGE_DEFINE(mac_learning_learned);
34 COVERAGE_DEFINE(mac_learning_expired);
35
36 /* Returns the number of seconds since 'e' (within 'ml') was last learned. */
37 int
38 mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
39 {
40 time_t remaining = e->expires - time_now();
41 return ml->idle_time - remaining;
42 }
43
44 static uint32_t
45 mac_table_hash(const struct mac_learning *ml, const uint8_t mac[ETH_ADDR_LEN],
46 uint16_t vlan)
47 {
48 return hash_mac(mac, vlan, ml->secret);
49 }
50
/* Converts 'list', which must be the 'lru_node' member embedded in a
 * struct mac_entry, back into a pointer to the containing entry. */
static struct mac_entry *
mac_entry_from_lru_node(struct ovs_list *list)
{
    return CONTAINER_OF(list, struct mac_entry, lru_node);
}
56
/* Searches 'ml''s hash table for an entry matching both 'mac' and 'vlan'.
 * Returns the entry, or NULL if there is none. */
static struct mac_entry *
mac_entry_lookup(const struct mac_learning *ml,
                 const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
{
    struct mac_entry *e;

    /* Only the bucket selected by mac_table_hash() can contain a match. */
    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan),
                             &ml->table) {
        if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) {
            return e;
        }
    }
    return NULL;
}
71
/* Returns the per-port fairness record in 'ml' for the client pointer 'port',
 * or NULL if 'port' has no record (i.e. no MAC entry currently refers to it). */
static struct mac_learning_port *
mac_learning_port_lookup(struct mac_learning *ml, void *port)
{
    struct mac_learning_port *mlport;

    /* 'port' is treated as an opaque pointer, so records are bucketed by a
     * hash of the pointer value itself. */
    HMAP_FOR_EACH_IN_BUCKET (mlport, hmap_node, hash_pointer(port, ml->secret),
                             &ml->ports_by_ptr) {
        if (mlport->port == port) {
            return mlport;
        }
    }
    return NULL;
}
85
/* Changes the client-owned pointer for entry 'e' in 'ml' to 'port'.  The
 * pointer can be retrieved with mac_entry_get_port().
 *
 * The MAC-learning implementation treats the data that 'port' points to as
 * opaque and never tries to dereference it.  However, when a MAC learning
 * table becomes overfull, so that eviction is required, the implementation
 * does first evict MAC entries for the most common 'port's values in 'ml', so
 * that there is a degree of fairness, that is, each port is entitled to its
 * fair share of MAC entries. */
void
mac_entry_set_port(struct mac_learning *ml, struct mac_entry *e, void *port)
    OVS_REQ_WRLOCK(ml->rwlock)
{
    if (mac_entry_get_port(ml, e) != port) {
        /* The MAC is moving to a different port, so flows based on the old
         * association must be revalidated. */
        ml->need_revalidate = true;

        if (e->mlport) {
            /* Detach 'e' from its current port: unlink it from that port's
             * per-port LRU list and decrement the port's usage count in the
             * fairness heap.  If 'e' was the port's last entry, the per-port
             * record itself is torn down. */
            struct mac_learning_port *mlport = e->mlport;
            list_remove(&e->port_lru_node);

            if (list_is_empty(&mlport->port_lrus)) {
                ovs_assert(mlport->heap_node.priority == 1);
                hmap_remove(&ml->ports_by_ptr, &mlport->hmap_node);
                heap_remove(&ml->ports_by_usage, &mlport->heap_node);
                free(mlport);
            } else {
                ovs_assert(mlport->heap_node.priority > 1);
                heap_change(&ml->ports_by_usage, &mlport->heap_node,
                            mlport->heap_node.priority - 1);
            }
            e->mlport = NULL;
        }

        if (port) {
            /* Attach 'e' to the new port, creating a per-port record with
             * usage count 1 if this is the first entry for 'port', otherwise
             * bumping the existing record's count in the fairness heap. */
            struct mac_learning_port *mlport;

            mlport = mac_learning_port_lookup(ml, port);
            if (!mlport) {
                mlport = xzalloc(sizeof *mlport);
                hmap_insert(&ml->ports_by_ptr, &mlport->hmap_node,
                            hash_pointer(port, ml->secret));
                heap_insert(&ml->ports_by_usage, &mlport->heap_node, 1);
                mlport->port = port;
                list_init(&mlport->port_lrus);
            } else {
                heap_change(&ml->ports_by_usage, &mlport->heap_node,
                            mlport->heap_node.priority + 1);
            }
            list_push_back(&mlport->port_lrus, &e->port_lru_node);
            e->mlport = mlport;
        }
    }
}
139
/* Finds one of the ports with the most MAC entries and evicts its least
 * recently used entry. */
static void
evict_mac_entry_fairly(struct mac_learning *ml)
    OVS_REQ_WRLOCK(ml->rwlock)
{
    struct mac_learning_port *mlport;
    struct mac_entry *e;

    /* 'ports_by_usage' is keyed on each port's entry count (maintained by
     * mac_entry_set_port()), so heap_max() yields a port with the most
     * entries; the front of its 'port_lrus' list is that port's least
     * recently used entry. */
    mlport = CONTAINER_OF(heap_max(&ml->ports_by_usage),
                          struct mac_learning_port, heap_node);
    e = CONTAINER_OF(list_front(&mlport->port_lrus),
                     struct mac_entry, port_lru_node);
    mac_learning_expire(ml, e);
}
155
156 /* If the LRU list is not empty, stores the least-recently-used entry in '*e'
157 * and returns true. Otherwise, if the LRU list is empty, stores NULL in '*e'
158 * and return false. */
159 static bool
160 get_lru(struct mac_learning *ml, struct mac_entry **e)
161 OVS_REQ_RDLOCK(ml->rwlock)
162 {
163 if (!list_is_empty(&ml->lrus)) {
164 *e = mac_entry_from_lru_node(ml->lrus.next);
165 return true;
166 } else {
167 *e = NULL;
168 return false;
169 }
170 }
171
/* Clamps 'idle_time' to the supported MAC aging range of [15, 3600] seconds. */
static unsigned int
normalize_idle_time(unsigned int idle_time)
{
    if (idle_time < 15) {
        return 15;
    } else if (idle_time > 3600) {
        return 3600;
    }
    return idle_time;
}
179
/* Creates and returns a new MAC learning table with an initial MAC aging
 * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX
 * entries. */
struct mac_learning *
mac_learning_create(unsigned int idle_time)
{
    struct mac_learning *ml;

    ml = xmalloc(sizeof *ml);
    list_init(&ml->lrus);               /* Global LRU list of all entries. */
    hmap_init(&ml->table);              /* (mac, vlan) -> entry hash table. */
    ml->secret = random_uint32();       /* Randomizes hashing per table. */
    ml->flood_vlans = NULL;             /* NULL: learning enabled on all VLANs. */
    ml->idle_time = normalize_idle_time(idle_time);
    ml->max_entries = MAC_DEFAULT_MAX;
    ml->need_revalidate = false;
    hmap_init(&ml->ports_by_ptr);       /* Per-port records, by pointer hash. */
    heap_init(&ml->ports_by_usage);     /* Per-port records, by entry count. */
    ovs_refcount_init(&ml->ref_cnt);
    ovs_rwlock_init(&ml->rwlock);
    return ml;
}
202
203 struct mac_learning *
204 mac_learning_ref(const struct mac_learning *ml_)
205 {
206 struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_);
207 if (ml) {
208 ovs_refcount_ref(&ml->ref_cnt);
209 }
210 return ml;
211 }
212
/* Unreferences (and possibly destroys) MAC learning table 'ml'. */
void
mac_learning_unref(struct mac_learning *ml)
{
    if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) {
        struct mac_entry *e, *next;

        ovs_rwlock_wrlock(&ml->rwlock);
        /* Expiring every entry also tears down all per-port records (see
         * mac_entry_set_port()), so 'ports_by_ptr' and 'ports_by_usage' are
         * empty by the time they are destroyed below. */
        HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) {
            mac_learning_expire(ml, e);
        }
        hmap_destroy(&ml->table);
        hmap_destroy(&ml->ports_by_ptr);
        heap_destroy(&ml->ports_by_usage);

        bitmap_free(ml->flood_vlans);
        ovs_rwlock_unlock(&ml->rwlock);
        ovs_rwlock_destroy(&ml->rwlock);
        free(ml);
    }
}
234
235 /* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on
236 * which all packets are flooded. Returns true if the set has changed from the
237 * previous value. */
238 bool
239 mac_learning_set_flood_vlans(struct mac_learning *ml,
240 const unsigned long *bitmap)
241 {
242 if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) {
243 return false;
244 } else {
245 bitmap_free(ml->flood_vlans);
246 ml->flood_vlans = vlan_bitmap_clone(bitmap);
247 return true;
248 }
249 }
250
/* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */
void
mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time)
{
    idle_time = normalize_idle_time(idle_time);
    if (idle_time != ml->idle_time) {
        struct mac_entry *e;
        int delta;

        /* Shift every existing entry's expiration time by the change in idle
         * time, so entries age as if the new timeout had been in effect when
         * they were learned. */
        delta = (int) idle_time - (int) ml->idle_time;
        LIST_FOR_EACH (e, lru_node, &ml->lrus) {
            e->expires += delta;
        }
        ml->idle_time = idle_time;
    }
}
267
268 /* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it
269 * to be within a reasonable range. */
270 void
271 mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
272 {
273 ml->max_entries = (max_entries < 10 ? 10
274 : max_entries > 1000 * 1000 ? 1000 * 1000
275 : max_entries);
276 }
277
278 static bool
279 is_learning_vlan(const struct mac_learning *ml, uint16_t vlan)
280 {
281 return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan);
282 }
283
284 /* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'.
285 * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if
286 * 'vlan' is configured on 'ml' to flood all packets. */
287 bool
288 mac_learning_may_learn(const struct mac_learning *ml,
289 const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan)
290 {
291 return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac);
292 }
293
/* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan',
 * inserting a new entry if necessary.  The caller must have already verified,
 * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are
 * learnable.
 *
 * If the returned MAC entry is new (that is, if it has a NULL client-provided
 * port, as returned by mac_entry_get_port()), then the caller must initialize
 * the new entry's port to a nonnull value with mac_entry_set_port(). */
struct mac_entry *
mac_learning_insert(struct mac_learning *ml,
                    const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan)
{
    struct mac_entry *e;

    e = mac_entry_lookup(ml, src_mac, vlan);
    if (!e) {
        uint32_t hash = mac_table_hash(ml, src_mac, vlan);

        /* The table is full: make room by evicting the LRU entry of a port
         * that currently holds the most entries. */
        if (hmap_count(&ml->table) >= ml->max_entries) {
            evict_mac_entry_fairly(ml);
        }

        e = xmalloc(sizeof *e);
        hmap_insert(&ml->table, &e->hmap_node, hash);
        memcpy(e->mac, src_mac, ETH_ADDR_LEN);
        e->vlan = vlan;
        e->grat_arp_lock = TIME_MIN;
        e->mlport = NULL;       /* Caller must set via mac_entry_set_port(). */
        COVERAGE_INC(mac_learning_learned);
    } else {
        list_remove(&e->lru_node);
    }

    /* Mark 'e' as recently used. */
    list_push_back(&ml->lrus, &e->lru_node);
    if (e->mlport) {
        /* Also move 'e' to the tail of its port's per-port LRU list. */
        list_remove(&e->port_lru_node);
        list_push_back(&e->mlport->port_lrus, &e->port_lru_node);
    }
    e->expires = time_now() + ml->idle_time;

    return e;
}
337
/* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC
 * learning entry, if any.  Returns NULL for multicast destinations and for
 * VLANs configured to flood all packets. */
struct mac_entry *
mac_learning_lookup(const struct mac_learning *ml,
                    const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan)
{
    if (eth_addr_is_multicast(dst)) {
        /* No tag because the treatment of multicast destinations never
         * changes. */
        return NULL;
    } else if (!is_learning_vlan(ml, vlan)) {
        /* We don't tag this property.  The set of learning VLANs changes so
         * rarely that we revalidate every flow when it changes. */
        return NULL;
    } else {
        struct mac_entry *e = mac_entry_lookup(ml, dst, vlan);

        /* Any entry in the table must already have a client port assigned
         * (see mac_learning_insert()'s contract). */
        ovs_assert(e == NULL || mac_entry_get_port(ml, e) != NULL);
        return e;
    }
}
359
/* Expires 'e' from the 'ml' hash table.  Unlinks 'e' from every data
 * structure that refers to it and frees it. */
void
mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
{
    ml->need_revalidate = true;
    /* Setting the port to NULL detaches 'e' from its per-port record and
     * updates the fairness bookkeeping. */
    mac_entry_set_port(ml, e, NULL);
    hmap_remove(&ml->table, &e->hmap_node);
    list_remove(&e->lru_node);
    free(e);
}
370
371 /* Expires all the mac-learning entries in 'ml'. */
372 void
373 mac_learning_flush(struct mac_learning *ml)
374 {
375 struct mac_entry *e;
376 while (get_lru(ml, &e)){
377 mac_learning_expire(ml, e);
378 }
379 hmap_shrink(&ml->table);
380 }
381
/* Does periodic work required by 'ml'.  Returns true if something changed that
 * may require flow revalidation. */
bool
mac_learning_run(struct mac_learning *ml)
{
    bool need_revalidate;
    struct mac_entry *e;

    /* Expire from the front of the LRU list (oldest first) while the table is
     * over capacity or the oldest entry's idle timeout has passed. */
    while (get_lru(ml, &e)
           && (hmap_count(&ml->table) > ml->max_entries
               || time_now() >= e->expires)) {
        COVERAGE_INC(mac_learning_expired);
        mac_learning_expire(ml, e);
    }

    /* Report and reset the accumulated revalidation flag. */
    need_revalidate = ml->need_revalidate;
    ml->need_revalidate = false;
    return need_revalidate;
}
401
/* Arranges for the poll loop to wake up when 'ml' needs attention:
 * immediately if work is already pending (over capacity or revalidation
 * needed), otherwise when the oldest entry is due to expire. */
void
mac_learning_wait(struct mac_learning *ml)
{
    if (hmap_count(&ml->table) > ml->max_entries
        || ml->need_revalidate) {
        poll_immediate_wake();
    } else if (!list_is_empty(&ml->lrus)) {
        /* The front of the LRU list is the entry that will expire first;
         * 'expires' is in seconds, converted here to milliseconds. */
        struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
        poll_timer_wait_until(e->expires * 1000LL);
    }
}
412 }