]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
9d078ec2 | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #ifndef MAC_LEARNING_H | |
18 | #define MAC_LEARNING_H 1 | |
19 | ||
962ff3d6 | 20 | #include <time.h> |
9d078ec2 | 21 | #include "heap.h" |
ee89ea7b | 22 | #include "openvswitch/hmap.h" |
b19bab5b | 23 | #include "openvswitch/list.h" |
509c0149 EJ |
24 | #include "ovs-atomic.h" |
25 | #include "ovs-thread.h" | |
064af421 | 26 | #include "packets.h" |
db8077c3 | 27 | #include "timeval.h" |
064af421 | 28 | |
9d078ec2 BP |
29 | /* MAC learning table |
30 | * ================== | |
31 | * | |
32 | * A MAC learning table is a dictionary data structure that is specialized to | |
33 | * map from an (Ethernet address, VLAN ID) pair to a user-provided pointer. In | |
34 | * an Ethernet switch implementation, it is used to keep track of the port on | |
35 | * which a packet from a given Ethernet address was last seen. This knowledge | |
36 | * is useful when the switch receives a packet to such an Ethernet address, so | |
37 | * that the switch can send the packet directly to the correct port instead of | |
38 | * having to flood it to every port. | |
39 | * | |
40 | * A few complications make the implementation into more than a simple wrapper | |
41 | * around a hash table. First, and most simply, MAC learning can be disabled | |
42 | * on a per-VLAN basis. (This is most useful for RSPAN; see | |
43 | * ovs-vswitchd.conf.db(5) documentation of the "output_vlan" column in the | |
44 | * Mirror table for more information.) The data structure maintains a bitmap | |
45 | * to track such VLANs. | |
46 | * | |
47 | * Second, the implementation has the ability to "lock" a MAC table entry | |
48 | * updated by a gratuitous ARP. This is a simple feature but the rationale for | |
7c9afefd SF |
49 | * it is complicated. Refer to the description of SLB bonding in the |
50 | * 'ovs-vswitchd Internals' guide for an explanation. | |
9d078ec2 BP |
51 | * |
52 | * Third, the implementation expires entries that are idle for longer than a | |
53 | * configurable amount of time. This is implemented by keeping all of the | |
54 | * current table entries on a list ordered from least recently used (LRU) to | |
55 | * most recently used (MRU). Each time a MAC entry is used, it is moved to the | |
56 | * MRU end of the list. Periodically mac_learning_run() sweeps through the | |
57 | * list starting from the LRU end, deleting each entry that has been idle too | |
58 | * long. | |
59 | * | |
60 | * Finally, the number of MAC learning table entries has a configurable maximum | |
61 | * size to prevent memory exhaustion. When a new entry must be inserted but | |
62 | * the table is already full, the implementation uses an eviction strategy | |
63 | * based on fairness: it chooses the port that currently has the greatest number of | |
64 | * learned MACs (choosing arbitrarily in case of a tie), and among that port's | |
65 | * entries it evicts the least recently used. (This is a security feature | |
66 | * because it prevents an attacker from forcing other ports' MACs out of the | |
67 | * MAC learning table with a "MAC flooding attack" that causes the other ports' | |
68 | * traffic to be flooded so that the attacker can easily sniff it.) The | |
69 | * implementation of this feature is like a specialized form of the | |
70 | * general-purpose "eviction groups" that OVS implements in OpenFlow (see the | |
71 | * documentation of the "groups" column in the Flow_Table table in | |
72 | * ovs-vswitchd.conf.db(5) for details). | |
73 | * | |
74 | * | |
75 | * Thread-safety | |
76 | * ============= | |
77 | * | |
78 | * Many operations require the caller to take the MAC learning table's rwlock | |
79 | * for writing (please refer to the Clang thread safety annotations). The | |
80 | * important exception to this is mac_learning_lookup(), which only needs a | |
81 | * read lock. This is useful for the common case where a MAC learning entry | |
82 | * being looked up already exists and does not need an update. However, | |
83 | * there's no deadlock-free way to upgrade a read lock to a write lock, so in | |
84 | * the case where the lookup result means that an update is required, the | |
85 | * caller must drop the read lock, take the write lock, and then repeat the | |
86 | * lookup (in case some other thread has already made a change). | |
87 | */ | |
88 | ||
e764773c BP |
89 | struct mac_learning; |
90 | ||
c4069512 | 91 | /* Default maximum size of a MAC learning table, in entries. */ |
2468f675 | 92 | #define MAC_DEFAULT_MAX 8192 |
962ff3d6 | 93 | |
321943f7 | 94 | /* Time, in seconds, before expiring a mac_entry due to inactivity. */ |
e764773c | 95 | #define MAC_ENTRY_DEFAULT_IDLE_TIME 300 |
321943f7 | 96 | |
7febb910 JG |
97 | /* Time, in seconds, to lock an entry updated by a gratuitous ARP to avoid |
98 | * relearning based on a reflection from a bond slave. */ | |
99 | #define MAC_GRAT_ARP_LOCK_TIME 5 | |
100 | ||
509c0149 | 101 | /* A MAC learning table entry. |
9d078ec2 | 102 | * Guarded by owning 'mac_learning''s rwlock. */ |
962ff3d6 | 103 | struct mac_entry { |
8ea45fdc | 104 | struct hmap_node hmap_node; /* Node in a mac_learning hmap. */ |
962ff3d6 | 105 | time_t expires; /* Expiration time. */ |
7febb910 | 106 | time_t grat_arp_lock; /* Gratuitous ARP lock expiration time; tested by mac_entry_is_grat_arp_locked(). */ |
74ff3298 | 107 | struct eth_addr mac; /* Known MAC address. */ |
962ff3d6 | 108 | uint16_t vlan; /* VLAN tag. */ |
1bfe9681 | 109 | |
509c0149 | 110 | /* The following are marked guarded to prevent users from iterating over or |
9d078ec2 | 111 | * accessing a mac_entry without holding the parent mac_learning rwlock. */ |
ca6ba700 | 112 | struct ovs_list lru_node OVS_GUARDED; /* Element in mac_learning's 'lrus' list. */ |
509c0149 | 113 | |
9d078ec2 BP |
114 | /* Learned port. |
115 | * | |
116 | * The client-specified data is mlport->port. */ | |
117 | struct mac_learning_port *mlport; | |
118 | struct ovs_list port_lru_node; /* Element in mac_learning_port's "port_lrus" list. */ | |
119 | }; | |
120 | ||
121 | static inline void *mac_entry_get_port(const struct mac_learning *ml, | |
122 | const struct mac_entry *); | |
123 | void mac_entry_set_port(struct mac_learning *, struct mac_entry *, void *port); | |
124 | ||
125 | /* Information about client-provided port pointers (the 'port' member), to | |
126 | * allow for per-port fairness. | |
127 | * | |
128 | * The client-provided pointer is opaque to the MAC-learning table, which never | |
129 | * dereferences it. */ | |
130 | struct mac_learning_port { | |
131 | struct hmap_node hmap_node; /* In mac_learning's "ports_by_ptr". */ | |
132 | struct heap_node heap_node; /* In mac_learning's "ports_by_usage". */ | |
133 | void *port; /* Client-provided port pointer; opaque, never dereferenced here. */ | |
134 | struct ovs_list port_lrus; /* Contains "struct mac_entry"s, linked by their port_lru_node. */ | |
962ff3d6 BP |
135 | }; |
136 | ||
db8077c3 BP |
137 | /* Sets a gratuitous ARP lock on 'mac' that will expire in |
138 | * MAC_GRAT_ARP_LOCK_TIME seconds, overwriting any earlier lock expiry. */ | |
139 | static inline void mac_entry_set_grat_arp_lock(struct mac_entry *mac) | |
140 | { | |
141 | mac->grat_arp_lock = time_now() + MAC_GRAT_ARP_LOCK_TIME; | |
142 | } | |
143 | ||
144 | /* Returns true if a gratuitous ARP lock is in effect on 'mac', false if none | |
145 | * has ever been asserted or if it has expired. */ | |
146 | static inline bool mac_entry_is_grat_arp_locked(const struct mac_entry *mac) | |
147 | { | |
33065c43 | 148 | return time_now() < mac->grat_arp_lock; /* Strict '<': no longer locked once time_now() reaches the stored expiry. */ |
db8077c3 BP |
149 | } |
150 | ||
962ff3d6 BP |
151 | /* MAC learning table. */ |
152 | struct mac_learning { | |
8ea45fdc | 153 | struct hmap table; /* Learning table. */ |
9d078ec2 | 154 | struct ovs_list lrus OVS_GUARDED; /* In-use entries, LRU at front. */ |
8f30d09a BP |
155 | uint32_t secret; /* Secret for randomizing hash table. */ |
156 | unsigned long *flood_vlans; /* Bitmap of learning disabled VLANs. */ | |
e764773c | 157 | unsigned int idle_time; /* Max age before deleting an entry (presumably seconds, like MAC_ENTRY_DEFAULT_IDLE_TIME -- confirm). */ |
c4069512 | 158 | size_t max_entries; /* Max number of learned MACs. */ |
37bec3d3 | 159 | struct ovs_refcount ref_cnt; /* Reference count; presumably managed by mac_learning_ref()/mac_learning_unref() -- confirm. */ |
509c0149 | 160 | struct ovs_rwlock rwlock; /* The table's rwlock; see "Thread-safety" above. */ |
30618594 | 161 | bool need_revalidate; |
9d078ec2 | 162 | |
f34a5d40 EC |
163 | /* Statistics */ |
164 | uint64_t total_learned; | |
165 | uint64_t total_expired; | |
166 | uint64_t total_evicted; | |
167 | uint64_t total_moved; | |
168 | ||
9d078ec2 BP |
169 | /* Fairness. |
170 | * | |
171 | * Both of these data structures include the same "struct | |
172 | * mac_learning_port" but indexed differently. | |
173 | * | |
174 | * ports_by_usage is a per-port max-heap, in which the priority is the | |
175 | * number of MAC addresses for the port. When the MAC learning table | |
176 | * overflows, this allows us to evict a MAC entry from one of the ports | |
177 | * that have the largest number of MAC entries, achieving a form of | |
178 | * fairness. | |
179 | * | |
180 | * ports_by_ptr is a hash table indexed by the client-provided pointer. */ | |
181 | struct hmap ports_by_ptr; /* struct mac_learning_port hmap_nodes. */ | |
182 | struct heap ports_by_usage; /* struct mac_learning_port heap_nodes. */ | |
962ff3d6 BP |
183 | }; |
184 | ||
509c0149 EJ |
185 | int mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e) |
186 | OVS_REQ_RDLOCK(ml->rwlock); | |
187 | ||
db8077c3 | 188 | /* Basics. */ |
e764773c | 189 | struct mac_learning *mac_learning_create(unsigned int idle_time); |
5d989517 EJ |
190 | struct mac_learning *mac_learning_ref(const struct mac_learning *); |
191 | void mac_learning_unref(struct mac_learning *); | |
95dcecc5 | 192 | void mac_learning_clear_statistics(struct mac_learning *ml); |
db8077c3 | 193 | |
30618594 | 194 | bool mac_learning_run(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock); /* Periodic sweep that deletes entries idle too long; see "MAC learning table" above. */ |
509c0149 EJ |
195 | void mac_learning_wait(struct mac_learning *ml) |
196 | OVS_REQ_RDLOCK(ml->rwlock); | |
db8077c3 BP |
197 | |
198 | /* Configuration. */ | |
509c0149 EJ |
199 | bool mac_learning_set_flood_vlans(struct mac_learning *ml, |
200 | const unsigned long *bitmap) | |
201 | OVS_REQ_WRLOCK(ml->rwlock); | |
202 | void mac_learning_set_idle_time(struct mac_learning *ml, | |
203 | unsigned int idle_time) | |
204 | OVS_REQ_WRLOCK(ml->rwlock); | |
205 | void mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries) | |
206 | OVS_REQ_WRLOCK(ml->rwlock); | |
db8077c3 BP |
207 | |
208 | /* Learning. */ | |
509c0149 | 209 | bool mac_learning_may_learn(const struct mac_learning *ml, |
74ff3298 | 210 | const struct eth_addr src_mac, |
509c0149 EJ |
211 | uint16_t vlan) |
212 | OVS_REQ_RDLOCK(ml->rwlock); | |
213 | struct mac_entry *mac_learning_insert(struct mac_learning *ml, | |
74ff3298 | 214 | const struct eth_addr src, |
509c0149 EJ |
215 | uint16_t vlan) |
216 | OVS_REQ_WRLOCK(ml->rwlock); | |
064799a1 JR |
217 | bool mac_learning_update(struct mac_learning *ml, struct eth_addr src, |
218 | int vlan, bool is_gratuitous_arp, bool is_bond, | |
219 | void *in_port) | |
220 | OVS_EXCLUDED(ml->rwlock); /* NOTE(review): annotated as excluding the rwlock, so presumably it takes the lock itself -- confirm. */ | |
db8077c3 BP |
221 | |
222 | /* Lookup. */ | |
509c0149 | 223 | struct mac_entry *mac_learning_lookup(const struct mac_learning *ml, |
74ff3298 | 224 | const struct eth_addr dst, |
30618594 | 225 | uint16_t vlan) |
509c0149 | 226 | OVS_REQ_RDLOCK(ml->rwlock); /* A read lock suffices here; see "Thread-safety" above. */ |
db8077c3 BP |
227 | |
228 | /* Flushing. */ | |
509c0149 EJ |
229 | void mac_learning_expire(struct mac_learning *ml, struct mac_entry *e) |
230 | OVS_REQ_WRLOCK(ml->rwlock); | |
30618594 | 231 | void mac_learning_flush(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock); |
9d078ec2 BP |
232 | \f |
233 | /* Inlines. */ | |
234 | ||
235 | static inline void * | |
236 | mac_entry_get_port(const struct mac_learning *ml OVS_UNUSED, /* 'ml' is referenced only by the lock annotation below. */ | |
237 | const struct mac_entry *e) | |
238 | OVS_REQ_RDLOCK(ml->rwlock) | |
239 | { | |
240 | return e->mlport ? e->mlport->port : NULL; /* Client-provided port pointer, or NULL if 'e' has no mlport. */ | |
241 | } | |
064af421 BP |
242 | |
243 | #endif /* mac-learning.h */ |