]> git.proxmox.com Git - mirror_ovs.git/blame - lib/mac-learning.h
ovsdb-idl: Fix iteration over tracked rows with no actual data.
[mirror_ovs.git] / lib / mac-learning.h
CommitLineData
064af421 1/*
9d078ec2 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#ifndef MAC_LEARNING_H
18#define MAC_LEARNING_H 1
19
962ff3d6 20#include <time.h>
9d078ec2 21#include "heap.h"
ee89ea7b 22#include "openvswitch/hmap.h"
b19bab5b 23#include "openvswitch/list.h"
509c0149
EJ
24#include "ovs-atomic.h"
25#include "ovs-thread.h"
064af421 26#include "packets.h"
db8077c3 27#include "timeval.h"
064af421 28
9d078ec2
BP
29/* MAC learning table
30 * ==================
31 *
32 * A MAC learning table is a dictionary data structure that is specialized to
33 * map from an (Ethernet address, VLAN ID) pair to a user-provided pointer. In
34 * an Ethernet switch implementation, it is used to keep track of the port on
35 * which a packet from a given Ethernet address was last seen. This knowledge
36 * is useful when the switch receives a packet to such an Ethernet address, so
37 * that the switch can send the packet directly to the correct port instead of
38 * having to flood it to every port.
39 *
40 * A few complications make the implementation into more than a simple wrapper
41 * around a hash table. First, and most simply, MAC learning can be disabled
42 * on a per-VLAN basis. (This is most useful for RSPAN; see
43 * ovs-vswitchd.conf.db(5) documentation of the "output_vlan" column in the
44 * Mirror table for more information.) The data structure maintains a bitmap
45 * to track such VLANs.
46 *
47 * Second, the implementation has the ability to "lock" a MAC table entry
48 * updated by a gratuitous ARP. This is a simple feature but the rationale for
7c9afefd
SF
49 * it is complicated. Refer to the description of SLB bonding in the
50 * 'ovs-vswitchd Internals' guide for an explanation.
9d078ec2
BP
51 *
52 * Third, the implementation expires entries that are idle for longer than a
53 * configurable amount of time. This is implemented by keeping all of the
54 * current table entries on a list ordered from least recently used (LRU) to
55 * most recently used (MRU). Each time a MAC entry is used, it is moved to the
56 * MRU end of the list. Periodically mac_learning_run() sweeps through the
57 * list starting from the LRU end, deleting each entry that has been idle too
58 * long.
59 *
60 * Finally, the number of MAC learning table entries has a configurable maximum
61 * size to prevent memory exhaustion. When a new entry must be inserted but
62 * the table is already full, the implementation uses an eviction strategy
63 * based on fairness: it chooses the port that currently has the greatest number of
64 * learned MACs (choosing arbitrarily in case of a tie), and among that port's
65 * entries it evicts the least recently used. (This is a security feature
66 * because it prevents an attacker from forcing other ports' MACs out of the
67 * MAC learning table with a "MAC flooding attack" that causes the other ports'
68 * traffic to be flooded so that the attacker can easily sniff it.) The
69 * implementation of this feature is like a specialized form of the
70 * general-purpose "eviction groups" that OVS implements in OpenFlow (see the
71 * documentation of the "groups" column in the Flow_Table table in
72 * ovs-vswitchd.conf.db(5) for details).
73 *
74 *
75 * Thread-safety
76 * =============
77 *
78 * Many operations require the caller to take the MAC learning table's rwlock
79 * for writing (please refer to the Clang thread safety annotations). The
80 * important exception to this is mac_learning_lookup(), which only needs a
81 * read lock. This is useful for the common case where a MAC learning entry
82 * being looked up already exists and does not need an update. However,
83 * there's no deadlock-free way to upgrade a read lock to a write lock, so in
84 * the case where the lookup result means that an update is required, the
85 * caller must drop the read lock, take the write lock, and then repeat the
86 * lookup (in case some other thread has already made a change).
87 */
88
e764773c
BP
89struct mac_learning;
90
c4069512 91/* Default maximum size of a MAC learning table, in entries. */
2468f675 92#define MAC_DEFAULT_MAX 8192
962ff3d6 93
321943f7 94/* Time, in seconds, before expiring a mac_entry due to inactivity. */
e764773c 95#define MAC_ENTRY_DEFAULT_IDLE_TIME 300
321943f7 96
7febb910 97/* Time, in seconds, to lock an entry updated by a gratuitous ARP to avoid
91fc374a 98 * relearning based on a reflection from a bond member. */
7febb910
JG
99#define MAC_GRAT_ARP_LOCK_TIME 5
100
509c0149 101/* A MAC learning table entry.
9d078ec2 102 * Guarded by owning 'mac_learning''s rwlock.
 *
 * An entry records one (MAC address, VLAN) pair together with the
 * mac_learning_port it was learned on. */
962ff3d6 103struct mac_entry {
8ea45fdc 104 struct hmap_node hmap_node; /* Node in a mac_learning hmap. */
962ff3d6 105 time_t expires; /* Expiration time. */
7febb910 106 time_t grat_arp_lock; /* Gratuitous ARP lock expiration time; compared
                           * against time_now() by
                           * mac_entry_is_grat_arp_locked(). */
74ff3298 107 struct eth_addr mac; /* Known MAC address. */
962ff3d6 108 uint16_t vlan; /* VLAN tag. */
1bfe9681 109
509c0149 110 /* The following are marked guarded to prevent users from iterating over or
9d078ec2 111 * accessing a mac_entry without holding the parent mac_learning rwlock. */
ca6ba700 112 struct ovs_list lru_node OVS_GUARDED; /* Element in 'lrus' list. */
509c0149 113
9d078ec2
BP
114 /* Learned port.
115 *
116 * The client-specified data is mlport->port.
 *
 * 'mlport' may be NULL; mac_entry_get_port() reports NULL in that case. */
117 struct mac_learning_port *mlport;
118 struct ovs_list port_lru_node; /* In mac_learning_port's "port_lrus". */
119};
120
/* Accessors for the client-provided port pointer associated with a MAC entry.
 *
 * mac_entry_get_port() is defined inline near the end of this header; it
 * yields the entry's mlport->port, or NULL when the entry has no mlport.
 *
 * NOTE(review): mac_entry_set_port() is defined outside this header;
 * presumably it (re)associates the entry with 'port' -- confirm in
 * mac-learning.c. */
121static inline void *mac_entry_get_port(const struct mac_learning *ml,
122 const struct mac_entry *);
123void mac_entry_set_port(struct mac_learning *, struct mac_entry *, void *port);
124
125/* Information about client-provided port pointers (the 'port' member), to
126 * allow for per-port fairness.
127 *
128 * The client-provided pointer is opaque to the MAC-learning table, which never
129 * dereferences it.
 *
 * Each mac_learning_port is reachable through two indexes in the owning
 * mac_learning: the "ports_by_ptr" hash table and the "ports_by_usage"
 * heap. */
130struct mac_learning_port {
131 struct hmap_node hmap_node; /* In mac_learning's "ports_by_ptr". */
132 struct heap_node heap_node; /* In mac_learning's "ports_by_usage". */
133 void *port; /* Client-provided port pointer. */
134 struct ovs_list port_lrus; /* Contains "struct mac_entry"s, linked through
                             * their 'port_lru_node' member. */
962ff3d6
BP
135};
136
db8077c3
BP
137/* Sets a gratuitous ARP lock on 'mac' that will expire in
138 * MAC_GRAT_ARP_LOCK_TIME seconds. */
139static inline void mac_entry_set_grat_arp_lock(struct mac_entry *mac)
140{
141 mac->grat_arp_lock = time_now() + MAC_GRAT_ARP_LOCK_TIME;
142}
143
144/* Returns true if a gratuitous ARP lock is in effect on 'mac', false if none
145 * has ever been asserted or if it has expired. */
146static inline bool mac_entry_is_grat_arp_locked(const struct mac_entry *mac)
147{
33065c43 148 return time_now() < mac->grat_arp_lock;
db8077c3
BP
149}
150
962ff3d6
BP
151/* MAC learning table.
 *
 * Holds every learned mac_entry, the configuration that bounds the table
 * (idle time, maximum size, flood VLANs), running statistics, and the
 * per-port bookkeeping used for fair eviction. */
152struct mac_learning {
8ea45fdc 153 struct hmap table; /* Learning table. */
9d078ec2 154 struct ovs_list lrus OVS_GUARDED; /* In-use entries, LRU at front. */
8f30d09a
BP
155 uint32_t secret; /* Secret for randomizing hash table. */
156 unsigned long *flood_vlans; /* Bitmap of learning disabled VLANs. */
e764773c 157 unsigned int idle_time; /* Max age before deleting an entry. */
c4069512 158 size_t max_entries; /* Max number of learned MACs. */
37bec3d3 159 struct ovs_refcount ref_cnt; /* NOTE(review): presumably managed by
                                    * mac_learning_ref()/mac_learning_unref()
                                    * -- confirm in mac-learning.c. */
509c0149 160 struct ovs_rwlock rwlock; /* Lock named by the OVS_REQ_RDLOCK and
                                 * OVS_REQ_WRLOCK annotations in this
                                 * header. */
30618594 161 bool need_revalidate; /* NOTE(review): meaning not visible in this
                             * header; presumably signals that callers must
                             * revalidate -- confirm in mac-learning.c. */
9d078ec2 162
f34a5d40
EC
163 /* Statistics
 *
 * NOTE(review): the counters below are presumably bumped when an entry
 * is learned, expired, evicted, or moved to another port, and reset by
 * mac_learning_clear_statistics() -- confirm in mac-learning.c. */
164 uint64_t total_learned;
165 uint64_t total_expired;
166 uint64_t total_evicted;
167 uint64_t total_moved;
168
9d078ec2
BP
169 /* Fairness.
170 *
171 * Both of these data structures include the same "struct
172 * mac_learning_port" but indexed differently.
173 *
174 * ports_by_usage is a per-port max-heap, in which the priority is the
175 * number of MAC addresses for the port. When the MAC learning table
176 * overflows, this allows us to evict a MAC entry from one of the ports
177 * that have the largest number of MAC entries, achieving a form of
178 * fairness.
179 *
180 * ports_by_ptr is a hash table indexed by the client-provided pointer. */
181 struct hmap ports_by_ptr; /* struct mac_learning_port hmap_nodes. */
182 struct heap ports_by_usage; /* struct mac_learning_port heap_nodes. */
962ff3d6
BP
183};
184
509c0149
EJ
/* NOTE(review): presumably reports how long ago entry 'e' was last used
 * within 'ml' -- confirm the exact unit and computation in mac-learning.c.
 *
 * Per the annotation, the caller must hold at least a read lock on
 * 'ml->rwlock'. */
185int mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
186 OVS_REQ_RDLOCK(ml->rwlock);
187
db8077c3 188/* Basics.
 *
 * None of these prototypes carries a lock annotation.
 *
 * NOTE(review): presumably mac_learning_create() builds a table whose
 * entries expire after 'idle_time' (cf. MAC_ENTRY_DEFAULT_IDLE_TIME, which is
 * in seconds), mac_learning_ref()/mac_learning_unref() adjust the table's
 * 'ref_cnt', and mac_learning_clear_statistics() resets the 'total_*'
 * counters -- confirm in mac-learning.c. */
e764773c 189struct mac_learning *mac_learning_create(unsigned int idle_time);
5d989517
EJ
190struct mac_learning *mac_learning_ref(const struct mac_learning *);
191void mac_learning_unref(struct mac_learning *);
95dcecc5 192void mac_learning_clear_statistics(struct mac_learning *ml);
db8077c3 193
/* Periodic maintenance.
 *
 * mac_learning_run() is the periodic sweep described in the header comment:
 * it walks the LRU list from the least recently used end and deletes entries
 * that have been idle too long.  The caller must hold 'ml->rwlock' for
 * writing.  NOTE(review): the meaning of the bool result is not visible here;
 * presumably it reports whether revalidation is needed -- confirm.
 *
 * mac_learning_wait() requires at least a read lock on 'ml->rwlock'.
 * NOTE(review): presumably it arranges a wakeup for when mac_learning_run()
 * next has work to do -- confirm in mac-learning.c. */
30618594 194bool mac_learning_run(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock);
509c0149
EJ
195void mac_learning_wait(struct mac_learning *ml)
196 OVS_REQ_RDLOCK(ml->rwlock);
db8077c3
BP
197
198/* Configuration.
 *
 * Every setter below requires the caller to hold 'ml->rwlock' for writing.
 *
 * mac_learning_set_flood_vlans() takes a bitmap of the VLANs on which
 * learning is disabled (see the 'flood_vlans' member).  NOTE(review): the
 * bool result presumably reports whether the configuration changed --
 * confirm in mac-learning.c.
 *
 * mac_learning_set_idle_time() and mac_learning_set_max_entries() update the
 * idle timeout and the maximum number of learned MACs, respectively. */
509c0149
EJ
199bool mac_learning_set_flood_vlans(struct mac_learning *ml,
200 const unsigned long *bitmap)
201 OVS_REQ_WRLOCK(ml->rwlock);
202void mac_learning_set_idle_time(struct mac_learning *ml,
203 unsigned int idle_time)
204 OVS_REQ_WRLOCK(ml->rwlock);
205void mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries)
206 OVS_REQ_WRLOCK(ml->rwlock);
db8077c3
BP
207
208/* Learning.
 *
 * mac_learning_may_learn() needs only a read lock on 'ml->rwlock'.
 * NOTE(review): presumably it reports whether 'src_mac' on 'vlan' is eligible
 * for learning (e.g. the VLAN is not in 'flood_vlans') -- confirm.
 *
 * mac_learning_insert() requires the write lock and hands back a mac_entry
 * for ('src', 'vlan').  NOTE(review): whether an existing entry is reused is
 * not visible in this header -- confirm in mac-learning.c.
 *
 * mac_learning_update() is annotated OVS_EXCLUDED(ml->rwlock).
 * NOTE(review): presumably the caller must not already hold the lock because
 * the function acquires it internally -- confirm. */
509c0149 209bool mac_learning_may_learn(const struct mac_learning *ml,
74ff3298 210 const struct eth_addr src_mac,
509c0149
EJ
211 uint16_t vlan)
212 OVS_REQ_RDLOCK(ml->rwlock);
213struct mac_entry *mac_learning_insert(struct mac_learning *ml,
74ff3298 214 const struct eth_addr src,
509c0149
EJ
215 uint16_t vlan)
216 OVS_REQ_WRLOCK(ml->rwlock);
064799a1
JR
217bool mac_learning_update(struct mac_learning *ml, struct eth_addr src,
218 int vlan, bool is_gratuitous_arp, bool is_bond,
219 void *in_port)
220 OVS_EXCLUDED(ml->rwlock);
db8077c3
BP
221
222/* Lookup.
 *
 * mac_learning_lookup() needs only a read lock on 'ml->rwlock'.  As the
 * thread-safety notes in this header explain, a read lock cannot be upgraded
 * without risking deadlock: if the result shows that an update is required,
 * the caller must drop the read lock, take the write lock, and repeat the
 * lookup. */
509c0149 223struct mac_entry *mac_learning_lookup(const struct mac_learning *ml,
74ff3298 224 const struct eth_addr dst,
30618594 225 uint16_t vlan)
509c0149 226 OVS_REQ_RDLOCK(ml->rwlock);
db8077c3
BP
227
228/* Flushing.
 *
 * Both functions require the caller to hold 'ml->rwlock' for writing.
 *
 * NOTE(review): presumably mac_learning_expire() removes the single entry 'e'
 * from 'ml' and mac_learning_flush() removes every entry -- confirm in
 * mac-learning.c. */
509c0149
EJ
229void mac_learning_expire(struct mac_learning *ml, struct mac_entry *e)
230 OVS_REQ_WRLOCK(ml->rwlock);
30618594 231void mac_learning_flush(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock);
9d078ec2
BP
232\f
233/* Inlines. */
234
235static inline void *
236mac_entry_get_port(const struct mac_learning *ml OVS_UNUSED,
237 const struct mac_entry *e)
238 OVS_REQ_RDLOCK(ml->rwlock)
239{
240 return e->mlport ? e->mlport->port : NULL;
241}
064af421
BP
242
243#endif /* mac-learning.h */