]>
Commit | Line | Data |
---|---|---|
acddc0ed | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
5e5034b0 DL |
2 | /* |
3 | * PIMv6 MLD querier | |
4 | * Copyright (C) 2021-2022 David Lamparter for NetDEF, Inc. | |
5e5034b0 DL |
5 | */ |
6 | ||
7 | #ifndef PIM6_MLD_H | |
8 | #define PIM6_MLD_H | |
9 | ||
10 | #include "typesafe.h" | |
11 | #include "pim_addr.h" | |
12 | ||
/* forward declarations only; the full definitions are provided elsewhere */
struct event;
struct pim_instance;
struct gm_packet_sg;
struct gm_if;
struct channel_oil;

/* speak MLDv2 unless configured otherwise */
#define MLD_DEFAULT_VERSION 2
5e5034b0 DL |
/* sense of a subscription; see the subs_negative/subs_positive discussion
 * on struct gm_sg below.
 */
enum gm_sub_sense {
	/* negative/pruning: S,G in EXCLUDE */
	GM_SUB_NEG = 0,
	/* positive/joining: *,G in EXCLUDE and S,G in INCLUDE */
	GM_SUB_POS = 1,
};
28 | ||
/* join/prune state of a single (S,G) or (*,G) entry */
enum gm_sg_state {
	GM_SG_NOINFO = 0,
	GM_SG_JOIN,
	GM_SG_JOIN_EXPIRING,
	/* the remaining 3 states are only valid for an S,G whose *,G is
	 * in EXCLUDE mode
	 */
	GM_SG_PRUNE,
	GM_SG_NOPRUNE,
	GM_SG_NOPRUNE_EXPIRING,
};
38 | ||
39 | static inline bool gm_sg_state_want_join(enum gm_sg_state state) | |
40 | { | |
41 | return state != GM_SG_NOINFO && state != GM_SG_PRUNE; | |
42 | } | |
43 | ||
44 | /* MLD (S,G) state (on an interface) | |
45 | * | |
46 | * group is always != ::, src is :: for (*,G) joins. sort order in RB tree is | |
47 | * such that sources for a particular group can be iterated by starting at the | |
48 | * group. For INCLUDE, no (*,G) entry exists, only (S,G). | |
49 | */ | |
50 | ||
51 | PREDECL_RBTREE_UNIQ(gm_packet_sg_subs); | |
52 | PREDECL_RBTREE_UNIQ(gm_sgs); | |
53 | struct gm_sg { | |
54 | pim_sgaddr sgaddr; | |
55 | struct gm_if *iface; | |
56 | struct gm_sgs_item itm; | |
57 | ||
58 | enum gm_sg_state state; | |
59 | struct channel_oil *oil; | |
60 | bool tib_joined; | |
61 | ||
aa2f9349 DL |
62 | struct timeval created; |
63 | ||
5e5034b0 DL |
64 | /* if a group- or group-and-source specific query is running |
65 | * (implies we haven't received any report yet, since it's cancelled | |
66 | * by that) | |
67 | */ | |
e6685141 | 68 | struct event *t_sg_expire; |
5e5034b0 DL |
69 | |
70 | /* last-member-left triggered queries (group/group-source specific) | |
71 | * | |
72 | * this timer will be running even if we aren't the elected querier, | |
73 | * in case the election result changes midway through. | |
74 | */ | |
e6685141 | 75 | struct event *t_sg_query; |
5e5034b0 DL |
76 | |
77 | /* we must keep sending (QRV) queries even if we get a positive | |
78 | * response, to make sure other routers are updated. query_sbit | |
79 | * will be set in that case, since other routers need the *response*, | |
80 | * not the *query* | |
81 | */ | |
82 | uint8_t n_query; | |
83 | bool query_sbit; | |
84 | ||
85 | /* subs_positive tracks gm_packet_sg resulting in a JOIN, i.e. for | |
86 | * (*,G) it has *EXCLUDE* items, for (S,G) it has *INCLUDE* items. | |
87 | * | |
88 | * subs_negative is always empty for (*,G) and tracks EXCLUDE items | |
89 | * for (S,G). This means that an (S,G) entry is active as a PRUNE if | |
90 | * len(src->subs_negative) == len(grp->subs_positive) | |
91 | * && len(src->subs_positive) == 0 | |
92 | * (i.e. all receivers for the group opted to exclude this S,G and | |
93 | * noone did an SSM join for the S,G) | |
94 | */ | |
95 | union { | |
96 | struct { | |
97 | struct gm_packet_sg_subs_head subs_negative[1]; | |
98 | struct gm_packet_sg_subs_head subs_positive[1]; | |
99 | }; | |
100 | struct gm_packet_sg_subs_head subs[2]; | |
101 | }; | |
102 | ||
103 | /* If the elected querier is not ourselves, queries and reports might | |
104 | * get reordered in rare circumstances, i.e. the report could arrive | |
105 | * just a microsecond before the query kicks off the timer. This can | |
106 | * then result in us thinking there are no more receivers since no | |
107 | * report might be received during the query period. | |
108 | * | |
109 | * To avoid this, keep track of the most recent report for this (S,G) | |
110 | * so we can do a quick check to add just a little bit of slack. | |
111 | * | |
112 | * EXCLUDE S,Gs are never in most_recent. | |
113 | */ | |
114 | struct gm_packet_sg *most_recent; | |
115 | }; | |
00fed6ed A |
116 | int gm_sg_cmp(const struct gm_sg *a, const struct gm_sg *b); |
117 | DECLARE_RBTREE_UNIQ(gm_sgs, struct gm_sg, itm, gm_sg_cmp); | |
5e5034b0 DL |
118 | |
119 | /* host tracking entry. addr will be one of: | |
120 | * | |
121 | * :: - used by hosts during address acquisition | |
122 | * ::1 - may show up on some OS for joins by the router itself | |
123 | * link-local - regular operation by MLDv2 hosts | |
124 | * ffff:..:ffff - MLDv1 entry (cannot be tracked due to report suppression) | |
125 | * | |
126 | * global scope IPv6 addresses can never show up here | |
127 | */ | |
128 | PREDECL_HASH(gm_subscribers); | |
129 | PREDECL_DLIST(gm_packets); | |
130 | struct gm_subscriber { | |
131 | pim_addr addr; | |
132 | struct gm_subscribers_item itm; | |
133 | ||
134 | struct gm_if *iface; | |
135 | size_t refcount; | |
136 | ||
137 | struct gm_packets_head packets[1]; | |
aa2f9349 DL |
138 | |
139 | struct timeval created; | |
5e5034b0 DL |
140 | }; |
141 | ||
142 | /* | |
143 | * MLD join state is kept batched by packet. Since the timers for all items | |
144 | * in a packet are the same, this reduces the number of timers we're keeping | |
145 | * track of. It also eases tracking for EXCLUDE state groups because the | |
146 | * excluded sources are in the same packet. (MLD does not support splitting | |
147 | * that if it exceeds MTU, it's always a full replace for exclude.) | |
148 | * | |
149 | * Since packets may be partially superseded by newer packets, the "active" | |
150 | * field is used to track this. | |
151 | */ | |
152 | ||
153 | /* gm_packet_sg is allocated as part of gm_packet_state, note the items[0] | |
154 | * array at the end of that. gm_packet_sg is NEVER directly allocated with | |
155 | * XMALLOC/XFREE. | |
156 | */ | |
157 | struct gm_packet_sg { | |
158 | /* non-NULL as long as this gm_packet_sg is the most recent entry | |
159 | * for (subscriber,S,G). Cleared to NULL when a newer packet by the | |
160 | * subscriber replaces this item. | |
161 | * | |
162 | * (Old items are kept around so we don't need to realloc/resize | |
163 | * gm_packet_state, which would mess up a whole lot of pointers) | |
164 | */ | |
165 | struct gm_sg *sg; | |
166 | ||
167 | /* gm_sg -> (subscriber, gm_packet_sg) | |
168 | * only on RB-tree while sg != NULL, i.e. not superseded by newer. | |
169 | */ | |
170 | struct gm_packet_sg_subs_item subs_itm; | |
171 | ||
172 | bool is_src : 1; /* := (src != ::) */ | |
173 | bool is_excl : 1; | |
174 | ||
175 | /* for getting back to struct gm_packet_state, cf. | |
176 | * gm_packet_sg2state() below | |
177 | */ | |
178 | uint16_t offset; | |
179 | ||
180 | /* if this is a group entry in EXCLUDE state, n_exclude counts how | |
181 | * many sources are on the exclude list here. They follow immediately | |
182 | * after. | |
183 | */ | |
184 | uint16_t n_exclude; | |
185 | }; | |
186 | ||
/* map an items[] entry back to its enclosing gm_packet_state, using the
 * entry's own offset field as the array index.
 *
 * (the macro argument is parenthesized so that any expression, not just
 * a plain identifier, expands correctly)
 */
#define gm_packet_sg2state(sg)                                                 \
	container_of((sg), struct gm_packet_state, items[(sg)->offset])
189 | ||
190 | PREDECL_DLIST(gm_packet_expires); | |
191 | struct gm_packet_state { | |
192 | struct gm_if *iface; | |
193 | struct gm_subscriber *subscriber; | |
194 | struct gm_packets_item pkt_itm; | |
195 | ||
196 | struct timeval received; | |
197 | struct gm_packet_expires_item exp_itm; | |
198 | ||
199 | /* n_active starts equal to n_sg; whenever active is set to false on | |
200 | * an item it is decremented. When n_active == 0, the packet can be | |
201 | * freed. | |
202 | */ | |
203 | uint16_t n_sg, n_active; | |
204 | struct gm_packet_sg items[0]; | |
205 | }; | |
206 | ||
/* general queries are rather different from group-/S,G-specific queries;
 * shoehorning them into the S,G timers would be neither efficient nor
 * particularly useful.  Instead, keep a history of recent general
 * queries and their implied expiry times.
 */
struct gm_general_pending {
	struct timeval query, expiry;
};
215 | ||
216 | /* similarly, group queries also age out S,G entries for the group, but in | |
217 | * this case we only keep one query for each group | |
218 | * | |
219 | * why is this not in the *,G gm_sg? There may not be one (for INCLUDE mode | |
220 | * groups, or groups we don't know about.) Also, malicious clients could spam | |
221 | * random group-specific queries to trigger resource exhaustion, so it makes | |
222 | * sense to limit these. | |
223 | */ | |
224 | PREDECL_RBTREE_UNIQ(gm_grp_pends); | |
225 | struct gm_grp_pending { | |
226 | struct gm_grp_pends_item itm; | |
227 | struct gm_if *iface; | |
228 | pim_addr grp; | |
229 | ||
230 | struct timeval query; | |
e6685141 | 231 | struct event *t_expire; |
5e5034b0 DL |
232 | }; |
233 | ||
234 | /* guaranteed MTU for IPv6 is 1280 bytes. IPv6 header is 40 bytes, MLDv2 | |
235 | * query header is 24 bytes, RA option is 8 bytes - leaves 1208 bytes for the | |
236 | * source list, which is 151 IPv6 addresses. But we may have some more IPv6 | |
237 | * extension headers (e.g. IPsec AH), so just cap to 128 | |
238 | */ | |
239 | #define MLD_V2Q_MTU_MAX_SOURCES 128 | |
240 | ||
241 | /* group-and-source-specific queries are bundled together, if some host joins | |
242 | * multiple sources it's likely to drop all at the same time. | |
243 | * | |
244 | * Unlike gm_grp_pending, this is only used for aggregation since the S,G | |
245 | * state is kept directly in the gm_sg structure. | |
246 | */ | |
247 | PREDECL_HASH(gm_gsq_pends); | |
248 | struct gm_gsq_pending { | |
249 | struct gm_gsq_pends_item itm; | |
250 | ||
251 | struct gm_if *iface; | |
e6685141 | 252 | struct event *t_send; |
5e5034b0 DL |
253 | |
254 | pim_addr grp; | |
255 | bool s_bit; | |
256 | ||
257 | size_t n_src; | |
258 | pim_addr srcs[MLD_V2Q_MTU_MAX_SOURCES]; | |
259 | }; | |
260 | ||
261 | ||
/* the size of the general-query history is bounded by QRV, i.e. there
 * can never be more than 8 items in it.
 */
#define GM_MAX_PENDING 8

/* MLD version in effect on an interface */
enum gm_version {
	GM_NONE,
	GM_MLDV1,
	GM_MLDV2,
};
272 | ||
/* per-interface MLD RX/TX counters */
struct gm_if_stats {
	uint64_t rx_drop_csum;
	uint64_t rx_drop_srcaddr;
	uint64_t rx_drop_dstaddr;
	uint64_t rx_drop_ra;
	uint64_t rx_drop_malformed;
	uint64_t rx_trunc_report;

	/* since the types differ, these are named rx_old_*, not rx_*_old */
	uint64_t rx_old_report;
	uint64_t rx_old_leave;
	uint64_t rx_new_report;

	uint64_t rx_query_new_general;
	uint64_t rx_query_new_group;
	uint64_t rx_query_new_groupsrc;
	uint64_t rx_query_new_sbit;
	uint64_t rx_query_old_general;
	uint64_t rx_query_old_group;

	uint64_t tx_query_new_general;
	uint64_t tx_query_new_group;
	uint64_t tx_query_new_groupsrc;
	uint64_t tx_query_old_general;
	uint64_t tx_query_old_group;

	uint64_t tx_query_fail;
};
301 | ||
5e5034b0 DL |
302 | struct gm_if { |
303 | struct interface *ifp; | |
304 | struct pim_instance *pim; | |
e6685141 | 305 | struct event *t_query, *t_other_querier, *t_expire; |
5e5034b0 DL |
306 | |
307 | bool stopping; | |
308 | ||
309 | uint8_t n_startup; | |
310 | ||
311 | uint8_t cur_qrv; | |
312 | unsigned int cur_query_intv; /* ms */ | |
313 | unsigned int cur_query_intv_trig; /* ms */ | |
314 | unsigned int cur_max_resp; /* ms */ | |
315 | enum gm_version cur_version; | |
51b4991f | 316 | int cur_lmqc; /* last member query count in ds */ |
5e5034b0 DL |
317 | |
318 | /* this value (positive, default 10ms) defines our "timing tolerance": | |
319 | * - added to deadlines for expiring joins | |
320 | * - used to look backwards in time for queries, in case a report was | |
321 | * reordered before the query | |
322 | */ | |
323 | struct timeval cfg_timing_fuzz; | |
324 | ||
325 | /* items in pending[] are sorted by expiry, pending[0] is earliest */ | |
326 | struct gm_general_pending pending[GM_MAX_PENDING]; | |
327 | uint8_t n_pending; | |
328 | struct gm_grp_pends_head grp_pends[1]; | |
329 | struct gm_gsq_pends_head gsq_pends[1]; | |
330 | ||
5e5034b0 DL |
331 | pim_addr querier; |
332 | pim_addr cur_ll_lowest; | |
333 | ||
334 | struct gm_sgs_head sgs[1]; | |
335 | struct gm_subscribers_head subscribers[1]; | |
336 | struct gm_packet_expires_head expires[1]; | |
aa2f9349 DL |
337 | |
338 | struct timeval started; | |
339 | struct gm_if_stats stats; | |
5e5034b0 DL |
340 | }; |
341 | ||
342 | #if PIM_IPV == 6 | |
343 | extern void gm_ifp_update(struct interface *ifp); | |
344 | extern void gm_ifp_teardown(struct interface *ifp); | |
4459f499 | 345 | extern void gm_group_delete(struct gm_if *gm_ifp); |
5e5034b0 DL |
346 | #else |
347 | static inline void gm_ifp_update(struct interface *ifp) | |
348 | { | |
349 | } | |
350 | ||
351 | static inline void gm_ifp_teardown(struct interface *ifp) | |
352 | { | |
353 | } | |
354 | #endif | |
355 | ||
356 | extern void gm_cli_init(void); | |
00fed6ed | 357 | bool in6_multicast_nofwd(const pim_addr *addr); |
5e5034b0 DL |
358 | |
359 | #endif /* PIM6_MLD_H */ |