pimd/pim6_mld.h

   1 // SPDX-License-Identifier: GPL-2.0-or-later
   2 /*
   3  * PIMv6 MLD querier
   4  * Copyright (C) 2021-2022  David Lamparter for NetDEF, Inc.
   5  */
   6
   7 #ifndef PIM6_MLD_H
   8 #define PIM6_MLD_H
   9
  10 #include "typesafe.h"
  11 #include "pim_addr.h"
  12
  13 struct event;        /* opaque here; referenced only through pointers */
  14 struct pim_instance; /* opaque here; see struct gm_if.pim */
  15 struct gm_packet_sg; /* defined further down; struct gm_sg points at it first */
  16 struct gm_if;        /* defined further down */
  17 struct channel_oil;  /* opaque here; see struct gm_sg.oil */
  18
  19 #define MLD_DEFAULT_VERSION 2 /* same numeric value as GM_MLDV2 in enum gm_version */
  20
  21 /* see comment on subs_negative/subs_positive in struct gm_sg; values match the subs[] slots there */
  22 enum gm_sub_sense {
  23         /* negative/pruning: S,G in EXCLUDE (same slot as subs_negative) */
  24         GM_SUB_NEG = 0,
  25         /* positive/joining: *,G in EXCLUDE and S,G in INCLUDE (same slot as subs_positive) */
  26         GM_SUB_POS = 1,
  27 };
  28
  29 enum gm_sg_state { /* gm_sg_state_want_join() maps these to join / no join */
  30         GM_SG_NOINFO = 0, /* no join wanted */
  31         GM_SG_JOIN, /* join wanted */
  32         GM_SG_JOIN_EXPIRING, /* join still wanted */
  33         /* remaining 3 only valid for S,G when *,G in EXCLUDE */
  34         GM_SG_PRUNE, /* no join wanted */
  35         GM_SG_NOPRUNE, /* join wanted */
  36         GM_SG_NOPRUNE_EXPIRING, /* join still wanted */
  37 };
  38
  39 static inline bool gm_sg_state_want_join(enum gm_sg_state state)
  40 { /* a join is wanted in every state except NOINFO and PRUNE */
  41         return !(state == GM_SG_NOINFO || state == GM_SG_PRUNE);
  42 }
  43
  44 /* MLD (S,G) state (on an interface)
  45  *
  46  * group is always != ::, src is :: for (*,G) joins.  sort order in RB tree is
  47  * such that sources for a particular group can be iterated by starting at the
  48  * group.  For INCLUDE, no (*,G) entry exists, only (S,G).
  49  */
  50
  51 PREDECL_RBTREE_UNIQ(gm_packet_sg_subs);
  52 PREDECL_RBTREE_UNIQ(gm_sgs);
  53 struct gm_sg {
  54         pim_sgaddr sgaddr;      /* (S,G); src is :: for (*,G) entries */
  55         struct gm_if *iface;    /* interface this state lives on */
  56         struct gm_sgs_item itm; /* linkage in the gm_sgs tree (gm_if.sgs) */
  57
  58         enum gm_sg_state state;
  59         struct channel_oil *oil;
  60         bool tib_joined;
  61
  62         struct timeval created;
  63
  64         /* set while a group- or group-and-source specific query is running
  65          * (implies we haven't received any report yet, since it's cancelled
  66          * by that)
  67          */
  68         struct event *t_sg_expire;
  69
  70         /* last-member-left triggered queries (group/group-source specific)
  71          *
  72          * this timer will be running even if we aren't the elected querier,
  73          * in case the election result changes midway through.
  74          */
  75         struct event *t_sg_query;
  76
  77         /* we must keep sending (QRV) queries even if we get a positive
  78          * response, to make sure other routers are updated.  query_sbit
  79          * will be set in that case, since other routers need the *response*,
  80          * not the *query*
  81          */
  82         uint8_t n_query;
  83         bool query_sbit;
  84
  85         /* subs_positive tracks gm_packet_sg resulting in a JOIN, i.e. for
  86          * (*,G) it has *EXCLUDE* items, for (S,G) it has *INCLUDE* items.
  87          *
  88          * subs_negative is always empty for (*,G) and tracks EXCLUDE items
  89          * for (S,G).  This means that an (S,G) entry is active as a PRUNE if
  90          *   len(src->subs_negative) == len(grp->subs_positive)
  91          *   && len(src->subs_positive) == 0
  92          * (i.e. all receivers for the group opted to exclude this S,G and
  93          * no one did an SSM join for the S,G)
  94          */
  95         union {
  96                 struct {
  97                         struct gm_packet_sg_subs_head subs_negative[1];
  98                         struct gm_packet_sg_subs_head subs_positive[1];
  99                 };
 100                 struct gm_packet_sg_subs_head subs[2]; /* same storage: [0] negative, [1] positive */
 101         };
 102
 103         /* If the elected querier is not ourselves, queries and reports might
 104          * get reordered in rare circumstances, i.e. the report could arrive
 105          * just a microsecond before the query kicks off the timer.  This can
 106          * then result in us thinking there are no more receivers since no
 107          * report might be received during the query period.
 108          *
 109          * To avoid this, keep track of the most recent report for this (S,G)
 110          * so we can do a quick check to add just a little bit of slack.
 111          *
 112          * EXCLUDE S,Gs are never in most_recent.
 113          */
 114         struct gm_packet_sg *most_recent;
 115 };
 116 int gm_sg_cmp(const struct gm_sg *a, const struct gm_sg *b); /* ordering function of the gm_sgs tree */
 117 DECLARE_RBTREE_UNIQ(gm_sgs, struct gm_sg, itm, gm_sg_cmp); /* tree of struct gm_sg linked through .itm */
 118
 119 /* host tracking entry.  addr will be one of:
 120  *
 121  * ::           - used by hosts during address acquisition
 122  * ::1          - may show up on some OS for joins by the router itself
 123  * link-local   - regular operation by MLDv2 hosts
 124  * ffff:..:ffff - MLDv1 entry (cannot be tracked due to report suppression)
 125  *
 126  * global scope IPv6 addresses can never show up here
 127  */
 128 PREDECL_HASH(gm_subscribers);
 129 PREDECL_DLIST(gm_packets);
 130 struct gm_subscriber {
 131         pim_addr addr; /* one of the address kinds listed above */
 132         struct gm_subscribers_item itm; /* linkage in the gm_subscribers hash (gm_if.subscribers) */
 133
 134         struct gm_if *iface;
 135         size_t refcount;
 136
 137         struct gm_packets_head packets[1]; /* gm_packet_state entries, linked via their pkt_itm */
 138
 139         struct timeval created;
 140 };
 141
 142 /*
 143  * MLD join state is kept batched by packet.  Since the timers for all items
 144  * in a packet are the same, this reduces the number of timers we're keeping
 145  * track of.  It also eases tracking for EXCLUDE state groups because the
 146  * excluded sources are in the same packet.  (MLD does not support splitting
 147  * that if it exceeds MTU, it's always a full replace for exclude.)
 148  *
 149  * Since packets may be partially superseded by newer packets, the per-item
 150  * gm_packet_sg.sg pointer (NULL once superseded) and n_active track this.
 151  */
 152
 153 /* gm_packet_sg is allocated as part of gm_packet_state, note the items[0]
 154  * array at the end of that.  gm_packet_sg is NEVER directly allocated with
 155  * XMALLOC/XFREE.
 156  */
 157 struct gm_packet_sg {
 158         /* non-NULL as long as this gm_packet_sg is the most recent entry
 159          * for (subscriber,S,G).  Cleared to NULL when a newer packet by the
 160          * subscriber replaces this item.
 161          *
 162          * (Old items are kept around so we don't need to realloc/resize
 163          * gm_packet_state, which would mess up a whole lot of pointers)
 164          */
 165         struct gm_sg *sg;
 166
 167         /* gm_sg -> (subscriber, gm_packet_sg), i.e. linkage in a gm_sg subs tree;
 168          * only on RB-tree while sg != NULL, i.e. not superseded by newer.
 169          */
 170         struct gm_packet_sg_subs_item subs_itm;
 171
 172         bool is_src : 1; /* := (src != ::) */
 173         bool is_excl : 1; /* NOTE(review): presumably set for EXCLUDE-mode entries - confirm */
 174
 175         /* own index in gm_packet_state.items[]; for getting back to struct
 176          * gm_packet_state, cf. gm_packet_sg2state() below
 177          */
 178         uint16_t offset;
 179
 180         /* if this is a group entry in EXCLUDE state, n_exclude counts how
 181          * many sources are on the exclude list here.  They follow immediately
 182          * after.
 183          */
 184         uint16_t n_exclude;
 185 };
 186
 187 #define gm_packet_sg2state(sg) /* item -> enclosing gm_packet_state; sg is evaluated twice */ \
 188         container_of(sg, struct gm_packet_state, items[(sg)->offset])
 189
 190 PREDECL_DLIST(gm_packet_expires);
 191 struct gm_packet_state {
 192         struct gm_if *iface;
 193         struct gm_subscriber *subscriber;
 194         struct gm_packets_item pkt_itm; /* linkage in a gm_packets list (gm_subscriber.packets) */
 195
 196         struct timeval received;
 197         struct gm_packet_expires_item exp_itm; /* linkage in a gm_packet_expires list (gm_if.expires) */
 198
 199         /* n_active starts equal to n_sg;  whenever an item goes inactive it
 200          * is decremented.  When n_active == 0, the packet can be freed.
 201          * NOTE(review): items have no "active" member; presumably sg == NULL.
 202          */
 203         uint16_t n_sg, n_active;
 204         struct gm_packet_sg items[0]; /* trailing item storage, indexed by gm_packet_sg.offset */
 205 };
 206
 207 /* general queries are rather different from group/S,G specific queries;  it's
 208  * not particularly efficient or useful to try to shoehorn them into the S,G
 209  * timers.  Instead, we keep a history of recent queries and their implied
 210  * expiries.
 211  */
 212 struct gm_general_pending { /* one entry of the general-query history in gm_if.pending[] */
 213         struct timeval query, expiry; /* a recent query and the expiry it implies */
 214 };
 215
 216 /* similarly, group queries also age out S,G entries for the group, but in
 217  * this case we only keep one query for each group
 218  *
 219  * why is this not in the *,G gm_sg?  There may not be one (for INCLUDE mode
 220  * groups, or groups we don't know about.)  Also, malicious clients could spam
 221  * random group-specific queries to trigger resource exhaustion, so it makes
 222  * sense to limit these.
 223  */
 224 PREDECL_RBTREE_UNIQ(gm_grp_pends);
 225 struct gm_grp_pending {
 226         struct gm_grp_pends_item itm; /* linkage in the gm_grp_pends tree (gm_if.grp_pends) */
 227         struct gm_if *iface;
 228         pim_addr grp; /* group of the pending query; only one is kept per group */
 229
 230         struct timeval query;
 231         struct event *t_expire;
 232 };
 233
 234 /* guaranteed MTU for IPv6 is 1280 bytes.  IPv6 header is 40 bytes, MLDv2
 235  * query header is 24 bytes, RA option is 8 bytes - leaves 1208 bytes for the
 236  * source list, which is 75 IPv6 addresses (16 bytes each).  But we may have
 237  * some more IPv6 extension headers (e.g. IPsec AH); the cap used is 128.
 238  * NOTE(review): 128 sources (2048 bytes) exceed that budget - confirm. */
 239 #define MLD_V2Q_MTU_MAX_SOURCES 128
 240
 241 /* group-and-source-specific queries are bundled together, if some host joins
 242  * multiple sources it's likely to drop all at the same time.
 243  *
 244  * Unlike gm_grp_pending, this is only used for aggregation since the S,G
 245  * state is kept directly in the gm_sg structure.
 246  */
 247 PREDECL_HASH(gm_gsq_pends);
 248 struct gm_gsq_pending {
 249         struct gm_gsq_pends_item itm; /* linkage in the gm_gsq_pends hash (gm_if.gsq_pends) */
 250
 251         struct gm_if *iface;
 252         struct event *t_send;
 253
 254         pim_addr grp;
 255         bool s_bit;
 256
 257         size_t n_src; /* NOTE(review): presumably the number of used entries in srcs[] */
 258         pim_addr srcs[MLD_V2Q_MTU_MAX_SOURCES]; /* fixed capacity, see MLD_V2Q_MTU_MAX_SOURCES */
 259 };
 260
 261
 262 /* The size of the general-query history (gm_if.pending[]) is limited by QRV,
 263  * i.e. there can't be more than 8 items here.
 264  */
 265 #define GM_MAX_PENDING 8
 266
 267 enum gm_version { /* type of gm_if.cur_version */
 268         GM_NONE, /* 0 */
 269         GM_MLDV1, /* 1 */
 270         GM_MLDV2, /* 2, same value as MLD_DEFAULT_VERSION */
 271 };
 272
 273 struct gm_if_stats { /* 64-bit counters, kept per interface as gm_if.stats */
 274         uint64_t rx_drop_csum;
 275         uint64_t rx_drop_srcaddr;
 276         uint64_t rx_drop_dstaddr;
 277         uint64_t rx_drop_ra;
 278         uint64_t rx_drop_malformed;
 279         uint64_t rx_trunc_report;
 280
 281         /* since the types are different, these are named rx_old_* rather than rx_*_old */
 282         uint64_t rx_old_report;
 283         uint64_t rx_old_leave;
 284         uint64_t rx_new_report;
 285
 286         uint64_t rx_query_new_general;
 287         uint64_t rx_query_new_group;
 288         uint64_t rx_query_new_groupsrc;
 289         uint64_t rx_query_new_sbit;
 290         uint64_t rx_query_old_general;
 291         uint64_t rx_query_old_group;
 292
 293         uint64_t tx_query_new_general;
 294         uint64_t tx_query_new_group;
 295         uint64_t tx_query_new_groupsrc;
 296         uint64_t tx_query_old_general;
 297         uint64_t tx_query_old_group;
 298
 299         uint64_t tx_query_fail;
 300 };
 301
 302 struct gm_if { /* per-interface MLD state */
 303         struct interface *ifp;
 304         struct pim_instance *pim;
 305         struct event *t_query, *t_other_querier, *t_expire;
 306
 307         bool stopping;
 308
 309         uint8_t n_startup;
 310
 311         uint8_t cur_qrv;
 312         unsigned int cur_query_intv;      /* ms */
 313         unsigned int cur_query_intv_trig; /* ms */
 314         unsigned int cur_max_resp;        /* ms */
 315         enum gm_version cur_version;
 316         int cur_lmqc; /* last member query count in ds; NOTE(review): a unit on a count looks off - confirm */
 317
 318         /* this value (positive, default 10ms) defines our "timing tolerance":
 319          * - added to deadlines for expiring joins
 320          * - used to look backwards in time for queries, in case a report was
 321          *   reordered before the query
 322          */
 323         struct timeval cfg_timing_fuzz;
 324
 325         /* items in pending[] are sorted by expiry, pending[0] is earliest */
 326         struct gm_general_pending pending[GM_MAX_PENDING];
 327         uint8_t n_pending; /* NOTE(review): presumably the number of used entries in pending[] */
 328         struct gm_grp_pends_head grp_pends[1]; /* tree of struct gm_grp_pending */
 329         struct gm_gsq_pends_head gsq_pends[1]; /* hash of struct gm_gsq_pending */
 330
 331         pim_addr querier;
 332         pim_addr cur_ll_lowest;
 333
 334         struct gm_sgs_head sgs[1]; /* tree of struct gm_sg, ordered by gm_sg_cmp */
 335         struct gm_subscribers_head subscribers[1]; /* hash of struct gm_subscriber */
 336         struct gm_packet_expires_head expires[1]; /* list of struct gm_packet_state via exp_itm */
 337
 338         struct timeval started;
 339         struct gm_if_stats stats;
 340 };
 341
 342 #if PIM_IPV == 6 /* real implementations live outside this header */
 343 extern void gm_ifp_update(struct interface *ifp);
 344 extern void gm_ifp_teardown(struct interface *ifp);
 345 extern void gm_group_delete(struct gm_if *gm_ifp);
 346 #else /* PIM_IPV != 6: empty stubs; note gm_group_delete() has no stub here */
 347 static inline void gm_ifp_update(struct interface *ifp)
 348 {
 349 }
 350
 351 static inline void gm_ifp_teardown(struct interface *ifp)
 352 {
 353 }
 354 #endif /* PIM_IPV == 6 */
 355
 356 extern void gm_cli_init(void); /* declared regardless of PIM_IPV */
 357 bool in6_multicast_nofwd(const pim_addr *addr); /* declared regardless of PIM_IPV; addr is not modified */
 358
 359 #endif /* PIM6_MLD_H */