1 // SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright (C) 2021-2022 David Lamparter for NetDEF, Inc. */
19 #define MLD_DEFAULT_VERSION 2
21 /* see comment below on subs_negative/subs_positive */
23 /* negative/pruning: S,G in EXCLUDE */
25 /* positive/joining: *,G in EXCLUDE and S,G in INCLUDE */
33 /* remaining 3 only valid for S,G when *,G in EXCLUDE */
36 GM_SG_NOPRUNE_EXPIRING
,
39 static inline bool gm_sg_state_want_join(enum gm_sg_state state
)
41 return state
!= GM_SG_NOINFO
&& state
!= GM_SG_PRUNE
;
44 /* MLD (S,G) state (on an interface)
46 * group is always != ::, src is :: for (*,G) joins. sort order in RB tree is
47 * such that sources for a particular group can be iterated by starting at the
48 * group. For INCLUDE, no (*,G) entry exists, only (S,G).
51 PREDECL_RBTREE_UNIQ(gm_packet_sg_subs
);
52 PREDECL_RBTREE_UNIQ(gm_sgs
);
56 struct gm_sgs_item itm
;
58 enum gm_sg_state state
;
59 struct channel_oil
*oil
;
62 struct timeval created
;
64 /* if a group- or group-and-source specific query is running
65 * (implies we haven't received any report yet, since it's cancelled
68 struct event
*t_sg_expire
;
70 /* last-member-left triggered queries (group/group-source specific)
72 * this timer will be running even if we aren't the elected querier,
73 * in case the election result changes midway through.
75 struct event
*t_sg_query
;
77 /* we must keep sending (QRV) queries even if we get a positive
78 * response, to make sure other routers are updated. query_sbit
79 * will be set in that case, since other routers need the *response*,
85 /* subs_positive tracks gm_packet_sg resulting in a JOIN, i.e. for
86 * (*,G) it has *EXCLUDE* items, for (S,G) it has *INCLUDE* items.
88 * subs_negative is always empty for (*,G) and tracks EXCLUDE items
89 * for (S,G). This means that an (S,G) entry is active as a PRUNE if
90 * len(src->subs_negative) == len(grp->subs_positive)
91 * && len(src->subs_positive) == 0
 * (i.e. all receivers for the group opted to exclude this S,G and
 * no one did an SSM join for the S,G)
97 struct gm_packet_sg_subs_head subs_negative
[1];
98 struct gm_packet_sg_subs_head subs_positive
[1];
100 struct gm_packet_sg_subs_head subs
[2];
103 /* If the elected querier is not ourselves, queries and reports might
104 * get reordered in rare circumstances, i.e. the report could arrive
105 * just a microsecond before the query kicks off the timer. This can
106 * then result in us thinking there are no more receivers since no
107 * report might be received during the query period.
109 * To avoid this, keep track of the most recent report for this (S,G)
110 * so we can do a quick check to add just a little bit of slack.
112 * EXCLUDE S,Gs are never in most_recent.
114 struct gm_packet_sg
*most_recent
;
117 /* host tracking entry. addr will be one of:
119 * :: - used by hosts during address acquisition
120 * ::1 - may show up on some OS for joins by the router itself
121 * link-local - regular operation by MLDv2 hosts
122 * ffff:..:ffff - MLDv1 entry (cannot be tracked due to report suppression)
124 * global scope IPv6 addresses can never show up here
126 PREDECL_HASH(gm_subscribers
);
127 PREDECL_DLIST(gm_packets
);
128 struct gm_subscriber
{
130 struct gm_subscribers_item itm
;
135 struct gm_packets_head packets
[1];
137 struct timeval created
;
141 * MLD join state is kept batched by packet. Since the timers for all items
142 * in a packet are the same, this reduces the number of timers we're keeping
143 * track of. It also eases tracking for EXCLUDE state groups because the
144 * excluded sources are in the same packet. (MLD does not support splitting
145 * that if it exceeds MTU, it's always a full replace for exclude.)
147 * Since packets may be partially superseded by newer packets, the "active"
148 * field is used to track this.
151 /* gm_packet_sg is allocated as part of gm_packet_state, note the items[0]
152 * array at the end of that. gm_packet_sg is NEVER directly allocated with
155 struct gm_packet_sg
{
156 /* non-NULL as long as this gm_packet_sg is the most recent entry
157 * for (subscriber,S,G). Cleared to NULL when a newer packet by the
158 * subscriber replaces this item.
160 * (Old items are kept around so we don't need to realloc/resize
161 * gm_packet_state, which would mess up a whole lot of pointers)
165 /* gm_sg -> (subscriber, gm_packet_sg)
166 * only on RB-tree while sg != NULL, i.e. not superseded by newer.
168 struct gm_packet_sg_subs_item subs_itm
;
170 bool is_src
: 1; /* := (src != ::) */
173 /* for getting back to struct gm_packet_state, cf.
174 * gm_packet_sg2state() below
178 /* if this is a group entry in EXCLUDE state, n_exclude counts how
179 * many sources are on the exclude list here. They follow immediately
/* Recover the enclosing struct gm_packet_state from a gm_packet_sg pointer.
 *
 * gm_packet_sg entries are never allocated standalone; they live in the
 * items[] trailing array of a gm_packet_state.  sg->offset is the entry's
 * index within that array, so container_of() on items[sg->offset] yields
 * the owning packet structure.
 */
#define gm_packet_sg2state(sg) \
	container_of(sg, struct gm_packet_state, items[sg->offset])
188 PREDECL_DLIST(gm_packet_expires
);
189 struct gm_packet_state
{
191 struct gm_subscriber
*subscriber
;
192 struct gm_packets_item pkt_itm
;
194 struct timeval received
;
195 struct gm_packet_expires_item exp_itm
;
197 /* n_active starts equal to n_sg; whenever active is set to false on
198 * an item it is decremented. When n_active == 0, the packet can be
201 uint16_t n_sg
, n_active
;
202 struct gm_packet_sg items
[0];
205 /* general queries are rather different from group/S,G specific queries; it's
206 * not particularly efficient or useful to try to shoehorn them into the S,G
207 * timers. Instead, we keep a history of recent queries and their implied
210 struct gm_general_pending
{
211 struct timeval query
, expiry
;
214 /* similarly, group queries also age out S,G entries for the group, but in
215 * this case we only keep one query for each group
217 * why is this not in the *,G gm_sg? There may not be one (for INCLUDE mode
218 * groups, or groups we don't know about.) Also, malicious clients could spam
219 * random group-specific queries to trigger resource exhaustion, so it makes
220 * sense to limit these.
222 PREDECL_RBTREE_UNIQ(gm_grp_pends
);
223 struct gm_grp_pending
{
224 struct gm_grp_pends_item itm
;
228 struct timeval query
;
229 struct event
*t_expire
;
232 /* guaranteed MTU for IPv6 is 1280 bytes. IPv6 header is 40 bytes, MLDv2
233 * query header is 24 bytes, RA option is 8 bytes - leaves 1208 bytes for the
234 * source list, which is 151 IPv6 addresses. But we may have some more IPv6
235 * extension headers (e.g. IPsec AH), so just cap to 128
237 #define MLD_V2Q_MTU_MAX_SOURCES 128
239 /* group-and-source-specific queries are bundled together, if some host joins
240 * multiple sources it's likely to drop all at the same time.
242 * Unlike gm_grp_pending, this is only used for aggregation since the S,G
243 * state is kept directly in the gm_sg structure.
245 PREDECL_HASH(gm_gsq_pends
);
246 struct gm_gsq_pending
{
247 struct gm_gsq_pends_item itm
;
250 struct event
*t_send
;
256 pim_addr srcs
[MLD_V2Q_MTU_MAX_SOURCES
];
260 /* The size of this history is limited by QRV, i.e. there can't be more than
263 #define GM_MAX_PENDING 8
272 uint64_t rx_drop_csum
;
273 uint64_t rx_drop_srcaddr
;
274 uint64_t rx_drop_dstaddr
;
276 uint64_t rx_drop_malformed
;
277 uint64_t rx_trunc_report
;
279 /* since the types are different, this is rx_old_* not of rx_*_old */
280 uint64_t rx_old_report
;
281 uint64_t rx_old_leave
;
282 uint64_t rx_new_report
;
284 uint64_t rx_query_new_general
;
285 uint64_t rx_query_new_group
;
286 uint64_t rx_query_new_groupsrc
;
287 uint64_t rx_query_new_sbit
;
288 uint64_t rx_query_old_general
;
289 uint64_t rx_query_old_group
;
291 uint64_t tx_query_new_general
;
292 uint64_t tx_query_new_group
;
293 uint64_t tx_query_new_groupsrc
;
294 uint64_t tx_query_old_general
;
295 uint64_t tx_query_old_group
;
297 uint64_t tx_query_fail
;
301 struct interface
*ifp
;
302 struct pim_instance
*pim
;
303 struct event
*t_query
, *t_other_querier
, *t_expire
;
310 unsigned int cur_query_intv
; /* ms */
311 unsigned int cur_query_intv_trig
; /* ms */
312 unsigned int cur_max_resp
; /* ms */
313 enum gm_version cur_version
;
314 int cur_lmqc
; /* last member query count in ds */
316 /* this value (positive, default 10ms) defines our "timing tolerance":
317 * - added to deadlines for expiring joins
318 * - used to look backwards in time for queries, in case a report was
319 * reordered before the query
321 struct timeval cfg_timing_fuzz
;
323 /* items in pending[] are sorted by expiry, pending[0] is earliest */
324 struct gm_general_pending pending
[GM_MAX_PENDING
];
326 struct gm_grp_pends_head grp_pends
[1];
327 struct gm_gsq_pends_head gsq_pends
[1];
330 pim_addr cur_ll_lowest
;
332 struct gm_sgs_head sgs
[1];
333 struct gm_subscribers_head subscribers
[1];
334 struct gm_packet_expires_head expires
[1];
336 struct timeval started
;
337 struct gm_if_stats stats
;
341 extern void gm_ifp_update(struct interface
*ifp
);
342 extern void gm_ifp_teardown(struct interface
*ifp
);
343 extern void gm_group_delete(struct gm_if
*gm_ifp
);
345 static inline void gm_ifp_update(struct interface
*ifp
)
349 static inline void gm_ifp_teardown(struct interface
*ifp
)
354 extern void gm_cli_init(void);
356 #endif /* PIM6_MLD_H */