]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. | |
2a1d9b7f RD |
3 | * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. |
4 | * Copyright (c) 2004 Voltaire, Inc. All rights reserved. | |
1da177e4 LT |
5 | * |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the | |
10 | * OpenIB.org BSD license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or | |
13 | * without modification, are permitted provided that the following | |
14 | * conditions are met: | |
15 | * | |
16 | * - Redistributions of source code must retain the above | |
17 | * copyright notice, this list of conditions and the following | |
18 | * disclaimer. | |
19 | * | |
20 | * - Redistributions in binary form must reproduce the above | |
21 | * copyright notice, this list of conditions and the following | |
22 | * disclaimer in the documentation and/or other materials | |
23 | * provided with the distribution. | |
24 | * | |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
32 | * SOFTWARE. | |
33 | * | |
34 | * $Id: ipoib_multicast.c 1362 2004-12-18 15:56:29Z roland $ | |
35 | */ | |
36 | ||
37 | #include <linux/skbuff.h> | |
38 | #include <linux/rtnetlink.h> | |
39 | #include <linux/ip.h> | |
40 | #include <linux/in.h> | |
41 | #include <linux/igmp.h> | |
42 | #include <linux/inetdevice.h> | |
43 | #include <linux/delay.h> | |
44 | #include <linux/completion.h> | |
45 | ||
14c85021 ACM |
46 | #include <net/dst.h> |
47 | ||
1da177e4 LT |
48 | #include "ipoib.h" |
49 | ||
/*
 * Multicast tracing level, exposed as an int module parameter with
 * mode 0644.  Only compiled in when CONFIG_INFINIBAND_IPOIB_DEBUG is set.
 */
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif
57 | ||
/*
 * Held in this file around every check or change of IPOIB_MCAST_RUN that
 * is paired with queueing or cancelling priv->mcast_task.
 */
static DEFINE_MUTEX(mcast_mutex);
1da177e4 LT |
59 | |
60 | /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ | |
61 | struct ipoib_mcast { | |
62 | struct ib_sa_mcmember_rec mcmember; | |
faec2f7b | 63 | struct ib_sa_multicast *mc; |
1da177e4 LT |
64 | struct ipoib_ah *ah; |
65 | ||
66 | struct rb_node rb_node; | |
67 | struct list_head list; | |
1da177e4 LT |
68 | |
69 | unsigned long created; | |
70 | unsigned long backoff; | |
71 | ||
72 | unsigned long flags; | |
73 | unsigned char logcount; | |
74 | ||
75 | struct list_head neigh_list; | |
76 | ||
77 | struct sk_buff_head pkt_queue; | |
78 | ||
79 | struct net_device *dev; | |
80 | }; | |
81 | ||
82 | struct ipoib_mcast_iter { | |
83 | struct net_device *dev; | |
84 | union ib_gid mgid; | |
85 | unsigned long created; | |
86 | unsigned int queuelen; | |
87 | unsigned int complete; | |
88 | unsigned int send_only; | |
89 | }; | |
90 | ||
91 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) | |
92 | { | |
93 | struct net_device *dev = mcast->dev; | |
94 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
95 | struct ipoib_neigh *neigh, *tmp; | |
96 | unsigned long flags; | |
b36f170b | 97 | int tx_dropped = 0; |
1da177e4 LT |
98 | |
99 | ipoib_dbg_mcast(netdev_priv(dev), | |
100 | "deleting multicast group " IPOIB_GID_FMT "\n", | |
101 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
102 | ||
103 | spin_lock_irqsave(&priv->lock, flags); | |
104 | ||
105 | list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) { | |
97460df3 EC |
106 | /* |
107 | * It's safe to call ipoib_put_ah() inside priv->lock | |
108 | * here, because we know that mcast->ah will always | |
109 | * hold one more reference, so ipoib_put_ah() will | |
110 | * never do more than decrement the ref count. | |
111 | */ | |
1da177e4 | 112 | if (neigh->ah) |
97460df3 | 113 | ipoib_put_ah(neigh->ah); |
2745b5b7 | 114 | ipoib_neigh_free(dev, neigh); |
1da177e4 LT |
115 | } |
116 | ||
117 | spin_unlock_irqrestore(&priv->lock, flags); | |
118 | ||
1da177e4 LT |
119 | if (mcast->ah) |
120 | ipoib_put_ah(mcast->ah); | |
121 | ||
b36f170b MT |
122 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
123 | ++tx_dropped; | |
8c608a32 | 124 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b MT |
125 | } |
126 | ||
127 | spin_lock_irqsave(&priv->tx_lock, flags); | |
128 | priv->stats.tx_dropped += tx_dropped; | |
129 | spin_unlock_irqrestore(&priv->tx_lock, flags); | |
1da177e4 LT |
130 | |
131 | kfree(mcast); | |
132 | } | |
133 | ||
134 | static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, | |
135 | int can_sleep) | |
136 | { | |
137 | struct ipoib_mcast *mcast; | |
138 | ||
de6eb66b | 139 | mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); |
1da177e4 LT |
140 | if (!mcast) |
141 | return NULL; | |
142 | ||
1da177e4 LT |
143 | mcast->dev = dev; |
144 | mcast->created = jiffies; | |
ce5b65cc | 145 | mcast->backoff = 1; |
1da177e4 LT |
146 | |
147 | INIT_LIST_HEAD(&mcast->list); | |
148 | INIT_LIST_HEAD(&mcast->neigh_list); | |
149 | skb_queue_head_init(&mcast->pkt_queue); | |
150 | ||
1da177e4 LT |
151 | return mcast; |
152 | } | |
153 | ||
37c22a77 | 154 | static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) |
1da177e4 LT |
155 | { |
156 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
157 | struct rb_node *n = priv->multicast_tree.rb_node; | |
158 | ||
159 | while (n) { | |
160 | struct ipoib_mcast *mcast; | |
161 | int ret; | |
162 | ||
163 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
164 | ||
37c22a77 | 165 | ret = memcmp(mgid, mcast->mcmember.mgid.raw, |
1da177e4 LT |
166 | sizeof (union ib_gid)); |
167 | if (ret < 0) | |
168 | n = n->rb_left; | |
169 | else if (ret > 0) | |
170 | n = n->rb_right; | |
171 | else | |
172 | return mcast; | |
173 | } | |
174 | ||
175 | return NULL; | |
176 | } | |
177 | ||
178 | static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) | |
179 | { | |
180 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
181 | struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; | |
182 | ||
183 | while (*n) { | |
184 | struct ipoib_mcast *tmcast; | |
185 | int ret; | |
186 | ||
187 | pn = *n; | |
188 | tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); | |
189 | ||
190 | ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, | |
191 | sizeof (union ib_gid)); | |
192 | if (ret < 0) | |
193 | n = &pn->rb_left; | |
194 | else if (ret > 0) | |
195 | n = &pn->rb_right; | |
196 | else | |
197 | return -EEXIST; | |
198 | } | |
199 | ||
200 | rb_link_node(&mcast->rb_node, pn, n); | |
201 | rb_insert_color(&mcast->rb_node, &priv->multicast_tree); | |
202 | ||
203 | return 0; | |
204 | } | |
205 | ||
206 | static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |
207 | struct ib_sa_mcmember_rec *mcmember) | |
208 | { | |
209 | struct net_device *dev = mcast->dev; | |
210 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
7343b231 | 211 | struct ipoib_ah *ah; |
1da177e4 LT |
212 | int ret; |
213 | ||
214 | mcast->mcmember = *mcmember; | |
215 | ||
216 | /* Set the cached Q_Key before we attach if it's the broadcast group */ | |
217 | if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | |
218 | sizeof (union ib_gid))) { | |
219 | priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); | |
220 | priv->tx_wr.wr.ud.remote_qkey = priv->qkey; | |
221 | } | |
222 | ||
223 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
224 | if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
225 | ipoib_warn(priv, "multicast group " IPOIB_GID_FMT | |
226 | " already attached\n", | |
227 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
228 | ||
229 | return 0; | |
230 | } | |
231 | ||
232 | ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), | |
233 | &mcast->mcmember.mgid); | |
234 | if (ret < 0) { | |
235 | ipoib_warn(priv, "couldn't attach QP to multicast group " | |
236 | IPOIB_GID_FMT "\n", | |
237 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
238 | ||
239 | clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); | |
240 | return ret; | |
241 | } | |
242 | } | |
243 | ||
244 | { | |
245 | struct ib_ah_attr av = { | |
246 | .dlid = be16_to_cpu(mcast->mcmember.mlid), | |
247 | .port_num = priv->port, | |
248 | .sl = mcast->mcmember.sl, | |
249 | .ah_flags = IB_AH_GRH, | |
bf6a9e31 | 250 | .static_rate = mcast->mcmember.rate, |
1da177e4 LT |
251 | .grh = { |
252 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), | |
253 | .hop_limit = mcast->mcmember.hop_limit, | |
254 | .sgid_index = 0, | |
255 | .traffic_class = mcast->mcmember.traffic_class | |
256 | } | |
257 | }; | |
1da177e4 LT |
258 | av.grh.dgid = mcast->mcmember.mgid; |
259 | ||
7343b231 EC |
260 | ah = ipoib_create_ah(dev, priv->pd, &av); |
261 | if (!ah) { | |
1da177e4 LT |
262 | ipoib_warn(priv, "ib_address_create failed\n"); |
263 | } else { | |
624d01f8 OG |
264 | spin_lock_irq(&priv->lock); |
265 | mcast->ah = ah; | |
266 | spin_unlock_irq(&priv->lock); | |
267 | ||
1da177e4 LT |
268 | ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT |
269 | " AV %p, LID 0x%04x, SL %d\n", | |
270 | IPOIB_GID_ARG(mcast->mcmember.mgid), | |
271 | mcast->ah->ah, | |
272 | be16_to_cpu(mcast->mcmember.mlid), | |
273 | mcast->mcmember.sl); | |
274 | } | |
275 | } | |
276 | ||
277 | /* actually send any queued packets */ | |
b36f170b | 278 | spin_lock_irq(&priv->tx_lock); |
1da177e4 LT |
279 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
280 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | |
b36f170b | 281 | spin_unlock_irq(&priv->tx_lock); |
1da177e4 LT |
282 | |
283 | skb->dev = dev; | |
284 | ||
285 | if (!skb->dst || !skb->dst->neighbour) { | |
286 | /* put pseudoheader back on for next time */ | |
287 | skb_push(skb, sizeof (struct ipoib_pseudoheader)); | |
288 | } | |
289 | ||
290 | if (dev_queue_xmit(skb)) | |
291 | ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); | |
b36f170b | 292 | spin_lock_irq(&priv->tx_lock); |
1da177e4 | 293 | } |
b36f170b | 294 | spin_unlock_irq(&priv->tx_lock); |
1da177e4 LT |
295 | |
296 | return 0; | |
297 | } | |
298 | ||
faec2f7b | 299 | static int |
1da177e4 | 300 | ipoib_mcast_sendonly_join_complete(int status, |
faec2f7b | 301 | struct ib_sa_multicast *multicast) |
1da177e4 | 302 | { |
faec2f7b | 303 | struct ipoib_mcast *mcast = multicast->context; |
1da177e4 | 304 | struct net_device *dev = mcast->dev; |
b36f170b | 305 | struct ipoib_dev_priv *priv = netdev_priv(dev); |
1da177e4 | 306 | |
faec2f7b SH |
307 | /* We trap for port events ourselves. */ |
308 | if (status == -ENETRESET) | |
309 | return 0; | |
310 | ||
1da177e4 | 311 | if (!status) |
faec2f7b SH |
312 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); |
313 | ||
314 | if (status) { | |
1da177e4 LT |
315 | if (mcast->logcount++ < 20) |
316 | ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for " | |
317 | IPOIB_GID_FMT ", status %d\n", | |
318 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | |
319 | ||
320 | /* Flush out any queued packets */ | |
b36f170b MT |
321 | spin_lock_irq(&priv->tx_lock); |
322 | while (!skb_queue_empty(&mcast->pkt_queue)) { | |
323 | ++priv->stats.tx_dropped; | |
8c608a32 | 324 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b MT |
325 | } |
326 | spin_unlock_irq(&priv->tx_lock); | |
1da177e4 LT |
327 | |
328 | /* Clear the busy flag so we try again */ | |
faec2f7b SH |
329 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, |
330 | &mcast->flags); | |
1da177e4 | 331 | } |
faec2f7b | 332 | return status; |
1da177e4 LT |
333 | } |
334 | ||
335 | static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) | |
336 | { | |
337 | struct net_device *dev = mcast->dev; | |
338 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
339 | struct ib_sa_mcmember_rec rec = { | |
340 | #if 0 /* Some SMs don't support send-only yet */ | |
341 | .join_state = 4 | |
342 | #else | |
343 | .join_state = 1 | |
344 | #endif | |
345 | }; | |
346 | int ret = 0; | |
347 | ||
348 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | |
349 | ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n"); | |
350 | return -ENODEV; | |
351 | } | |
352 | ||
353 | if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) { | |
354 | ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n"); | |
355 | return -EBUSY; | |
356 | } | |
357 | ||
358 | rec.mgid = mcast->mcmember.mgid; | |
359 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 360 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 | 361 | |
faec2f7b SH |
362 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, |
363 | priv->port, &rec, | |
364 | IB_SA_MCMEMBER_REC_MGID | | |
365 | IB_SA_MCMEMBER_REC_PORT_GID | | |
366 | IB_SA_MCMEMBER_REC_PKEY | | |
367 | IB_SA_MCMEMBER_REC_JOIN_STATE, | |
368 | GFP_ATOMIC, | |
369 | ipoib_mcast_sendonly_join_complete, | |
370 | mcast); | |
371 | if (IS_ERR(mcast->mc)) { | |
372 | ret = PTR_ERR(mcast->mc); | |
373 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
374 | ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", | |
1da177e4 LT |
375 | ret); |
376 | } else { | |
377 | ipoib_dbg_mcast(priv, "no multicast record for " IPOIB_GID_FMT | |
378 | ", starting join\n", | |
379 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
1da177e4 LT |
380 | } |
381 | ||
382 | return ret; | |
383 | } | |
384 | ||
faec2f7b SH |
385 | static int ipoib_mcast_join_complete(int status, |
386 | struct ib_sa_multicast *multicast) | |
1da177e4 | 387 | { |
faec2f7b | 388 | struct ipoib_mcast *mcast = multicast->context; |
1da177e4 LT |
389 | struct net_device *dev = mcast->dev; |
390 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
391 | ||
392 | ipoib_dbg_mcast(priv, "join completion for " IPOIB_GID_FMT | |
393 | " (status %d)\n", | |
394 | IPOIB_GID_ARG(mcast->mcmember.mgid), status); | |
395 | ||
faec2f7b SH |
396 | /* We trap for port events ourselves. */ |
397 | if (status == -ENETRESET) | |
398 | return 0; | |
399 | ||
400 | if (!status) | |
401 | status = ipoib_mcast_join_finish(mcast, &multicast->rec); | |
402 | ||
403 | if (!status) { | |
ce5b65cc | 404 | mcast->backoff = 1; |
95ed644f | 405 | mutex_lock(&mcast_mutex); |
1da177e4 | 406 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
c4028958 DH |
407 | queue_delayed_work(ipoib_workqueue, |
408 | &priv->mcast_task, 0); | |
95ed644f | 409 | mutex_unlock(&mcast_mutex); |
faec2f7b | 410 | return 0; |
1da177e4 LT |
411 | } |
412 | ||
faec2f7b SH |
413 | if (mcast->logcount++ < 20) { |
414 | if (status == -ETIMEDOUT) { | |
1da177e4 LT |
415 | ipoib_dbg_mcast(priv, "multicast join failed for " IPOIB_GID_FMT |
416 | ", status %d\n", | |
417 | IPOIB_GID_ARG(mcast->mcmember.mgid), | |
418 | status); | |
419 | } else { | |
420 | ipoib_warn(priv, "multicast join failed for " | |
421 | IPOIB_GID_FMT ", status %d\n", | |
422 | IPOIB_GID_ARG(mcast->mcmember.mgid), | |
423 | status); | |
424 | } | |
425 | } | |
426 | ||
427 | mcast->backoff *= 2; | |
428 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
429 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
430 | ||
faec2f7b SH |
431 | /* Clear the busy flag so we try again */ |
432 | status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
9acf6a85 | 433 | |
faec2f7b | 434 | mutex_lock(&mcast_mutex); |
9acf6a85 | 435 | spin_lock_irq(&priv->lock); |
faec2f7b SH |
436 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
437 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, | |
438 | mcast->backoff * HZ); | |
9acf6a85 | 439 | spin_unlock_irq(&priv->lock); |
95ed644f | 440 | mutex_unlock(&mcast_mutex); |
1da177e4 | 441 | |
faec2f7b | 442 | return status; |
1da177e4 LT |
443 | } |
444 | ||
445 | static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, | |
446 | int create) | |
447 | { | |
448 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
449 | struct ib_sa_mcmember_rec rec = { | |
450 | .join_state = 1 | |
451 | }; | |
452 | ib_sa_comp_mask comp_mask; | |
453 | int ret = 0; | |
454 | ||
455 | ipoib_dbg_mcast(priv, "joining MGID " IPOIB_GID_FMT "\n", | |
456 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
457 | ||
458 | rec.mgid = mcast->mcmember.mgid; | |
459 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 460 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 LT |
461 | |
462 | comp_mask = | |
463 | IB_SA_MCMEMBER_REC_MGID | | |
464 | IB_SA_MCMEMBER_REC_PORT_GID | | |
465 | IB_SA_MCMEMBER_REC_PKEY | | |
466 | IB_SA_MCMEMBER_REC_JOIN_STATE; | |
467 | ||
468 | if (create) { | |
469 | comp_mask |= | |
d0df6d6d RD |
470 | IB_SA_MCMEMBER_REC_QKEY | |
471 | IB_SA_MCMEMBER_REC_MTU_SELECTOR | | |
472 | IB_SA_MCMEMBER_REC_MTU | | |
473 | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS | | |
474 | IB_SA_MCMEMBER_REC_RATE_SELECTOR | | |
475 | IB_SA_MCMEMBER_REC_RATE | | |
476 | IB_SA_MCMEMBER_REC_SL | | |
477 | IB_SA_MCMEMBER_REC_FLOW_LABEL | | |
478 | IB_SA_MCMEMBER_REC_HOP_LIMIT; | |
1da177e4 LT |
479 | |
480 | rec.qkey = priv->broadcast->mcmember.qkey; | |
d0df6d6d RD |
481 | rec.mtu_selector = IB_SA_EQ; |
482 | rec.mtu = priv->broadcast->mcmember.mtu; | |
483 | rec.traffic_class = priv->broadcast->mcmember.traffic_class; | |
484 | rec.rate_selector = IB_SA_EQ; | |
485 | rec.rate = priv->broadcast->mcmember.rate; | |
1da177e4 LT |
486 | rec.sl = priv->broadcast->mcmember.sl; |
487 | rec.flow_label = priv->broadcast->mcmember.flow_label; | |
d0df6d6d | 488 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; |
1da177e4 LT |
489 | } |
490 | ||
faec2f7b SH |
491 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
492 | mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, | |
493 | &rec, comp_mask, GFP_KERNEL, | |
494 | ipoib_mcast_join_complete, mcast); | |
495 | if (IS_ERR(mcast->mc)) { | |
496 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
497 | ret = PTR_ERR(mcast->mc); | |
498 | ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); | |
1da177e4 LT |
499 | |
500 | mcast->backoff *= 2; | |
501 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
502 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
503 | ||
95ed644f | 504 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
505 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
506 | queue_delayed_work(ipoib_workqueue, | |
507 | &priv->mcast_task, | |
ce5b65cc | 508 | mcast->backoff * HZ); |
95ed644f | 509 | mutex_unlock(&mcast_mutex); |
faec2f7b | 510 | } |
1da177e4 LT |
511 | } |
512 | ||
c4028958 | 513 | void ipoib_mcast_join_task(struct work_struct *work) |
1da177e4 | 514 | { |
c4028958 DH |
515 | struct ipoib_dev_priv *priv = |
516 | container_of(work, struct ipoib_dev_priv, mcast_task.work); | |
517 | struct net_device *dev = priv->dev; | |
1da177e4 LT |
518 | |
519 | if (!test_bit(IPOIB_MCAST_RUN, &priv->flags)) | |
520 | return; | |
521 | ||
522 | if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid)) | |
523 | ipoib_warn(priv, "ib_gid_entry_get() failed\n"); | |
524 | else | |
525 | memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid)); | |
526 | ||
527 | { | |
528 | struct ib_port_attr attr; | |
529 | ||
658bcef6 RD |
530 | if (!ib_query_port(priv->ca, priv->port, &attr)) |
531 | priv->local_lid = attr.lid; | |
532 | else | |
faec2f7b | 533 | ipoib_warn(priv, "ib_query_port failed\n"); |
1da177e4 LT |
534 | } |
535 | ||
536 | if (!priv->broadcast) { | |
20b83382 RD |
537 | struct ipoib_mcast *broadcast; |
538 | ||
539 | broadcast = ipoib_mcast_alloc(dev, 1); | |
540 | if (!broadcast) { | |
1da177e4 | 541 | ipoib_warn(priv, "failed to allocate broadcast group\n"); |
95ed644f | 542 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
543 | if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) |
544 | queue_delayed_work(ipoib_workqueue, | |
545 | &priv->mcast_task, HZ); | |
95ed644f | 546 | mutex_unlock(&mcast_mutex); |
1da177e4 LT |
547 | return; |
548 | } | |
549 | ||
20b83382 RD |
550 | spin_lock_irq(&priv->lock); |
551 | memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, | |
1da177e4 | 552 | sizeof (union ib_gid)); |
20b83382 | 553 | priv->broadcast = broadcast; |
1da177e4 | 554 | |
1da177e4 LT |
555 | __ipoib_mcast_add(dev, priv->broadcast); |
556 | spin_unlock_irq(&priv->lock); | |
557 | } | |
558 | ||
559 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | |
faec2f7b SH |
560 | if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) |
561 | ipoib_mcast_join(dev, priv->broadcast, 0); | |
1da177e4 LT |
562 | return; |
563 | } | |
564 | ||
565 | while (1) { | |
566 | struct ipoib_mcast *mcast = NULL; | |
567 | ||
568 | spin_lock_irq(&priv->lock); | |
569 | list_for_each_entry(mcast, &priv->multicast_list, list) { | |
570 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) | |
571 | && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) | |
572 | && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
573 | /* Found the next unjoined group */ | |
574 | break; | |
575 | } | |
576 | } | |
577 | spin_unlock_irq(&priv->lock); | |
578 | ||
579 | if (&mcast->list == &priv->multicast_list) { | |
580 | /* All done */ | |
581 | break; | |
582 | } | |
583 | ||
584 | ipoib_mcast_join(dev, mcast, 1); | |
585 | return; | |
586 | } | |
587 | ||
588 | priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) - | |
589 | IPOIB_ENCAP_LEN; | |
839fcaba MT |
590 | |
591 | if (!ipoib_cm_admin_enabled(dev)) | |
592 | dev->mtu = min(priv->mcast_mtu, priv->admin_mtu); | |
1da177e4 LT |
593 | |
594 | ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); | |
595 | ||
596 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); | |
597 | netif_carrier_on(dev); | |
598 | } | |
599 | ||
600 | int ipoib_mcast_start_thread(struct net_device *dev) | |
601 | { | |
602 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
603 | ||
604 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | |
605 | ||
95ed644f | 606 | mutex_lock(&mcast_mutex); |
1da177e4 | 607 | if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags)) |
c4028958 | 608 | queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0); |
95ed644f | 609 | mutex_unlock(&mcast_mutex); |
1da177e4 | 610 | |
479a0796 MT |
611 | spin_lock_irq(&priv->lock); |
612 | set_bit(IPOIB_MCAST_STARTED, &priv->flags); | |
613 | spin_unlock_irq(&priv->lock); | |
614 | ||
1da177e4 LT |
615 | return 0; |
616 | } | |
617 | ||
8d2cae06 | 618 | int ipoib_mcast_stop_thread(struct net_device *dev, int flush) |
1da177e4 LT |
619 | { |
620 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1da177e4 LT |
621 | |
622 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | |
623 | ||
479a0796 MT |
624 | spin_lock_irq(&priv->lock); |
625 | clear_bit(IPOIB_MCAST_STARTED, &priv->flags); | |
626 | spin_unlock_irq(&priv->lock); | |
627 | ||
95ed644f | 628 | mutex_lock(&mcast_mutex); |
1da177e4 LT |
629 | clear_bit(IPOIB_MCAST_RUN, &priv->flags); |
630 | cancel_delayed_work(&priv->mcast_task); | |
95ed644f | 631 | mutex_unlock(&mcast_mutex); |
1da177e4 | 632 | |
8d2cae06 RD |
633 | if (flush) |
634 | flush_workqueue(ipoib_workqueue); | |
1da177e4 | 635 | |
1da177e4 LT |
636 | return 0; |
637 | } | |
638 | ||
639 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) | |
640 | { | |
641 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1da177e4 LT |
642 | int ret = 0; |
643 | ||
faec2f7b SH |
644 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
645 | ipoib_dbg_mcast(priv, "leaving MGID " IPOIB_GID_FMT "\n", | |
646 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
1da177e4 | 647 | |
faec2f7b SH |
648 | /* Remove ourselves from the multicast group */ |
649 | ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid), | |
650 | &mcast->mcmember.mgid); | |
651 | if (ret) | |
652 | ipoib_warn(priv, "ipoib_mcast_detach failed (result = %d)\n", ret); | |
653 | } | |
1da177e4 | 654 | |
faec2f7b SH |
655 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
656 | ib_sa_free_multicast(mcast->mc); | |
1da177e4 LT |
657 | |
658 | return 0; | |
659 | } | |
660 | ||
37c22a77 | 661 | void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) |
1da177e4 LT |
662 | { |
663 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
664 | struct ipoib_mcast *mcast; | |
665 | ||
666 | /* | |
667 | * We can only be called from ipoib_start_xmit, so we're | |
668 | * inside tx_lock -- no need to save/restore flags. | |
669 | */ | |
670 | spin_lock(&priv->lock); | |
671 | ||
20b83382 RD |
672 | if (!test_bit(IPOIB_MCAST_STARTED, &priv->flags) || |
673 | !priv->broadcast || | |
674 | !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | |
479a0796 MT |
675 | ++priv->stats.tx_dropped; |
676 | dev_kfree_skb_any(skb); | |
677 | goto unlock; | |
678 | } | |
679 | ||
1da177e4 LT |
680 | mcast = __ipoib_mcast_find(dev, mgid); |
681 | if (!mcast) { | |
682 | /* Let's create a new send only group now */ | |
683 | ipoib_dbg_mcast(priv, "setting up send only multicast group for " | |
37c22a77 | 684 | IPOIB_GID_FMT "\n", IPOIB_GID_RAW_ARG(mgid)); |
1da177e4 LT |
685 | |
686 | mcast = ipoib_mcast_alloc(dev, 0); | |
687 | if (!mcast) { | |
688 | ipoib_warn(priv, "unable to allocate memory for " | |
689 | "multicast structure\n"); | |
b36f170b | 690 | ++priv->stats.tx_dropped; |
1da177e4 LT |
691 | dev_kfree_skb_any(skb); |
692 | goto out; | |
693 | } | |
694 | ||
695 | set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags); | |
37c22a77 | 696 | memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid)); |
1da177e4 LT |
697 | __ipoib_mcast_add(dev, mcast); |
698 | list_add_tail(&mcast->list, &priv->multicast_list); | |
699 | } | |
700 | ||
701 | if (!mcast->ah) { | |
702 | if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) | |
703 | skb_queue_tail(&mcast->pkt_queue, skb); | |
b36f170b MT |
704 | else { |
705 | ++priv->stats.tx_dropped; | |
1da177e4 | 706 | dev_kfree_skb_any(skb); |
b36f170b | 707 | } |
1da177e4 | 708 | |
faec2f7b | 709 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
1da177e4 LT |
710 | ipoib_dbg_mcast(priv, "no address vector, " |
711 | "but multicast join already started\n"); | |
712 | else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) | |
713 | ipoib_mcast_sendonly_join(mcast); | |
714 | ||
715 | /* | |
716 | * If lookup completes between here and out:, don't | |
717 | * want to send packet twice. | |
718 | */ | |
719 | mcast = NULL; | |
720 | } | |
721 | ||
722 | out: | |
723 | if (mcast && mcast->ah) { | |
724 | if (skb->dst && | |
725 | skb->dst->neighbour && | |
726 | !*to_ipoib_neigh(skb->dst->neighbour)) { | |
d2e0655e | 727 | struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour); |
1da177e4 LT |
728 | |
729 | if (neigh) { | |
730 | kref_get(&mcast->ah->ref); | |
731 | neigh->ah = mcast->ah; | |
1da177e4 LT |
732 | list_add_tail(&neigh->list, &mcast->neigh_list); |
733 | } | |
734 | } | |
735 | ||
736 | ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); | |
737 | } | |
738 | ||
479a0796 | 739 | unlock: |
1da177e4 LT |
740 | spin_unlock(&priv->lock); |
741 | } | |
742 | ||
743 | void ipoib_mcast_dev_flush(struct net_device *dev) | |
744 | { | |
745 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
746 | LIST_HEAD(remove_list); | |
988bd503 | 747 | struct ipoib_mcast *mcast, *tmcast; |
1da177e4 LT |
748 | unsigned long flags; |
749 | ||
750 | ipoib_dbg_mcast(priv, "flushing multicast list\n"); | |
751 | ||
752 | spin_lock_irqsave(&priv->lock, flags); | |
1da177e4 | 753 | |
988bd503 EC |
754 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { |
755 | list_del(&mcast->list); | |
756 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
757 | list_add_tail(&mcast->list, &remove_list); | |
1da177e4 LT |
758 | } |
759 | ||
760 | if (priv->broadcast) { | |
3cd96564 | 761 | rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); |
988bd503 EC |
762 | list_add_tail(&priv->broadcast->list, &remove_list); |
763 | priv->broadcast = NULL; | |
1da177e4 LT |
764 | } |
765 | ||
766 | spin_unlock_irqrestore(&priv->lock, flags); | |
767 | ||
768 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | |
769 | ipoib_mcast_leave(dev, mcast); | |
770 | ipoib_mcast_free(mcast); | |
771 | } | |
772 | } | |
773 | ||
c4028958 | 774 | void ipoib_mcast_restart_task(struct work_struct *work) |
1da177e4 | 775 | { |
c4028958 DH |
776 | struct ipoib_dev_priv *priv = |
777 | container_of(work, struct ipoib_dev_priv, restart_task); | |
778 | struct net_device *dev = priv->dev; | |
1da177e4 LT |
779 | struct dev_mc_list *mclist; |
780 | struct ipoib_mcast *mcast, *tmcast; | |
781 | LIST_HEAD(remove_list); | |
782 | unsigned long flags; | |
783 | ||
784 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
785 | ||
8d2cae06 | 786 | ipoib_mcast_stop_thread(dev, 0); |
1da177e4 | 787 | |
932ff279 HX |
788 | local_irq_save(flags); |
789 | netif_tx_lock(dev); | |
78bfe0b5 | 790 | spin_lock(&priv->lock); |
1da177e4 LT |
791 | |
792 | /* | |
793 | * Unfortunately, the networking core only gives us a list of all of | |
794 | * the multicast hardware addresses. We need to figure out which ones | |
795 | * are new and which ones have been removed | |
796 | */ | |
797 | ||
798 | /* Clear out the found flag */ | |
799 | list_for_each_entry(mcast, &priv->multicast_list, list) | |
800 | clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
801 | ||
802 | /* Mark all of the entries that are found or don't exist */ | |
803 | for (mclist = dev->mc_list; mclist; mclist = mclist->next) { | |
804 | union ib_gid mgid; | |
805 | ||
806 | memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid); | |
807 | ||
808 | /* Add in the P_Key */ | |
809 | mgid.raw[4] = (priv->pkey >> 8) & 0xff; | |
810 | mgid.raw[5] = priv->pkey & 0xff; | |
811 | ||
812 | mcast = __ipoib_mcast_find(dev, &mgid); | |
813 | if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
814 | struct ipoib_mcast *nmcast; | |
815 | ||
816 | /* Not found or send-only group, let's add a new entry */ | |
817 | ipoib_dbg_mcast(priv, "adding multicast entry for mgid " | |
818 | IPOIB_GID_FMT "\n", IPOIB_GID_ARG(mgid)); | |
819 | ||
820 | nmcast = ipoib_mcast_alloc(dev, 0); | |
821 | if (!nmcast) { | |
822 | ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); | |
823 | continue; | |
824 | } | |
825 | ||
826 | set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); | |
827 | ||
828 | nmcast->mcmember.mgid = mgid; | |
829 | ||
830 | if (mcast) { | |
831 | /* Destroy the send only entry */ | |
179e0917 | 832 | list_move_tail(&mcast->list, &remove_list); |
1da177e4 LT |
833 | |
834 | rb_replace_node(&mcast->rb_node, | |
835 | &nmcast->rb_node, | |
836 | &priv->multicast_tree); | |
837 | } else | |
838 | __ipoib_mcast_add(dev, nmcast); | |
839 | ||
840 | list_add_tail(&nmcast->list, &priv->multicast_list); | |
841 | } | |
842 | ||
843 | if (mcast) | |
844 | set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
845 | } | |
846 | ||
847 | /* Remove all of the entries don't exist anymore */ | |
848 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { | |
849 | if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && | |
850 | !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
851 | ipoib_dbg_mcast(priv, "deleting multicast group " IPOIB_GID_FMT "\n", | |
852 | IPOIB_GID_ARG(mcast->mcmember.mgid)); | |
853 | ||
854 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
855 | ||
856 | /* Move to the remove list */ | |
179e0917 | 857 | list_move_tail(&mcast->list, &remove_list); |
1da177e4 LT |
858 | } |
859 | } | |
78bfe0b5 MT |
860 | |
861 | spin_unlock(&priv->lock); | |
932ff279 HX |
862 | netif_tx_unlock(dev); |
863 | local_irq_restore(flags); | |
1da177e4 LT |
864 | |
865 | /* We have to cancel outside of the spinlock */ | |
866 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) { | |
867 | ipoib_mcast_leave(mcast->dev, mcast); | |
868 | ipoib_mcast_free(mcast); | |
869 | } | |
870 | ||
871 | if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) | |
872 | ipoib_mcast_start_thread(dev); | |
873 | } | |
874 | ||
8ae5a8a2 RD |
875 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
876 | ||
1da177e4 LT |
877 | struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) |
878 | { | |
879 | struct ipoib_mcast_iter *iter; | |
880 | ||
881 | iter = kmalloc(sizeof *iter, GFP_KERNEL); | |
882 | if (!iter) | |
883 | return NULL; | |
884 | ||
885 | iter->dev = dev; | |
1732b0ef | 886 | memset(iter->mgid.raw, 0, 16); |
1da177e4 LT |
887 | |
888 | if (ipoib_mcast_iter_next(iter)) { | |
1732b0ef | 889 | kfree(iter); |
1da177e4 LT |
890 | return NULL; |
891 | } | |
892 | ||
893 | return iter; | |
894 | } | |
895 | ||
1da177e4 LT |
896 | int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) |
897 | { | |
898 | struct ipoib_dev_priv *priv = netdev_priv(iter->dev); | |
899 | struct rb_node *n; | |
900 | struct ipoib_mcast *mcast; | |
901 | int ret = 1; | |
902 | ||
903 | spin_lock_irq(&priv->lock); | |
904 | ||
905 | n = rb_first(&priv->multicast_tree); | |
906 | ||
907 | while (n) { | |
908 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
909 | ||
910 | if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, | |
911 | sizeof (union ib_gid)) < 0) { | |
912 | iter->mgid = mcast->mcmember.mgid; | |
913 | iter->created = mcast->created; | |
914 | iter->queuelen = skb_queue_len(&mcast->pkt_queue); | |
915 | iter->complete = !!mcast->ah; | |
916 | iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); | |
917 | ||
918 | ret = 0; | |
919 | ||
920 | break; | |
921 | } | |
922 | ||
923 | n = rb_next(n); | |
924 | } | |
925 | ||
926 | spin_unlock_irq(&priv->lock); | |
927 | ||
928 | return ret; | |
929 | } | |
930 | ||
931 | void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, | |
932 | union ib_gid *mgid, | |
933 | unsigned long *created, | |
934 | unsigned int *queuelen, | |
935 | unsigned int *complete, | |
936 | unsigned int *send_only) | |
937 | { | |
938 | *mgid = iter->mgid; | |
939 | *created = iter->created; | |
940 | *queuelen = iter->queuelen; | |
941 | *complete = iter->complete; | |
942 | *send_only = iter->send_only; | |
943 | } | |
8ae5a8a2 RD |
944 | |
945 | #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ |