]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. | |
2a1d9b7f RD |
3 | * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. |
4 | * Copyright (c) 2004 Voltaire, Inc. All rights reserved. | |
1da177e4 LT |
5 | * |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the | |
10 | * OpenIB.org BSD license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or | |
13 | * without modification, are permitted provided that the following | |
14 | * conditions are met: | |
15 | * | |
16 | * - Redistributions of source code must retain the above | |
17 | * copyright notice, this list of conditions and the following | |
18 | * disclaimer. | |
19 | * | |
20 | * - Redistributions in binary form must reproduce the above | |
21 | * copyright notice, this list of conditions and the following | |
22 | * disclaimer in the documentation and/or other materials | |
23 | * provided with the distribution. | |
24 | * | |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
32 | * SOFTWARE. | |
1da177e4 LT |
33 | */ |
34 | ||
35 | #include <linux/skbuff.h> | |
36 | #include <linux/rtnetlink.h> | |
fec14d2f | 37 | #include <linux/moduleparam.h> |
1da177e4 LT |
38 | #include <linux/ip.h> |
39 | #include <linux/in.h> | |
40 | #include <linux/igmp.h> | |
41 | #include <linux/inetdevice.h> | |
42 | #include <linux/delay.h> | |
43 | #include <linux/completion.h> | |
5a0e3ad6 | 44 | #include <linux/slab.h> |
1da177e4 | 45 | |
14c85021 ACM |
46 | #include <net/dst.h> |
47 | ||
1da177e4 LT |
48 | #include "ipoib.h" |
49 | ||
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/*
 * Multicast debug verbosity, exposed as a module parameter with mode 0644.
 * Only compiled in when CONFIG_INFINIBAND_IPOIB_DEBUG is set.
 * NOTE(review): presumably consulted by the ipoib_dbg_mcast() macro, which
 * is defined outside this file - confirm.
 */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif
57 | ||
1da177e4 LT |
/*
 * Per-group snapshot carried by a multicast iterator.
 *
 * NOTE(review): the code that fills in and reads this structure is not
 * visible in this part of the file; the field notes below are inferred
 * from the names and from the like-named members of struct ipoib_mcast,
 * and should be confirmed against the iterator functions.
 */
struct ipoib_mcast_iter {
	struct net_device *dev;		/* device being iterated */
	union ib_gid mgid;		/* presumably MGID of the current group */
	unsigned long created;		/* presumably a jiffies timestamp */
	unsigned int queuelen;		/* presumably length of the pending packet queue */
	unsigned int complete;		/* presumably non-zero once the join finished */
	unsigned int send_only;		/* presumably non-zero for send-only groups */
};
66 | ||
3b561130 ES |
/*
 * Join state that allows creating the multicast group (mcg) with a
 * send-only member request.  ipoib_mcast_join() puts this value in
 * rec.join_state for send-only groups when
 * priv->sm_fullmember_sendonly_support is set.
 */
#define SENDONLY_FULLMEMBER_JOIN	8
69 | ||
69911416 | 70 | /* |
1c0453d6 | 71 | * This should be called with the priv->lock held |
69911416 DL |
72 | */ |
73 | static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv, | |
74 | struct ipoib_mcast *mcast, | |
75 | bool delay) | |
76 | { | |
0e5544d9 | 77 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) |
69911416 DL |
78 | return; |
79 | ||
80 | /* | |
81 | * We will be scheduling *something*, so cancel whatever is | |
82 | * currently scheduled first | |
83 | */ | |
84 | cancel_delayed_work(&priv->mcast_task); | |
85 | if (mcast && delay) { | |
86 | /* | |
87 | * We had a failure and want to schedule a retry later | |
88 | */ | |
89 | mcast->backoff *= 2; | |
90 | if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS) | |
91 | mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS; | |
92 | mcast->delay_until = jiffies + (mcast->backoff * HZ); | |
93 | /* | |
94 | * Mark this mcast for its delay, but restart the | |
95 | * task immediately. The join task will make sure to | |
96 | * clear out all entries without delays, and then | |
97 | * schedule itself to run again when the earliest | |
98 | * delay expires | |
99 | */ | |
100 | queue_delayed_work(priv->wq, &priv->mcast_task, 0); | |
101 | } else if (delay) { | |
102 | /* | |
103 | * Special case of retrying after a failure to | |
104 | * allocate the broadcast multicast group, wait | |
105 | * 1 second and try again | |
106 | */ | |
107 | queue_delayed_work(priv->wq, &priv->mcast_task, HZ); | |
108 | } else | |
109 | queue_delayed_work(priv->wq, &priv->mcast_task, 0); | |
110 | } | |
111 | ||
5a0e81f6 | 112 | static void ipoib_mcast_free(struct ipoib_mcast *mcast) |
1da177e4 LT |
113 | { |
114 | struct net_device *dev = mcast->dev; | |
b36f170b | 115 | int tx_dropped = 0; |
1da177e4 | 116 | |
5b095d98 | 117 | ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", |
fcace2fe | 118 | mcast->mcmember.mgid.raw); |
1da177e4 | 119 | |
b63b70d8 SP |
120 | /* remove all neigh connected to this mcast */ |
121 | ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); | |
1da177e4 | 122 | |
1da177e4 LT |
123 | if (mcast->ah) |
124 | ipoib_put_ah(mcast->ah); | |
125 | ||
b36f170b MT |
126 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
127 | ++tx_dropped; | |
8c608a32 | 128 | dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue)); |
b36f170b MT |
129 | } |
130 | ||
943c246e | 131 | netif_tx_lock_bh(dev); |
de903512 | 132 | dev->stats.tx_dropped += tx_dropped; |
943c246e | 133 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
134 | |
135 | kfree(mcast); | |
136 | } | |
137 | ||
138 | static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, | |
139 | int can_sleep) | |
140 | { | |
141 | struct ipoib_mcast *mcast; | |
142 | ||
de6eb66b | 143 | mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); |
1da177e4 LT |
144 | if (!mcast) |
145 | return NULL; | |
146 | ||
1da177e4 LT |
147 | mcast->dev = dev; |
148 | mcast->created = jiffies; | |
69911416 | 149 | mcast->delay_until = jiffies; |
ce5b65cc | 150 | mcast->backoff = 1; |
1da177e4 LT |
151 | |
152 | INIT_LIST_HEAD(&mcast->list); | |
153 | INIT_LIST_HEAD(&mcast->neigh_list); | |
154 | skb_queue_head_init(&mcast->pkt_queue); | |
155 | ||
1da177e4 LT |
156 | return mcast; |
157 | } | |
158 | ||
432c55ff | 159 | static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) |
1da177e4 LT |
160 | { |
161 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
162 | struct rb_node *n = priv->multicast_tree.rb_node; | |
163 | ||
164 | while (n) { | |
165 | struct ipoib_mcast *mcast; | |
166 | int ret; | |
167 | ||
168 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
169 | ||
37c22a77 | 170 | ret = memcmp(mgid, mcast->mcmember.mgid.raw, |
1da177e4 LT |
171 | sizeof (union ib_gid)); |
172 | if (ret < 0) | |
173 | n = n->rb_left; | |
174 | else if (ret > 0) | |
175 | n = n->rb_right; | |
176 | else | |
177 | return mcast; | |
178 | } | |
179 | ||
180 | return NULL; | |
181 | } | |
182 | ||
183 | static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) | |
184 | { | |
185 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
186 | struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; | |
187 | ||
188 | while (*n) { | |
189 | struct ipoib_mcast *tmcast; | |
190 | int ret; | |
191 | ||
192 | pn = *n; | |
193 | tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); | |
194 | ||
195 | ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, | |
196 | sizeof (union ib_gid)); | |
197 | if (ret < 0) | |
198 | n = &pn->rb_left; | |
199 | else if (ret > 0) | |
200 | n = &pn->rb_right; | |
201 | else | |
202 | return -EEXIST; | |
203 | } | |
204 | ||
205 | rb_link_node(&mcast->rb_node, pn, n); | |
206 | rb_insert_color(&mcast->rb_node, &priv->multicast_tree); | |
207 | ||
208 | return 0; | |
209 | } | |
210 | ||
211 | static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |
212 | struct ib_sa_mcmember_rec *mcmember) | |
213 | { | |
214 | struct net_device *dev = mcast->dev; | |
215 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
7343b231 | 216 | struct ipoib_ah *ah; |
1da177e4 | 217 | int ret; |
d0de1362 | 218 | int set_qkey = 0; |
1da177e4 LT |
219 | |
220 | mcast->mcmember = *mcmember; | |
221 | ||
bea1e22d PM |
222 | /* Set the multicast MTU and cached Q_Key before we attach if it's |
223 | * the broadcast group. | |
224 | */ | |
1da177e4 LT |
225 | if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, |
226 | sizeof (union ib_gid))) { | |
e1d50dce JM |
227 | spin_lock_irq(&priv->lock); |
228 | if (!priv->broadcast) { | |
229 | spin_unlock_irq(&priv->lock); | |
230 | return -EAGAIN; | |
231 | } | |
3fd0605c ES |
232 | /*update priv member according to the new mcast*/ |
233 | priv->broadcast->mcmember.qkey = mcmember->qkey; | |
234 | priv->broadcast->mcmember.mtu = mcmember->mtu; | |
235 | priv->broadcast->mcmember.traffic_class = mcmember->traffic_class; | |
236 | priv->broadcast->mcmember.rate = mcmember->rate; | |
237 | priv->broadcast->mcmember.sl = mcmember->sl; | |
238 | priv->broadcast->mcmember.flow_label = mcmember->flow_label; | |
239 | priv->broadcast->mcmember.hop_limit = mcmember->hop_limit; | |
240 | /* assume if the admin and the mcast are the same both can be changed */ | |
241 | if (priv->mcast_mtu == priv->admin_mtu) | |
242 | priv->admin_mtu = | |
243 | priv->mcast_mtu = | |
244 | IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); | |
245 | else | |
246 | priv->mcast_mtu = | |
247 | IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); | |
248 | ||
1da177e4 | 249 | priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); |
e1d50dce | 250 | spin_unlock_irq(&priv->lock); |
e622f2f4 | 251 | priv->tx_wr.remote_qkey = priv->qkey; |
d0de1362 | 252 | set_qkey = 1; |
1da177e4 LT |
253 | } |
254 | ||
255 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
256 | if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
5b095d98 | 257 | ipoib_warn(priv, "multicast group %pI6 already attached\n", |
fcace2fe | 258 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
259 | |
260 | return 0; | |
261 | } | |
262 | ||
263 | ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), | |
d0de1362 | 264 | &mcast->mcmember.mgid, set_qkey); |
1da177e4 | 265 | if (ret < 0) { |
5b095d98 | 266 | ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", |
fcace2fe | 267 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
268 | |
269 | clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); | |
270 | return ret; | |
271 | } | |
272 | } | |
273 | ||
274 | { | |
275 | struct ib_ah_attr av = { | |
276 | .dlid = be16_to_cpu(mcast->mcmember.mlid), | |
277 | .port_num = priv->port, | |
278 | .sl = mcast->mcmember.sl, | |
279 | .ah_flags = IB_AH_GRH, | |
bf6a9e31 | 280 | .static_rate = mcast->mcmember.rate, |
1da177e4 LT |
281 | .grh = { |
282 | .flow_label = be32_to_cpu(mcast->mcmember.flow_label), | |
283 | .hop_limit = mcast->mcmember.hop_limit, | |
284 | .sgid_index = 0, | |
285 | .traffic_class = mcast->mcmember.traffic_class | |
286 | } | |
287 | }; | |
1da177e4 LT |
288 | av.grh.dgid = mcast->mcmember.mgid; |
289 | ||
7343b231 | 290 | ah = ipoib_create_ah(dev, priv->pd, &av); |
3874397c MM |
291 | if (IS_ERR(ah)) { |
292 | ipoib_warn(priv, "ib_address_create failed %ld\n", | |
293 | -PTR_ERR(ah)); | |
294 | /* use original error */ | |
295 | return PTR_ERR(ah); | |
1da177e4 | 296 | } else { |
624d01f8 OG |
297 | spin_lock_irq(&priv->lock); |
298 | mcast->ah = ah; | |
299 | spin_unlock_irq(&priv->lock); | |
300 | ||
5b095d98 | 301 | ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", |
fcace2fe | 302 | mcast->mcmember.mgid.raw, |
1da177e4 LT |
303 | mcast->ah->ah, |
304 | be16_to_cpu(mcast->mcmember.mlid), | |
305 | mcast->mcmember.sl); | |
306 | } | |
307 | } | |
308 | ||
309 | /* actually send any queued packets */ | |
943c246e | 310 | netif_tx_lock_bh(dev); |
1da177e4 LT |
311 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
312 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | |
69cce1d1 | 313 | |
943c246e | 314 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
315 | |
316 | skb->dev = dev; | |
1da177e4 LT |
317 | if (dev_queue_xmit(skb)) |
318 | ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); | |
936d7de3 | 319 | |
943c246e | 320 | netif_tx_lock_bh(dev); |
1da177e4 | 321 | } |
943c246e | 322 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
323 | |
324 | return 0; | |
325 | } | |
326 | ||
e8224e4b YE |
327 | void ipoib_mcast_carrier_on_task(struct work_struct *work) |
328 | { | |
329 | struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, | |
330 | carrier_on_task); | |
5ee95120 | 331 | struct ib_port_attr attr; |
3b561130 | 332 | int ret; |
e8224e4b | 333 | |
5ee95120 MS |
334 | if (ib_query_port(priv->ca, priv->port, &attr) || |
335 | attr.state != IB_PORT_ACTIVE) { | |
336 | ipoib_dbg(priv, "Keeping carrier off until IB port is active\n"); | |
337 | return; | |
338 | } | |
3b561130 ES |
339 | /* |
340 | * Check if can send sendonly MCG's with sendonly-fullmember join state. | |
341 | * It done here after the successfully join to the broadcast group, | |
342 | * because the broadcast group must always be joined first and is always | |
343 | * re-joined if the SM changes substantially. | |
344 | */ | |
345 | ret = ipoib_check_sm_sendonly_fullmember_support(priv); | |
346 | if (ret < 0) | |
347 | pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n", | |
348 | priv->dev->name, ret); | |
5ee95120 | 349 | |
894021a7 DL |
350 | /* |
351 | * Take rtnl_lock to avoid racing with ipoib_stop() and | |
352 | * turning the carrier back on while a device is being | |
353 | * removed. However, ipoib_stop() will attempt to flush | |
354 | * the workqueue while holding the rtnl lock, so loop | |
355 | * on trylock until either we get the lock or we see | |
356 | * FLAG_OPER_UP go away as that signals that we are bailing | |
357 | * and can safely ignore the carrier on work. | |
358 | */ | |
359 | while (!rtnl_trylock()) { | |
360 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) | |
361 | return; | |
362 | else | |
363 | msleep(20); | |
364 | } | |
c84ca6d2 DL |
365 | if (!ipoib_cm_admin_enabled(priv->dev)) |
366 | dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu)); | |
e8224e4b YE |
367 | netif_carrier_on(priv->dev); |
368 | rtnl_unlock(); | |
369 | } | |
370 | ||
faec2f7b SH |
/*
 * Completion callback passed to ib_sa_join_multicast() by
 * ipoib_mcast_join().
 *
 * @status:    result of the join; 0 on success, negative on failure
 * @multicast: SA multicast object; its ->context is the ipoib_mcast and
 *             its ->rec is the returned member record
 *
 * On every path the function ends by taking priv->lock, setting
 * mcast->mc (NULL on failure, @multicast otherwise), clearing
 * IPOIB_MCAST_FLAG_BUSY and completing mcast->done.
 *
 * Returns the final status: 0 for success or -ENETRESET, otherwise the
 * join error or the error from ipoib_mcast_join_finish().
 */
static int ipoib_mcast_join_complete(int status,
				     struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = netdev_priv(dev);

	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
			"sendonly " : "",
			mcast->mcmember.mgid.raw, status);

	/* We trap for port events ourselves. */
	if (status == -ENETRESET) {
		status = 0;
		goto out;
	}

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (!status) {
		/* Success: reset the retry state for this group. */
		mcast->backoff = 1;
		mcast->delay_until = jiffies;

		/*
		 * Defer carrier on work to priv->wq to avoid a
		 * deadlock on rtnl_lock here.  Requeue our multicast
		 * work too, which will end up happening right after
		 * our carrier on task work and will allow us to
		 * send out all of the non-broadcast joins
		 */
		if (mcast == priv->broadcast) {
			spin_lock_irq(&priv->lock);
			queue_work(priv->wq, &priv->carrier_on_task);
			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			goto out_locked;
		}
	} else {
		/*
		 * A send-only join failing with -EINVAL is logged at debug
		 * level only and does not count against the log limit.
		 */
		bool silent_fail =
		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
		    status == -EINVAL;

		/* Stop logging failures for a group once logcount reaches 20. */
		if (mcast->logcount < 20) {
			if (status == -ETIMEDOUT || status == -EAGAIN ||
			    silent_fail) {
				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
						mcast->mcmember.mgid.raw, status);
			} else {
				ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
					   test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
					   mcast->mcmember.mgid.raw, status);
			}

			if (!silent_fail)
				mcast->logcount++;
		}

		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
		    mcast->backoff >= 2) {
			/*
			 * We only retry sendonly joins once before we drop
			 * the packet and quit trying to deal with the
			 * group.  However, we leave the group in the
			 * mcast list as an unjoined group.  If we want to
			 * try joining again, we simply queue up a packet
			 * and restart the join thread.  The empty queue
			 * is why the join thread ignores this group.
			 */
			mcast->backoff = 1;
			netif_tx_lock_bh(dev);
			while (!skb_queue_empty(&mcast->pkt_queue)) {
				++dev->stats.tx_dropped;
				dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
			}
			netif_tx_unlock_bh(dev);
		} else {
			spin_lock_irq(&priv->lock);
			/* Requeue this join task with a backoff delay */
			__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
			goto out_locked;
		}
	}
out:
	/* Paths that did not need to reschedule take the lock only here. */
	spin_lock_irq(&priv->lock);
out_locked:
	/*
	 * Make sure to set mcast->mc before we clear the busy flag to avoid
	 * racing with code that checks for BUSY before checking mcast->mc
	 */
	if (status)
		mcast->mc = NULL;
	else
		mcast->mc = multicast;
	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
	spin_unlock_irq(&priv->lock);
	complete(&mcast->done);

	return status;
}
472 | ||
08bc3276 AE |
473 | /* |
474 | * Caller must hold 'priv->lock' | |
475 | */ | |
476 | static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast) | |
1da177e4 LT |
477 | { |
478 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1c0453d6 | 479 | struct ib_sa_multicast *multicast; |
1da177e4 LT |
480 | struct ib_sa_mcmember_rec rec = { |
481 | .join_state = 1 | |
482 | }; | |
483 | ib_sa_comp_mask comp_mask; | |
484 | int ret = 0; | |
485 | ||
08bc3276 AE |
486 | if (!priv->broadcast || |
487 | !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) | |
488 | return -EINVAL; | |
489 | ||
5b095d98 | 490 | ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); |
1da177e4 LT |
491 | |
492 | rec.mgid = mcast->mcmember.mgid; | |
493 | rec.port_gid = priv->local_gid; | |
97f52eb4 | 494 | rec.pkey = cpu_to_be16(priv->pkey); |
1da177e4 LT |
495 | |
496 | comp_mask = | |
497 | IB_SA_MCMEMBER_REC_MGID | | |
498 | IB_SA_MCMEMBER_REC_PORT_GID | | |
499 | IB_SA_MCMEMBER_REC_PKEY | | |
500 | IB_SA_MCMEMBER_REC_JOIN_STATE; | |
501 | ||
c3acdc06 DL |
502 | if (mcast != priv->broadcast) { |
503 | /* | |
504 | * RFC 4391: | |
505 | * The MGID MUST use the same P_Key, Q_Key, SL, MTU, | |
506 | * and HopLimit as those used in the broadcast-GID. The rest | |
507 | * of attributes SHOULD follow the values used in the | |
508 | * broadcast-GID as well. | |
509 | */ | |
1da177e4 | 510 | comp_mask |= |
d0df6d6d RD |
511 | IB_SA_MCMEMBER_REC_QKEY | |
512 | IB_SA_MCMEMBER_REC_MTU_SELECTOR | | |
513 | IB_SA_MCMEMBER_REC_MTU | | |
514 | IB_SA_MCMEMBER_REC_TRAFFIC_CLASS | | |
515 | IB_SA_MCMEMBER_REC_RATE_SELECTOR | | |
516 | IB_SA_MCMEMBER_REC_RATE | | |
517 | IB_SA_MCMEMBER_REC_SL | | |
518 | IB_SA_MCMEMBER_REC_FLOW_LABEL | | |
519 | IB_SA_MCMEMBER_REC_HOP_LIMIT; | |
1da177e4 LT |
520 | |
521 | rec.qkey = priv->broadcast->mcmember.qkey; | |
d0df6d6d RD |
522 | rec.mtu_selector = IB_SA_EQ; |
523 | rec.mtu = priv->broadcast->mcmember.mtu; | |
524 | rec.traffic_class = priv->broadcast->mcmember.traffic_class; | |
525 | rec.rate_selector = IB_SA_EQ; | |
526 | rec.rate = priv->broadcast->mcmember.rate; | |
1da177e4 LT |
527 | rec.sl = priv->broadcast->mcmember.sl; |
528 | rec.flow_label = priv->broadcast->mcmember.flow_label; | |
d0df6d6d | 529 | rec.hop_limit = priv->broadcast->mcmember.hop_limit; |
c3acdc06 DL |
530 | |
531 | /* | |
3b561130 ES |
532 | * Send-only IB Multicast joins work at the core IB layer but |
533 | * require specific SM support. | |
534 | * We can use such joins here only if the current SM supports that feature. | |
535 | * However, if not, we emulate an Ethernet multicast send, | |
536 | * which does not require a multicast subscription and will | |
537 | * still send properly. The most appropriate thing to | |
c3852ab0 DL |
538 | * do is to create the group if it doesn't exist as that |
539 | * most closely emulates the behavior, from a user space | |
3b561130 | 540 | * application perspective, of Ethernet multicast operation. |
c3acdc06 | 541 | */ |
3b561130 ES |
542 | if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) && |
543 | priv->sm_fullmember_sendonly_support) | |
544 | /* SM supports sendonly-fullmember, otherwise fallback to full-member */ | |
545 | rec.join_state = SENDONLY_FULLMEMBER_JOIN; | |
1da177e4 | 546 | } |
08bc3276 | 547 | spin_unlock_irq(&priv->lock); |
1da177e4 | 548 | |
1c0453d6 | 549 | multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port, |
faec2f7b SH |
550 | &rec, comp_mask, GFP_KERNEL, |
551 | ipoib_mcast_join_complete, mcast); | |
08bc3276 | 552 | spin_lock_irq(&priv->lock); |
1c0453d6 DL |
553 | if (IS_ERR(multicast)) { |
554 | ret = PTR_ERR(multicast); | |
faec2f7b | 555 | ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret); |
1c0453d6 | 556 | /* Requeue this join task with a backoff delay */ |
69911416 | 557 | __ipoib_mcast_schedule_join_thread(priv, mcast, 1); |
1c0453d6 DL |
558 | clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); |
559 | spin_unlock_irq(&priv->lock); | |
69911416 | 560 | complete(&mcast->done); |
08bc3276 | 561 | spin_lock_irq(&priv->lock); |
faec2f7b | 562 | } |
08bc3276 | 563 | return 0; |
1da177e4 LT |
564 | } |
565 | ||
c4028958 | 566 | void ipoib_mcast_join_task(struct work_struct *work) |
1da177e4 | 567 | { |
c4028958 DH |
568 | struct ipoib_dev_priv *priv = |
569 | container_of(work, struct ipoib_dev_priv, mcast_task.work); | |
570 | struct net_device *dev = priv->dev; | |
94232d9c | 571 | struct ib_port_attr port_attr; |
69911416 DL |
572 | unsigned long delay_until = 0; |
573 | struct ipoib_mcast *mcast = NULL; | |
1da177e4 | 574 | |
0e5544d9 | 575 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) |
1da177e4 LT |
576 | return; |
577 | ||
94232d9c ES |
578 | if (ib_query_port(priv->ca, priv->port, &port_attr) || |
579 | port_attr.state != IB_PORT_ACTIVE) { | |
580 | ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n", | |
581 | port_attr.state); | |
582 | return; | |
583 | } | |
68f9d83c | 584 | priv->local_lid = port_attr.lid; |
492a7e67 | 585 | netif_addr_lock(dev); |
94232d9c | 586 | |
492a7e67 MB |
587 | if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { |
588 | netif_addr_unlock(dev); | |
589 | return; | |
590 | } | |
591 | netif_addr_unlock(dev); | |
1da177e4 | 592 | |
69911416 DL |
593 | spin_lock_irq(&priv->lock); |
594 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) | |
595 | goto out; | |
596 | ||
1da177e4 | 597 | if (!priv->broadcast) { |
20b83382 RD |
598 | struct ipoib_mcast *broadcast; |
599 | ||
69911416 | 600 | broadcast = ipoib_mcast_alloc(dev, 0); |
20b83382 | 601 | if (!broadcast) { |
1da177e4 | 602 | ipoib_warn(priv, "failed to allocate broadcast group\n"); |
69911416 DL |
603 | /* |
604 | * Restart us after a 1 second delay to retry | |
605 | * creating our broadcast group and attaching to | |
606 | * it. Until this succeeds, this ipoib dev is | |
607 | * completely stalled (multicast wise). | |
608 | */ | |
609 | __ipoib_mcast_schedule_join_thread(priv, NULL, 1); | |
610 | goto out; | |
1da177e4 LT |
611 | } |
612 | ||
20b83382 | 613 | memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4, |
1da177e4 | 614 | sizeof (union ib_gid)); |
20b83382 | 615 | priv->broadcast = broadcast; |
1da177e4 | 616 | |
1da177e4 | 617 | __ipoib_mcast_add(dev, priv->broadcast); |
1da177e4 LT |
618 | } |
619 | ||
620 | if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) { | |
69911416 DL |
621 | if (IS_ERR_OR_NULL(priv->broadcast->mc) && |
622 | !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) { | |
623 | mcast = priv->broadcast; | |
69911416 DL |
624 | if (mcast->backoff > 1 && |
625 | time_before(jiffies, mcast->delay_until)) { | |
626 | delay_until = mcast->delay_until; | |
627 | mcast = NULL; | |
628 | } | |
629 | } | |
630 | goto out; | |
1da177e4 LT |
631 | } |
632 | ||
69911416 DL |
633 | /* |
634 | * We'll never get here until the broadcast group is both allocated | |
635 | * and attached | |
636 | */ | |
637 | list_for_each_entry(mcast, &priv->multicast_list, list) { | |
638 | if (IS_ERR_OR_NULL(mcast->mc) && | |
639 | !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) && | |
d2fe937c DL |
640 | (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) || |
641 | !skb_queue_empty(&mcast->pkt_queue))) { | |
69911416 | 642 | if (mcast->backoff == 1 || |
d2fe937c | 643 | time_after_eq(jiffies, mcast->delay_until)) { |
1da177e4 | 644 | /* Found the next unjoined group */ |
d2fe937c DL |
645 | init_completion(&mcast->done); |
646 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
08bc3276 AE |
647 | if (ipoib_mcast_join(dev, mcast)) { |
648 | spin_unlock_irq(&priv->lock); | |
649 | return; | |
650 | } | |
d2fe937c | 651 | } else if (!delay_until || |
69911416 DL |
652 | time_before(mcast->delay_until, delay_until)) |
653 | delay_until = mcast->delay_until; | |
1da177e4 | 654 | } |
1da177e4 LT |
655 | } |
656 | ||
d2fe937c DL |
657 | mcast = NULL; |
658 | ipoib_dbg_mcast(priv, "successfully started all multicast joins\n"); | |
1da177e4 | 659 | |
69911416 | 660 | out: |
d2fe937c DL |
661 | if (delay_until) { |
662 | cancel_delayed_work(&priv->mcast_task); | |
663 | queue_delayed_work(priv->wq, &priv->mcast_task, | |
664 | delay_until - jiffies); | |
665 | } | |
69911416 DL |
666 | if (mcast) { |
667 | init_completion(&mcast->done); | |
668 | set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags); | |
08bc3276 | 669 | ipoib_mcast_join(dev, mcast); |
69911416 DL |
670 | } |
671 | spin_unlock_irq(&priv->lock); | |
1da177e4 LT |
672 | } |
673 | ||
674 | int ipoib_mcast_start_thread(struct net_device *dev) | |
675 | { | |
676 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1c0453d6 | 677 | unsigned long flags; |
1da177e4 LT |
678 | |
679 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | |
680 | ||
1c0453d6 | 681 | spin_lock_irqsave(&priv->lock, flags); |
69911416 | 682 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); |
1c0453d6 | 683 | spin_unlock_irqrestore(&priv->lock, flags); |
1da177e4 LT |
684 | |
685 | return 0; | |
686 | } | |
687 | ||
efc82eee | 688 | int ipoib_mcast_stop_thread(struct net_device *dev) |
1da177e4 LT |
689 | { |
690 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1c0453d6 | 691 | unsigned long flags; |
1da177e4 LT |
692 | |
693 | ipoib_dbg_mcast(priv, "stopping multicast thread\n"); | |
694 | ||
1c0453d6 | 695 | spin_lock_irqsave(&priv->lock, flags); |
1da177e4 | 696 | cancel_delayed_work(&priv->mcast_task); |
1c0453d6 | 697 | spin_unlock_irqrestore(&priv->lock, flags); |
1da177e4 | 698 | |
efc82eee | 699 | flush_workqueue(priv->wq); |
1da177e4 | 700 | |
1da177e4 LT |
701 | return 0; |
702 | } | |
703 | ||
5a0e81f6 | 704 | static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) |
1da177e4 LT |
705 | { |
706 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
1da177e4 LT |
707 | int ret = 0; |
708 | ||
e07832b6 | 709 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
69911416 DL |
710 | ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n"); |
711 | ||
712 | if (!IS_ERR_OR_NULL(mcast->mc)) | |
e07832b6 SH |
713 | ib_sa_free_multicast(mcast->mc); |
714 | ||
faec2f7b | 715 | if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { |
5b095d98 | 716 | ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", |
fcace2fe | 717 | mcast->mcmember.mgid.raw); |
1da177e4 | 718 | |
faec2f7b | 719 | /* Remove ourselves from the multicast group */ |
9eae554c RD |
720 | ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid, |
721 | be16_to_cpu(mcast->mcmember.mlid)); | |
faec2f7b | 722 | if (ret) |
9eae554c | 723 | ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret); |
69911416 DL |
724 | } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) |
725 | ipoib_dbg(priv, "leaving with no mcmember but not a " | |
726 | "SENDONLY join\n"); | |
1da177e4 | 727 | |
1da177e4 LT |
728 | return 0; |
729 | } | |
730 | ||
432c55ff CL |
731 | /* |
732 | * Check if the multicast group is sendonly. If so remove it from the maps | |
733 | * and add to the remove list | |
734 | */ | |
735 | void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid, | |
736 | struct list_head *remove_list) | |
737 | { | |
738 | /* Is this multicast ? */ | |
739 | if (*mgid == 0xff) { | |
740 | struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid); | |
741 | ||
742 | if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
743 | list_del(&mcast->list); | |
744 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
745 | list_add_tail(&mcast->list, remove_list); | |
746 | } | |
747 | } | |
748 | } | |
749 | ||
50be28de | 750 | void ipoib_mcast_remove_list(struct list_head *remove_list) |
5a0e81f6 CL |
751 | { |
752 | struct ipoib_mcast *mcast, *tmcast; | |
753 | ||
754 | list_for_each_entry_safe(mcast, tmcast, remove_list, list) { | |
50be28de | 755 | ipoib_mcast_leave(mcast->dev, mcast); |
5a0e81f6 CL |
756 | ipoib_mcast_free(mcast); |
757 | } | |
758 | } | |
759 | ||
/*
 * Transmit @skb to the multicast group addressed by @daddr.
 *
 * @dev:   IPoIB net device
 * @daddr: destination hardware address; the MGID starts at offset 4
 * @skb:   packet to send; consumed on every path
 *
 * The packet is dropped (and counted in tx_dropped) when the device is
 * not operational, the broadcast group is missing or not attached, a new
 * group cannot be allocated, or the group's pending queue is full.
 * If the group has no address handle yet, the packet is queued on the
 * group and the join task is kicked; an unknown MGID first gets a new
 * send-only group.  Otherwise the packet is sent right away through the
 * group's address handle.
 */
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
	struct ipoib_dev_priv *priv = netdev_priv(dev);
	struct ipoib_mcast *mcast;
	unsigned long flags;
	void *mgid = daddr + 4;

	spin_lock_irqsave(&priv->lock, flags);

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
	    !priv->broadcast					||
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
		goto unlock;
	}

	mcast = __ipoib_mcast_find(dev, mgid);
	if (!mcast || !mcast->ah) {
		if (!mcast) {
			/* Let's create a new send only group now */
			ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
					mgid);

			/* Atomic allocation: priv->lock is held with IRQs off. */
			mcast = ipoib_mcast_alloc(dev, 0);
			if (!mcast) {
				ipoib_warn(priv, "unable to allocate memory "
					   "for multicast structure\n");
				++dev->stats.tx_dropped;
				dev_kfree_skb_any(skb);
				goto unlock;
			}

			set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
			memcpy(mcast->mcmember.mgid.raw, mgid,
			       sizeof (union ib_gid));
			__ipoib_mcast_add(dev, mcast);
			list_add_tail(&mcast->list, &priv->multicast_list);
		}
		/* No address handle yet: park the packet until the join finishes. */
		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
			skb_queue_tail(&mcast->pkt_queue, skb);
		else {
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}
		/* A join already in flight will pick the packet up on completion. */
		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
		}
	} else {
		struct ipoib_neigh *neigh;

		/* ipoib_neigh_get() is called with priv->lock released. */
		spin_unlock_irqrestore(&priv->lock, flags);
		neigh = ipoib_neigh_get(dev, daddr);
		spin_lock_irqsave(&priv->lock, flags);
		if (!neigh) {
			neigh = ipoib_neigh_alloc(daddr, dev);
			if (neigh) {
				/* The new neigh holds its own reference on the AH. */
				kref_get(&mcast->ah->ref);
				neigh->ah	= mcast->ah;
				list_add_tail(&neigh->list, &mcast->neigh_list);
			}
		}
		spin_unlock_irqrestore(&priv->lock, flags);
		ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
		if (neigh)
			ipoib_neigh_put(neigh);
		return;
	}

unlock:
	spin_unlock_irqrestore(&priv->lock, flags);
}
832 | ||
833 | void ipoib_mcast_dev_flush(struct net_device *dev) | |
834 | { | |
835 | struct ipoib_dev_priv *priv = netdev_priv(dev); | |
836 | LIST_HEAD(remove_list); | |
988bd503 | 837 | struct ipoib_mcast *mcast, *tmcast; |
1da177e4 LT |
838 | unsigned long flags; |
839 | ||
840 | ipoib_dbg_mcast(priv, "flushing multicast list\n"); | |
841 | ||
842 | spin_lock_irqsave(&priv->lock, flags); | |
1da177e4 | 843 | |
988bd503 EC |
844 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { |
845 | list_del(&mcast->list); | |
846 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
847 | list_add_tail(&mcast->list, &remove_list); | |
1da177e4 LT |
848 | } |
849 | ||
850 | if (priv->broadcast) { | |
3cd96564 | 851 | rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree); |
988bd503 EC |
852 | list_add_tail(&priv->broadcast->list, &remove_list); |
853 | priv->broadcast = NULL; | |
1da177e4 LT |
854 | } |
855 | ||
856 | spin_unlock_irqrestore(&priv->lock, flags); | |
857 | ||
69911416 DL |
858 | /* |
859 | * make sure the in-flight joins have finished before we attempt | |
860 | * to leave | |
861 | */ | |
a9c8ba58 | 862 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) |
69911416 | 863 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) |
a9c8ba58 ES |
864 | wait_for_completion(&mcast->done); |
865 | ||
50be28de | 866 | ipoib_mcast_remove_list(&remove_list); |
1da177e4 LT |
867 | } |
868 | ||
3e4aa12f | 869 | static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast) |
5e47596b | 870 | { |
5e47596b JG |
871 | /* reserved QPN, prefix, scope */ |
872 | if (memcmp(addr, broadcast, 6)) | |
873 | return 0; | |
874 | /* signature lower, pkey */ | |
875 | if (memcmp(addr + 7, broadcast + 7, 3)) | |
876 | return 0; | |
877 | return 1; | |
878 | } | |
879 | ||
c4028958 | 880 | void ipoib_mcast_restart_task(struct work_struct *work) |
1da177e4 | 881 | { |
c4028958 DH |
882 | struct ipoib_dev_priv *priv = |
883 | container_of(work, struct ipoib_dev_priv, restart_task); | |
884 | struct net_device *dev = priv->dev; | |
22bedad3 | 885 | struct netdev_hw_addr *ha; |
1da177e4 LT |
886 | struct ipoib_mcast *mcast, *tmcast; |
887 | LIST_HEAD(remove_list); | |
888 | unsigned long flags; | |
335a64a5 | 889 | struct ib_sa_mcmember_rec rec; |
1da177e4 | 890 | |
69911416 DL |
891 | if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) |
892 | /* | |
893 | * shortcut...on shutdown flush is called next, just | |
894 | * let it do all the work | |
895 | */ | |
896 | return; | |
1da177e4 | 897 | |
69911416 | 898 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); |
4e0ab200 | 899 | |
932ff279 | 900 | local_irq_save(flags); |
e308a5d8 | 901 | netif_addr_lock(dev); |
78bfe0b5 | 902 | spin_lock(&priv->lock); |
1da177e4 LT |
903 | |
904 | /* | |
905 | * Unfortunately, the networking core only gives us a list of all of | |
906 | * the multicast hardware addresses. We need to figure out which ones | |
907 | * are new and which ones have been removed | |
908 | */ | |
909 | ||
910 | /* Clear out the found flag */ | |
911 | list_for_each_entry(mcast, &priv->multicast_list, list) | |
912 | clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
913 | ||
914 | /* Mark all of the entries that are found or don't exist */ | |
22bedad3 | 915 | netdev_for_each_mc_addr(ha, dev) { |
1da177e4 LT |
916 | union ib_gid mgid; |
917 | ||
22bedad3 | 918 | if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) |
5e47596b JG |
919 | continue; |
920 | ||
22bedad3 | 921 | memcpy(mgid.raw, ha->addr + 4, sizeof mgid); |
1da177e4 | 922 | |
1da177e4 LT |
923 | mcast = __ipoib_mcast_find(dev, &mgid); |
924 | if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
925 | struct ipoib_mcast *nmcast; | |
926 | ||
335a64a5 OG |
927 | /* ignore group which is directly joined by userspace */ |
928 | if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) && | |
929 | !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) { | |
5b095d98 | 930 | ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n", |
fcace2fe | 931 | mgid.raw); |
335a64a5 OG |
932 | continue; |
933 | } | |
934 | ||
1da177e4 | 935 | /* Not found or send-only group, let's add a new entry */ |
5b095d98 | 936 | ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n", |
fcace2fe | 937 | mgid.raw); |
1da177e4 LT |
938 | |
939 | nmcast = ipoib_mcast_alloc(dev, 0); | |
940 | if (!nmcast) { | |
941 | ipoib_warn(priv, "unable to allocate memory for multicast structure\n"); | |
942 | continue; | |
943 | } | |
944 | ||
945 | set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags); | |
946 | ||
947 | nmcast->mcmember.mgid = mgid; | |
948 | ||
949 | if (mcast) { | |
950 | /* Destroy the send only entry */ | |
179e0917 | 951 | list_move_tail(&mcast->list, &remove_list); |
1da177e4 LT |
952 | |
953 | rb_replace_node(&mcast->rb_node, | |
954 | &nmcast->rb_node, | |
955 | &priv->multicast_tree); | |
956 | } else | |
957 | __ipoib_mcast_add(dev, nmcast); | |
958 | ||
959 | list_add_tail(&nmcast->list, &priv->multicast_list); | |
960 | } | |
961 | ||
962 | if (mcast) | |
963 | set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags); | |
964 | } | |
965 | ||
966 | /* Remove all of the entries don't exist anymore */ | |
967 | list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { | |
968 | if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && | |
969 | !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
5b095d98 | 970 | ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n", |
fcace2fe | 971 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
972 | |
973 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
974 | ||
975 | /* Move to the remove list */ | |
179e0917 | 976 | list_move_tail(&mcast->list, &remove_list); |
1da177e4 LT |
977 | } |
978 | } | |
78bfe0b5 MT |
979 | |
980 | spin_unlock(&priv->lock); | |
e308a5d8 | 981 | netif_addr_unlock(dev); |
932ff279 | 982 | local_irq_restore(flags); |
1da177e4 | 983 | |
69911416 DL |
984 | /* |
985 | * make sure the in-flight joins have finished before we attempt | |
986 | * to leave | |
987 | */ | |
988 | list_for_each_entry_safe(mcast, tmcast, &remove_list, list) | |
989 | if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) | |
990 | wait_for_completion(&mcast->done); | |
991 | ||
50be28de | 992 | ipoib_mcast_remove_list(&remove_list); |
962121b4 | 993 | |
69911416 DL |
994 | /* |
995 | * Double check that we are still up | |
996 | */ | |
997 | if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) { | |
998 | spin_lock_irqsave(&priv->lock, flags); | |
999 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); | |
1000 | spin_unlock_irqrestore(&priv->lock, flags); | |
1001 | } | |
1da177e4 LT |
1002 | } |
1003 | ||
8ae5a8a2 RD |
1004 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
1005 | ||
1da177e4 LT |
1006 | struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) |
1007 | { | |
1008 | struct ipoib_mcast_iter *iter; | |
1009 | ||
1010 | iter = kmalloc(sizeof *iter, GFP_KERNEL); | |
1011 | if (!iter) | |
1012 | return NULL; | |
1013 | ||
1014 | iter->dev = dev; | |
1732b0ef | 1015 | memset(iter->mgid.raw, 0, 16); |
1da177e4 LT |
1016 | |
1017 | if (ipoib_mcast_iter_next(iter)) { | |
1732b0ef | 1018 | kfree(iter); |
1da177e4 LT |
1019 | return NULL; |
1020 | } | |
1021 | ||
1022 | return iter; | |
1023 | } | |
1024 | ||
1da177e4 LT |
1025 | int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) |
1026 | { | |
1027 | struct ipoib_dev_priv *priv = netdev_priv(iter->dev); | |
1028 | struct rb_node *n; | |
1029 | struct ipoib_mcast *mcast; | |
1030 | int ret = 1; | |
1031 | ||
1032 | spin_lock_irq(&priv->lock); | |
1033 | ||
1034 | n = rb_first(&priv->multicast_tree); | |
1035 | ||
1036 | while (n) { | |
1037 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
1038 | ||
1039 | if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, | |
1040 | sizeof (union ib_gid)) < 0) { | |
1041 | iter->mgid = mcast->mcmember.mgid; | |
1042 | iter->created = mcast->created; | |
1043 | iter->queuelen = skb_queue_len(&mcast->pkt_queue); | |
1044 | iter->complete = !!mcast->ah; | |
1045 | iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); | |
1046 | ||
1047 | ret = 0; | |
1048 | ||
1049 | break; | |
1050 | } | |
1051 | ||
1052 | n = rb_next(n); | |
1053 | } | |
1054 | ||
1055 | spin_unlock_irq(&priv->lock); | |
1056 | ||
1057 | return ret; | |
1058 | } | |
1059 | ||
1060 | void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, | |
1061 | union ib_gid *mgid, | |
1062 | unsigned long *created, | |
1063 | unsigned int *queuelen, | |
1064 | unsigned int *complete, | |
1065 | unsigned int *send_only) | |
1066 | { | |
1067 | *mgid = iter->mgid; | |
1068 | *created = iter->created; | |
1069 | *queuelen = iter->queuelen; | |
1070 | *complete = iter->complete; | |
1071 | *send_only = iter->send_only; | |
1072 | } | |
8ae5a8a2 RD |
1073 | |
1074 | #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ |