]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. | |
2a1d9b7f RD |
3 | * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. |
4 | * Copyright (c) 2004 Voltaire, Inc. All rights reserved. | |
1da177e4 LT |
5 | * |
6 | * This software is available to you under a choice of one of two | |
7 | * licenses. You may choose to be licensed under the terms of the GNU | |
8 | * General Public License (GPL) Version 2, available from the file | |
9 | * COPYING in the main directory of this source tree, or the | |
10 | * OpenIB.org BSD license below: | |
11 | * | |
12 | * Redistribution and use in source and binary forms, with or | |
13 | * without modification, are permitted provided that the following | |
14 | * conditions are met: | |
15 | * | |
16 | * - Redistributions of source code must retain the above | |
17 | * copyright notice, this list of conditions and the following | |
18 | * disclaimer. | |
19 | * | |
20 | * - Redistributions in binary form must reproduce the above | |
21 | * copyright notice, this list of conditions and the following | |
22 | * disclaimer in the documentation and/or other materials | |
23 | * provided with the distribution. | |
24 | * | |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
32 | * SOFTWARE. | |
1da177e4 LT |
33 | */ |
34 | ||
35 | #include <linux/skbuff.h> | |
36 | #include <linux/rtnetlink.h> | |
fec14d2f | 37 | #include <linux/moduleparam.h> |
1da177e4 LT |
38 | #include <linux/ip.h> |
39 | #include <linux/in.h> | |
40 | #include <linux/igmp.h> | |
41 | #include <linux/inetdevice.h> | |
42 | #include <linux/delay.h> | |
43 | #include <linux/completion.h> | |
5a0e3ad6 | 44 | #include <linux/slab.h> |
1da177e4 | 45 | |
14c85021 ACM |
46 | #include <net/dst.h> |
47 | ||
1da177e4 LT |
48 | #include "ipoib.h" |
49 | ||
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Module parameter: >0 enables ipoib_dbg_mcast() tracing (0644 in sysfs). */
static int mcast_debug_level;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif
57 | ||
1da177e4 LT |
/*
 * Cursor state for iterating over a device's multicast groups
 * (used by the debugfs/seq_file mcast listing code).
 */
struct ipoib_mcast_iter {
	struct net_device *dev;		/* device whose groups are walked */
	union ib_gid       mgid;	/* GID of the current group */
	unsigned long      created;	/* jiffies timestamp of group creation */
	unsigned int       queuelen;	/* packets queued awaiting join */
	unsigned int       complete;	/* non-zero once the join finished */
	unsigned int       send_only;	/* non-zero for send-only groups */
};
66 | ||
3b561130 ES |
/* join state that allows creating mcg with sendonly member request
 * (passed in ib_sa_mcmember_rec.join_state when the SM supports it)
 */
#define SENDONLY_FULLMEMBER_JOIN	8
69 | ||
/*
 * (Re)schedule the multicast join task on priv->wq.
 *
 * @priv:  per-device private data
 * @mcast: group whose join just failed, or NULL
 * @delay: true to apply a retry delay, false to run immediately
 *
 * This should be called with the priv->lock held
 */
static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
					       struct ipoib_mcast *mcast,
					       bool delay)
{
	/* Nothing to do if the device is not operationally up */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		return;

	/*
	 * We will be scheduling *something*, so cancel whatever is
	 * currently scheduled first
	 */
	cancel_delayed_work(&priv->mcast_task);
	if (mcast && delay) {
		/*
		 * We had a failure and want to schedule a retry later
		 */
		mcast->backoff *= 2;	/* exponential backoff, capped below */
		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
		mcast->delay_until = jiffies + (mcast->backoff * HZ);
		/*
		 * Mark this mcast for its delay, but restart the
		 * task immediately.  The join task will make sure to
		 * clear out all entries without delays, and then
		 * schedule itself to run again when the earliest
		 * delay expires
		 */
		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
	} else if (delay) {
		/*
		 * Special case of retrying after a failure to
		 * allocate the broadcast multicast group, wait
		 * 1 second and try again
		 */
		queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
	} else
		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
111 | ||
/*
 * Free a multicast group entry: drop its neighbours, address handle,
 * and any packets still queued waiting for the join to finish.
 * Queued packets are accounted as tx_dropped.
 */
static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
	struct net_device *dev = mcast->dev;
	int tx_dropped = 0;

	ipoib_dbg_mcast(ipoib_priv(dev), "deleting multicast group %pI6\n",
			mcast->mcmember.mgid.raw);

	/* remove all neigh connected to this mcast */
	ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw);

	if (mcast->ah)
		ipoib_put_ah(mcast->ah);

	/* Packets queued for a join that never completed are dropped */
	while (!skb_queue_empty(&mcast->pkt_queue)) {
		++tx_dropped;
		dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
	}

	/* Update the device stats under the tx lock */
	netif_tx_lock_bh(dev);
	dev->stats.tx_dropped += tx_dropped;
	netif_tx_unlock_bh(dev);

	kfree(mcast);
}
137 | ||
138 | static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, | |
139 | int can_sleep) | |
140 | { | |
141 | struct ipoib_mcast *mcast; | |
142 | ||
de6eb66b | 143 | mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); |
1da177e4 LT |
144 | if (!mcast) |
145 | return NULL; | |
146 | ||
1da177e4 LT |
147 | mcast->dev = dev; |
148 | mcast->created = jiffies; | |
69911416 | 149 | mcast->delay_until = jiffies; |
ce5b65cc | 150 | mcast->backoff = 1; |
1da177e4 LT |
151 | |
152 | INIT_LIST_HEAD(&mcast->list); | |
153 | INIT_LIST_HEAD(&mcast->neigh_list); | |
154 | skb_queue_head_init(&mcast->pkt_queue); | |
155 | ||
1da177e4 LT |
156 | return mcast; |
157 | } | |
158 | ||
432c55ff | 159 | static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid) |
1da177e4 | 160 | { |
c1048aff | 161 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1da177e4 LT |
162 | struct rb_node *n = priv->multicast_tree.rb_node; |
163 | ||
164 | while (n) { | |
165 | struct ipoib_mcast *mcast; | |
166 | int ret; | |
167 | ||
168 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
169 | ||
37c22a77 | 170 | ret = memcmp(mgid, mcast->mcmember.mgid.raw, |
1da177e4 LT |
171 | sizeof (union ib_gid)); |
172 | if (ret < 0) | |
173 | n = n->rb_left; | |
174 | else if (ret > 0) | |
175 | n = n->rb_right; | |
176 | else | |
177 | return mcast; | |
178 | } | |
179 | ||
180 | return NULL; | |
181 | } | |
182 | ||
183 | static int __ipoib_mcast_add(struct net_device *dev, struct ipoib_mcast *mcast) | |
184 | { | |
c1048aff | 185 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1da177e4 LT |
186 | struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL; |
187 | ||
188 | while (*n) { | |
189 | struct ipoib_mcast *tmcast; | |
190 | int ret; | |
191 | ||
192 | pn = *n; | |
193 | tmcast = rb_entry(pn, struct ipoib_mcast, rb_node); | |
194 | ||
195 | ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw, | |
196 | sizeof (union ib_gid)); | |
197 | if (ret < 0) | |
198 | n = &pn->rb_left; | |
199 | else if (ret > 0) | |
200 | n = &pn->rb_right; | |
201 | else | |
202 | return -EEXIST; | |
203 | } | |
204 | ||
205 | rb_link_node(&mcast->rb_node, pn, n); | |
206 | rb_insert_color(&mcast->rb_node, &priv->multicast_tree); | |
207 | ||
208 | return 0; | |
209 | } | |
210 | ||
211 | static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, | |
212 | struct ib_sa_mcmember_rec *mcmember) | |
213 | { | |
214 | struct net_device *dev = mcast->dev; | |
c1048aff | 215 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
cd565b4b | 216 | struct rdma_netdev *rn = netdev_priv(dev); |
7343b231 | 217 | struct ipoib_ah *ah; |
d8966fcd | 218 | struct rdma_ah_attr av; |
1da177e4 | 219 | int ret; |
d0de1362 | 220 | int set_qkey = 0; |
1da177e4 LT |
221 | |
222 | mcast->mcmember = *mcmember; | |
223 | ||
bea1e22d PM |
224 | /* Set the multicast MTU and cached Q_Key before we attach if it's |
225 | * the broadcast group. | |
226 | */ | |
1da177e4 LT |
227 | if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, |
228 | sizeof (union ib_gid))) { | |
e1d50dce JM |
229 | spin_lock_irq(&priv->lock); |
230 | if (!priv->broadcast) { | |
231 | spin_unlock_irq(&priv->lock); | |
232 | return -EAGAIN; | |
233 | } | |
3fd0605c ES |
234 | /*update priv member according to the new mcast*/ |
235 | priv->broadcast->mcmember.qkey = mcmember->qkey; | |
236 | priv->broadcast->mcmember.mtu = mcmember->mtu; | |
237 | priv->broadcast->mcmember.traffic_class = mcmember->traffic_class; | |
238 | priv->broadcast->mcmember.rate = mcmember->rate; | |
239 | priv->broadcast->mcmember.sl = mcmember->sl; | |
240 | priv->broadcast->mcmember.flow_label = mcmember->flow_label; | |
241 | priv->broadcast->mcmember.hop_limit = mcmember->hop_limit; | |
242 | /* assume if the admin and the mcast are the same both can be changed */ | |
243 | if (priv->mcast_mtu == priv->admin_mtu) | |
244 | priv->admin_mtu = | |
245 | priv->mcast_mtu = | |
246 | IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); | |
247 | else | |
248 | priv->mcast_mtu = | |
249 | IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu)); | |
250 | ||
1da177e4 | 251 | priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); |
e1d50dce | 252 | spin_unlock_irq(&priv->lock); |
e622f2f4 | 253 | priv->tx_wr.remote_qkey = priv->qkey; |
d0de1362 | 254 | set_qkey = 1; |
1da177e4 LT |
255 | } |
256 | ||
257 | if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
258 | if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { | |
5b095d98 | 259 | ipoib_warn(priv, "multicast group %pI6 already attached\n", |
fcace2fe | 260 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
261 | |
262 | return 0; | |
263 | } | |
264 | ||
cd565b4b ES |
265 | ret = rn->attach_mcast(dev, priv->ca, &mcast->mcmember.mgid, |
266 | be16_to_cpu(mcast->mcmember.mlid), | |
267 | set_qkey, priv->qkey); | |
1da177e4 | 268 | if (ret < 0) { |
5b095d98 | 269 | ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", |
fcace2fe | 270 | mcast->mcmember.mgid.raw); |
1da177e4 LT |
271 | |
272 | clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); | |
273 | return ret; | |
274 | } | |
275 | } | |
276 | ||
d8966fcd | 277 | memset(&av, 0, sizeof(av)); |
44c58487 | 278 | av.type = rdma_ah_find_type(priv->ca, priv->port); |
d8966fcd DC |
279 | rdma_ah_set_dlid(&av, be16_to_cpu(mcast->mcmember.mlid)), |
280 | rdma_ah_set_port_num(&av, priv->port); | |
281 | rdma_ah_set_sl(&av, mcast->mcmember.sl); | |
282 | rdma_ah_set_static_rate(&av, mcast->mcmember.rate); | |
283 | ||
284 | rdma_ah_set_grh(&av, &mcast->mcmember.mgid, | |
285 | be32_to_cpu(mcast->mcmember.flow_label), | |
286 | 0, mcast->mcmember.hop_limit, | |
287 | mcast->mcmember.traffic_class); | |
288 | ||
289 | ah = ipoib_create_ah(dev, priv->pd, &av); | |
290 | if (IS_ERR(ah)) { | |
291 | ipoib_warn(priv, "ib_address_create failed %ld\n", | |
292 | -PTR_ERR(ah)); | |
293 | /* use original error */ | |
294 | return PTR_ERR(ah); | |
1da177e4 | 295 | } |
d8966fcd DC |
296 | spin_lock_irq(&priv->lock); |
297 | mcast->ah = ah; | |
298 | spin_unlock_irq(&priv->lock); | |
299 | ||
300 | ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", | |
301 | mcast->mcmember.mgid.raw, | |
302 | mcast->ah->ah, | |
303 | be16_to_cpu(mcast->mcmember.mlid), | |
304 | mcast->mcmember.sl); | |
1da177e4 LT |
305 | |
306 | /* actually send any queued packets */ | |
943c246e | 307 | netif_tx_lock_bh(dev); |
1da177e4 LT |
308 | while (!skb_queue_empty(&mcast->pkt_queue)) { |
309 | struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); | |
69cce1d1 | 310 | |
943c246e | 311 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
312 | |
313 | skb->dev = dev; | |
936d7de3 | 314 | |
d32b9a81 FD |
315 | ret = dev_queue_xmit(skb); |
316 | if (ret) | |
317 | ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n", | |
318 | __func__, ret); | |
943c246e | 319 | netif_tx_lock_bh(dev); |
1da177e4 | 320 | } |
943c246e | 321 | netif_tx_unlock_bh(dev); |
1da177e4 LT |
322 | |
323 | return 0; | |
324 | } | |
325 | ||
e8224e4b YE |
/*
 * Work item: turn the carrier on once the broadcast group has been
 * joined and the IB port is ACTIVE.  Also probes the SM for
 * sendonly-fullmember support and, for datagram mode, raises the
 * device MTU to the multicast MTU.
 */
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   carrier_on_task);
	struct ib_port_attr attr;

	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
		return;
	}
	/*
	 * Check if can send sendonly MCG's with sendonly-fullmember join state.
	 * It done here after the successfully join to the broadcast group,
	 * because the broadcast group must always be joined first and is always
	 * re-joined if the SM changes substantially.
	 */
	priv->sm_fullmember_sendonly_support =
		ib_sa_sendonly_fullmem_support(&ipoib_sa_client,
					       priv->ca, priv->port);
	/*
	 * Take rtnl_lock to avoid racing with ipoib_stop() and
	 * turning the carrier back on while a device is being
	 * removed. However, ipoib_stop() will attempt to flush
	 * the workqueue while holding the rtnl lock, so loop
	 * on trylock until either we get the lock or we see
	 * FLAG_OPER_UP go away as that signals that we are bailing
	 * and can safely ignore the carrier on work.
	 */
	while (!rtnl_trylock()) {
		if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
			return;
		else
			msleep(20);
	}
	if (!ipoib_cm_admin_enabled(priv->dev))
		dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
	netif_carrier_on(priv->dev);
	rtnl_unlock();
}
366 | ||
faec2f7b SH |
/*
 * SA join completion callback (runs from the SA client context).
 *
 * On success: reset the backoff, and for the broadcast group schedule
 * the carrier-on work plus an immediate re-run of the join task so the
 * remaining (non-broadcast) joins go out.
 *
 * On failure: log (rate-limited to 20 entries; sendonly -EINVAL fails
 * silently), drop queued packets for a twice-failed sendonly group, or
 * otherwise reschedule the join task with backoff.
 *
 * In every path mcast->mc is set before IPOIB_MCAST_FLAG_BUSY is
 * cleared (under priv->lock) and mcast->done is completed last.
 */
static int ipoib_mcast_join_complete(int status,
				     struct ib_sa_multicast *multicast)
{
	struct ipoib_mcast *mcast = multicast->context;
	struct net_device *dev = mcast->dev;
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
			"sendonly " : "",
			mcast->mcmember.mgid.raw, status);

	/* We trap for port events ourselves. */
	if (status == -ENETRESET) {
		status = 0;
		goto out;
	}

	if (!status)
		status = ipoib_mcast_join_finish(mcast, &multicast->rec);

	if (!status) {
		mcast->backoff = 1;
		mcast->delay_until = jiffies;

		/*
		 * Defer carrier on work to priv->wq to avoid a
		 * deadlock on rtnl_lock here. Requeue our multicast
		 * work too, which will end up happening right after
		 * our carrier on task work and will allow us to
		 * send out all of the non-broadcast joins
		 */
		if (mcast == priv->broadcast) {
			spin_lock_irq(&priv->lock);
			queue_work(priv->wq, &priv->carrier_on_task);
			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			goto out_locked;
		}
	} else {
		/* sendonly joins rejected with -EINVAL are expected; don't log */
		bool silent_fail =
		    test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
		    status == -EINVAL;

		if (mcast->logcount < 20) {
			if (status == -ETIMEDOUT || status == -EAGAIN ||
			    silent_fail) {
				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
						mcast->mcmember.mgid.raw, status);
			} else {
				ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
					   test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
					   mcast->mcmember.mgid.raw, status);
			}

			if (!silent_fail)
				mcast->logcount++;
		}

		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
		    mcast->backoff >= 2) {
			/*
			 * We only retry sendonly joins once before we drop
			 * the packet and quit trying to deal with the
			 * group. However, we leave the group in the
			 * mcast list as an unjoined group. If we want to
			 * try joining again, we simply queue up a packet
			 * and restart the join thread. The empty queue
			 * is why the join thread ignores this group.
			 */
			mcast->backoff = 1;
			netif_tx_lock_bh(dev);
			while (!skb_queue_empty(&mcast->pkt_queue)) {
				++dev->stats.tx_dropped;
				dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
			}
			netif_tx_unlock_bh(dev);
		} else {
			spin_lock_irq(&priv->lock);
			/* Requeue this join task with a backoff delay */
			__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
			goto out_locked;
		}
	}
out:
	spin_lock_irq(&priv->lock);
out_locked:
	/*
	 * Make sure to set mcast->mc before we clear the busy flag to avoid
	 * racing with code that checks for BUSY before checking mcast->mc
	 */
	if (status)
		mcast->mc = NULL;
	else
		mcast->mc = multicast;
	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
	spin_unlock_irq(&priv->lock);
	complete(&mcast->done);

	return status;
}
468 | ||
08bc3276 AE |
/*
 * Issue an asynchronous SA join for @mcast; completion is delivered to
 * ipoib_mcast_join_complete().  Non-broadcast groups inherit Q_Key,
 * MTU, rate, SL, flow label and hop limit from the broadcast group
 * (RFC 4391).  On ib_sa_join_multicast() failure the join task is
 * rescheduled with backoff.
 *
 * Caller must hold 'priv->lock'
 * (the lock is dropped around the SA call and reacquired before return).
 *
 * Returns 0, or -EINVAL if the device is not ready to join.
 */
static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct ib_sa_multicast *multicast;
	struct ib_sa_mcmember_rec rec = {
		.join_state = 1
	};
	ib_sa_comp_mask comp_mask;
	int ret = 0;

	if (!priv->broadcast ||
	    !test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		return -EINVAL;

	/* Mark the join in flight before handing off to the SA client */
	init_completion(&mcast->done);
	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);

	ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw);

	rec.mgid     = mcast->mcmember.mgid;
	rec.port_gid = priv->local_gid;
	rec.pkey     = cpu_to_be16(priv->pkey);

	comp_mask =
		IB_SA_MCMEMBER_REC_MGID		|
		IB_SA_MCMEMBER_REC_PORT_GID	|
		IB_SA_MCMEMBER_REC_PKEY		|
		IB_SA_MCMEMBER_REC_JOIN_STATE;

	if (mcast != priv->broadcast) {
		/*
		 * RFC 4391:
		 *  The MGID MUST use the same P_Key, Q_Key, SL, MTU,
		 *  and HopLimit as those used in the broadcast-GID.  The rest
		 *  of attributes SHOULD follow the values used in the
		 *  broadcast-GID as well.
		 */
		comp_mask |=
			IB_SA_MCMEMBER_REC_QKEY			|
			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
			IB_SA_MCMEMBER_REC_MTU			|
			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
			IB_SA_MCMEMBER_REC_RATE			|
			IB_SA_MCMEMBER_REC_SL			|
			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
			IB_SA_MCMEMBER_REC_HOP_LIMIT;

		rec.qkey	  = priv->broadcast->mcmember.qkey;
		rec.mtu_selector  = IB_SA_EQ;
		rec.mtu		  = priv->broadcast->mcmember.mtu;
		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
		rec.rate_selector = IB_SA_EQ;
		rec.rate	  = priv->broadcast->mcmember.rate;
		rec.sl		  = priv->broadcast->mcmember.sl;
		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;

		/*
		 * Send-only IB Multicast joins work at the core IB layer but
		 * require specific SM support.
		 * We can use such joins here only if the current SM supports that feature.
		 * However, if not, we emulate an Ethernet multicast send,
		 * which does not require a multicast subscription and will
		 * still send properly. The most appropriate thing to
		 * do is to create the group if it doesn't exist as that
		 * most closely emulates the behavior, from a user space
		 * application perspective, of Ethernet multicast operation.
		 */
		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
		    priv->sm_fullmember_sendonly_support)
			/* SM supports sendonly-fullmember, otherwise fallback to full-member */
			rec.join_state = SENDONLY_FULLMEMBER_JOIN;
	}
	/* The SA call may sleep; drop the lock around it */
	spin_unlock_irq(&priv->lock);

	multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
					 &rec, comp_mask, GFP_KERNEL,
					 ipoib_mcast_join_complete, mcast);
	spin_lock_irq(&priv->lock);
	if (IS_ERR(multicast)) {
		ret = PTR_ERR(multicast);
		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
		/* Requeue this join task with a backoff delay */
		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
		spin_unlock_irq(&priv->lock);
		complete(&mcast->done);
		spin_lock_irq(&priv->lock);
	}
	return 0;
}
564 | ||
/*
 * Delayed-work handler driving all multicast joins for a device.
 *
 * Flow: verify the port is ACTIVE and the device address is set, then
 * (under priv->lock) create/join the broadcast group first; once it is
 * attached, start joins for every other group that is not busy and not
 * an empty sendonly group.  Groups still in backoff contribute to
 * delay_until, and the task reschedules itself for the earliest one.
 */
void ipoib_mcast_join_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, mcast_task.work);
	struct net_device *dev = priv->dev;
	struct ib_port_attr port_attr;
	unsigned long delay_until = 0;
	struct ipoib_mcast *mcast = NULL;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		return;

	if (ib_query_port(priv->ca, priv->port, &port_attr)) {
		ipoib_dbg(priv, "ib_query_port() failed\n");
		return;
	}
	if (port_attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
			  port_attr.state);
		return;
	}
	priv->local_lid = port_attr.lid;
	netif_addr_lock_bh(dev);

	/* Nothing to join until the device address (GID) is set */
	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
		netif_addr_unlock_bh(dev);
		return;
	}
	netif_addr_unlock_bh(dev);

	spin_lock_irq(&priv->lock);
	/* re-check under the lock: the device may have gone down */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		goto out;

	if (!priv->broadcast) {
		struct ipoib_mcast *broadcast;

		broadcast = ipoib_mcast_alloc(dev, 0);
		if (!broadcast) {
			ipoib_warn(priv, "failed to allocate broadcast group\n");
			/*
			 * Restart us after a 1 second delay to retry
			 * creating our broadcast group and attaching to
			 * it.  Until this succeeds, this ipoib dev is
			 * completely stalled (multicast wise).
			 */
			__ipoib_mcast_schedule_join_thread(priv, NULL, 1);
			goto out;
		}

		memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
		       sizeof (union ib_gid));
		priv->broadcast = broadcast;

		__ipoib_mcast_add(dev, priv->broadcast);
	}

	/* The broadcast group must be attached before any other join */
	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
			mcast = priv->broadcast;
			if (mcast->backoff > 1 &&
			    time_before(jiffies, mcast->delay_until)) {
				delay_until = mcast->delay_until;
				mcast = NULL;
			}
		}
		goto out;
	}

	/*
	 * We'll never get here until the broadcast group is both allocated
	 * and attached
	 */
	list_for_each_entry(mcast, &priv->multicast_list, list) {
		if (IS_ERR_OR_NULL(mcast->mc) &&
		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
		    (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
		     !skb_queue_empty(&mcast->pkt_queue))) {
			if (mcast->backoff == 1 ||
			    time_after_eq(jiffies, mcast->delay_until)) {
				/* Found the next unjoined group */
				if (ipoib_mcast_join(dev, mcast)) {
					spin_unlock_irq(&priv->lock);
					return;
				}
			} else if (!delay_until ||
				   time_before(mcast->delay_until, delay_until))
				delay_until = mcast->delay_until;
		}
	}

	mcast = NULL;
	ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");

out:
	if (delay_until) {
		/* Re-run when the earliest backoff delay expires */
		cancel_delayed_work(&priv->mcast_task);
		queue_delayed_work(priv->wq, &priv->mcast_task,
				   delay_until - jiffies);
	}
	if (mcast)
		ipoib_mcast_join(dev, mcast);

	spin_unlock_irq(&priv->lock);
}
671 | ||
5c37077f | 672 | void ipoib_mcast_start_thread(struct net_device *dev) |
1da177e4 | 673 | { |
c1048aff | 674 | struct ipoib_dev_priv *priv = ipoib_priv(dev); |
1c0453d6 | 675 | unsigned long flags; |
1da177e4 LT |
676 | |
677 | ipoib_dbg_mcast(priv, "starting multicast thread\n"); | |
678 | ||
1c0453d6 | 679 | spin_lock_irqsave(&priv->lock, flags); |
69911416 | 680 | __ipoib_mcast_schedule_join_thread(priv, NULL, 0); |
1c0453d6 | 681 | spin_unlock_irqrestore(&priv->lock, flags); |
1da177e4 LT |
682 | } |
683 | ||
/*
 * Stop the multicast join machinery for @dev, waiting for any queued
 * or running join task to finish.  Always returns 0.
 */
int ipoib_mcast_stop_thread(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);

	ipoib_dbg_mcast(priv, "stopping multicast thread\n");

	/* synchronous cancel: blocks until the work is idle */
	cancel_delayed_work_sync(&priv->mcast_task);

	return 0;
}
694 | ||
/*
 * Leave the multicast group: free the SA multicast handle and, if the
 * QP was attached, detach it.  Warns if called while a join is still
 * in flight (BUSY).  Always returns 0; detach errors are only logged.
 */
static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);
	int ret = 0;

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
		ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");

	if (!IS_ERR_OR_NULL(mcast->mc))
		ib_sa_free_multicast(mcast->mc);

	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
				mcast->mcmember.mgid.raw);

		/* Remove ourselves from the multicast group */
		ret = rn->detach_mcast(dev, priv->ca, &mcast->mcmember.mgid,
				       be16_to_cpu(mcast->mcmember.mlid));
		if (ret)
			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
	} else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
		ipoib_dbg(priv, "leaving with no mcmember but not a "
			  "SENDONLY join\n");

	return 0;
}
722 | ||
432c55ff CL |
723 | /* |
724 | * Check if the multicast group is sendonly. If so remove it from the maps | |
725 | * and add to the remove list | |
726 | */ | |
727 | void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid, | |
728 | struct list_head *remove_list) | |
729 | { | |
730 | /* Is this multicast ? */ | |
731 | if (*mgid == 0xff) { | |
732 | struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid); | |
733 | ||
734 | if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { | |
735 | list_del(&mcast->list); | |
736 | rb_erase(&mcast->rb_node, &priv->multicast_tree); | |
737 | list_add_tail(&mcast->list, remove_list); | |
738 | } | |
739 | } | |
740 | } | |
741 | ||
/*
 * Leave and free every group on @remove_list.  Waits for any in-flight
 * joins (BUSY groups) to complete first, so the leave never races a
 * pending join completion.
 */
void ipoib_mcast_remove_list(struct list_head *remove_list)
{
	struct ipoib_mcast *mcast, *tmcast;

	/*
	 * make sure the in-flight joins have finished before we attempt
	 * to leave
	 */
	list_for_each_entry_safe(mcast, tmcast, remove_list, list)
		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
			wait_for_completion(&mcast->done);

	list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
		ipoib_mcast_leave(mcast->dev, mcast);
		ipoib_mcast_free(mcast);
	}
}
759 | ||
/*
 * ipoib_mcast_send - transmit @skb to the multicast group named by @daddr.
 *
 * @daddr is the IPoIB hardware address; the MGID starts at byte 4.
 * If the group is not joined yet the packet is queued (bounded by
 * IPOIB_MAX_MCAST_QUEUE) and the join thread is kicked; once the group
 * has an address handle the packet is sent directly.  Consumes @skb on
 * every path (sent, queued, or dropped with tx_dropped accounting).
 */
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	struct rdma_netdev *rn = netdev_priv(dev);
	struct ipoib_mcast *mcast;
	unsigned long flags;
	void *mgid = daddr + 4;	/* MGID follows the 4-byte QPN in the hw addr */

	spin_lock_irqsave(&priv->lock, flags);

	/* Device not operational or broadcast group not attached: drop. */
	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
	    !priv->broadcast ||
	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
		++dev->stats.tx_dropped;
		dev_kfree_skb_any(skb);
		goto unlock;
	}

	mcast = __ipoib_mcast_find(dev, mgid);
	if (!mcast || !mcast->ah) {
		/* Group unknown or join not completed (no address handle yet). */
		if (!mcast) {
			/* Let's create a new send only group now */
			ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
					mgid);

			mcast = ipoib_mcast_alloc(dev, 0);
			if (!mcast) {
				ipoib_warn(priv, "unable to allocate memory "
					   "for multicast structure\n");
				++dev->stats.tx_dropped;
				dev_kfree_skb_any(skb);
				goto unlock;
			}

			set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
			memcpy(mcast->mcmember.mgid.raw, mgid,
			       sizeof (union ib_gid));
			__ipoib_mcast_add(dev, mcast);
			list_add_tail(&mcast->list, &priv->multicast_list);
		}
		/* Queue the packet until the join completes, within bounds. */
		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) {
			/* put pseudoheader back on for next time */
			skb_push(skb, sizeof(struct ipoib_pseudo_header));
			skb_queue_tail(&mcast->pkt_queue, skb);
		} else {
			++dev->stats.tx_dropped;
			dev_kfree_skb_any(skb);
		}
		/* No join in flight for this group: wake the join thread. */
		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
		}
	} else {
		struct ipoib_neigh *neigh;

		/*
		 * Drop priv->lock around the neighbour lookup/alloc; those
		 * helpers take their own locks.  NOTE(review): mcast is
		 * revalidated only implicitly by holding a queued packet
		 * path elsewhere — ordering here is deliberate, do not
		 * reshuffle.
		 */
		spin_unlock_irqrestore(&priv->lock, flags);
		neigh = ipoib_neigh_get(dev, daddr);
		spin_lock_irqsave(&priv->lock, flags);
		if (!neigh) {
			neigh = ipoib_neigh_alloc(daddr, dev);
			if (neigh) {
				/* Neighbour borrows a reference on the group's AH. */
				kref_get(&mcast->ah->ref);
				neigh->ah = mcast->ah;
				list_add_tail(&neigh->list, &mcast->neigh_list);
			}
		}
		spin_unlock_irqrestore(&priv->lock, flags);
		mcast->ah->last_send = rn->send(dev, skb, mcast->ah->ah,
						IB_MULTICAST_QPN);
		if (neigh)
			ipoib_neigh_put(neigh);
		return;
	}

unlock:
	spin_unlock_irqrestore(&priv->lock, flags);
}
836 | ||
/*
 * ipoib_mcast_dev_flush - detach and destroy every multicast group on @dev,
 * including the broadcast group.
 *
 * Groups are unlinked from the list/rbtree under priv->lock, collected on
 * a private list, and then left/freed outside the spinlock (leaving can
 * block).  priv->mcast_mutex serializes against concurrent join activity.
 */
void ipoib_mcast_dev_flush(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	LIST_HEAD(remove_list);
	struct ipoib_mcast *mcast, *tmcast;
	unsigned long flags;

	mutex_lock(&priv->mcast_mutex);
	ipoib_dbg_mcast(priv, "flushing multicast list\n");

	spin_lock_irqsave(&priv->lock, flags);

	/* Move every group from the device list/tree onto remove_list. */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		list_del(&mcast->list);
		rb_erase(&mcast->rb_node, &priv->multicast_tree);
		list_add_tail(&mcast->list, &remove_list);
	}

	/* The broadcast group lives outside multicast_list; handle it too. */
	if (priv->broadcast) {
		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
		list_add_tail(&priv->broadcast->list, &remove_list);
		priv->broadcast = NULL;
	}

	spin_unlock_irqrestore(&priv->lock, flags);

	/* Leave/free with the spinlock dropped; this may sleep. */
	ipoib_mcast_remove_list(&remove_list);
	mutex_unlock(&priv->mcast_mutex);
}
866 | ||
3e4aa12f | 867 | static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast) |
5e47596b | 868 | { |
5e47596b JG |
869 | /* reserved QPN, prefix, scope */ |
870 | if (memcmp(addr, broadcast, 6)) | |
871 | return 0; | |
872 | /* signature lower, pkey */ | |
873 | if (memcmp(addr + 7, broadcast + 7, 3)) | |
874 | return 0; | |
875 | return 1; | |
876 | } | |
877 | ||
/*
 * ipoib_mcast_restart_task - reconcile our joined groups with the
 * net core's current multicast address list.
 *
 * Runs from the restart_task workqueue item.  Diffs priv->multicast_list
 * against netdev_for_each_mc_addr() using the FOUND flag: new addresses
 * get fresh entries (replacing any send-only entry for the same MGID),
 * and entries no longer present are moved to a private list and removed.
 * Finally kicks the join thread if the device is still up.
 */
void ipoib_mcast_restart_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv =
		container_of(work, struct ipoib_dev_priv, restart_task);
	struct net_device *dev = priv->dev;
	struct netdev_hw_addr *ha;
	struct ipoib_mcast *mcast, *tmcast;
	LIST_HEAD(remove_list);
	unsigned long flags;
	struct ib_sa_mcmember_rec rec;

	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
		/*
		 * shortcut...on shutdown flush is called next, just
		 * let it do all the work
		 */
		return;

	ipoib_dbg_mcast(priv, "restarting multicast task\n");

	/* Lock order matters: irqs off, then addr_list lock, then priv->lock. */
	local_irq_save(flags);
	netif_addr_lock(dev);
	spin_lock(&priv->lock);

	/*
	 * Unfortunately, the networking core only gives us a list of all of
	 * the multicast hardware addresses. We need to figure out which ones
	 * are new and which ones have been removed
	 */

	/* Clear out the found flag */
	list_for_each_entry(mcast, &priv->multicast_list, list)
		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);

	/* Mark all of the entries that are found or don't exist */
	netdev_for_each_mc_addr(ha, dev) {
		union ib_gid mgid;

		if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
			continue;

		/* MGID starts at byte 4 of the hardware address. */
		memcpy(mgid.raw, ha->addr + 4, sizeof mgid);

		mcast = __ipoib_mcast_find(dev, &mgid);
		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			struct ipoib_mcast *nmcast;

			/* ignore group which is directly joined by userspace */
			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n",
						mgid.raw);
				continue;
			}

			/* Not found or send-only group, let's add a new entry */
			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n",
					mgid.raw);

			nmcast = ipoib_mcast_alloc(dev, 0);
			if (!nmcast) {
				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
				continue;
			}

			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);

			nmcast->mcmember.mgid = mgid;

			if (mcast) {
				/* Destroy the send only entry */
				list_move_tail(&mcast->list, &remove_list);

				/* Keep the tree slot; just swap in the new node. */
				rb_replace_node(&mcast->rb_node,
						&nmcast->rb_node,
						&priv->multicast_tree);
			} else
				__ipoib_mcast_add(dev, nmcast);

			list_add_tail(&nmcast->list, &priv->multicast_list);
		}

		if (mcast)
			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
	}

	/* Remove all of the entries don't exist anymore */
	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
			ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n",
					mcast->mcmember.mgid.raw);

			rb_erase(&mcast->rb_node, &priv->multicast_tree);

			/* Move to the remove list */
			list_move_tail(&mcast->list, &remove_list);
		}
	}

	/* Unlock in reverse acquisition order. */
	spin_unlock(&priv->lock);
	netif_addr_unlock(dev);
	local_irq_restore(flags);

	/* Leave/free the stale groups outside all the locks (may sleep). */
	ipoib_mcast_remove_list(&remove_list);

	/*
	 * Double check that we are still up
	 */
	if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
		spin_lock_irqsave(&priv->lock, flags);
		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
		spin_unlock_irqrestore(&priv->lock, flags);
	}
}
993 | ||
8ae5a8a2 RD |
994 | #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG |
995 | ||
1da177e4 LT |
996 | struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) |
997 | { | |
998 | struct ipoib_mcast_iter *iter; | |
999 | ||
1000 | iter = kmalloc(sizeof *iter, GFP_KERNEL); | |
1001 | if (!iter) | |
1002 | return NULL; | |
1003 | ||
1004 | iter->dev = dev; | |
1732b0ef | 1005 | memset(iter->mgid.raw, 0, 16); |
1da177e4 LT |
1006 | |
1007 | if (ipoib_mcast_iter_next(iter)) { | |
1732b0ef | 1008 | kfree(iter); |
1da177e4 LT |
1009 | return NULL; |
1010 | } | |
1011 | ||
1012 | return iter; | |
1013 | } | |
1014 | ||
1da177e4 LT |
1015 | int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter) |
1016 | { | |
c1048aff | 1017 | struct ipoib_dev_priv *priv = ipoib_priv(iter->dev); |
1da177e4 LT |
1018 | struct rb_node *n; |
1019 | struct ipoib_mcast *mcast; | |
1020 | int ret = 1; | |
1021 | ||
1022 | spin_lock_irq(&priv->lock); | |
1023 | ||
1024 | n = rb_first(&priv->multicast_tree); | |
1025 | ||
1026 | while (n) { | |
1027 | mcast = rb_entry(n, struct ipoib_mcast, rb_node); | |
1028 | ||
1029 | if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw, | |
1030 | sizeof (union ib_gid)) < 0) { | |
1031 | iter->mgid = mcast->mcmember.mgid; | |
1032 | iter->created = mcast->created; | |
1033 | iter->queuelen = skb_queue_len(&mcast->pkt_queue); | |
1034 | iter->complete = !!mcast->ah; | |
1035 | iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY)); | |
1036 | ||
1037 | ret = 0; | |
1038 | ||
1039 | break; | |
1040 | } | |
1041 | ||
1042 | n = rb_next(n); | |
1043 | } | |
1044 | ||
1045 | spin_unlock_irq(&priv->lock); | |
1046 | ||
1047 | return ret; | |
1048 | } | |
1049 | ||
1050 | void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter, | |
1051 | union ib_gid *mgid, | |
1052 | unsigned long *created, | |
1053 | unsigned int *queuelen, | |
1054 | unsigned int *complete, | |
1055 | unsigned int *send_only) | |
1056 | { | |
1057 | *mgid = iter->mgid; | |
1058 | *created = iter->created; | |
1059 | *queuelen = iter->queuelen; | |
1060 | *complete = iter->complete; | |
1061 | *send_only = iter->send_only; | |
1062 | } | |
8ae5a8a2 RD |
1063 | |
1064 | #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */ |