1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Generic netlink support functions to configure an SMC-R PNET table
7 * Copyright IBM Corp. 2016
9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
21 #include <rdma/ib_verbs.h>
23 #include <net/netns/generic.h>
24 #include "smc_netns.h"
31 #define SMC_ASCII_BLANK 32
33 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
);
35 static const struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
37 .type
= NLA_NUL_STRING
,
38 .len
= SMC_MAX_PNETID_LEN
40 [SMC_PNETID_ETHNAME
] = {
41 .type
= NLA_NUL_STRING
,
44 [SMC_PNETID_IBNAME
] = {
45 .type
= NLA_NUL_STRING
,
46 .len
= IB_DEVICE_NAME_MAX
- 1
48 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
51 static struct genl_family smc_pnet_nl_family
;
53 enum smc_pnet_nametype
{
58 /* pnet entry stored in pnet table */
59 struct smc_pnetentry
{
60 struct list_head list
;
61 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
62 enum smc_pnet_nametype type
;
65 char eth_name
[IFNAMSIZ
+ 1];
66 struct net_device
*ndev
;
69 char ib_name
[IB_DEVICE_NAME_MAX
+ 1];
75 /* Check if two given pnetids match */
76 static bool smc_pnet_match(u8
*pnetid1
, u8
*pnetid2
)
80 for (i
= 0; i
< SMC_MAX_PNETID_LEN
; i
++) {
81 if ((pnetid1
[i
] == 0 || pnetid1
[i
] == SMC_ASCII_BLANK
) &&
82 (pnetid2
[i
] == 0 || pnetid2
[i
] == SMC_ASCII_BLANK
))
84 if (pnetid1
[i
] != pnetid2
[i
])
90 /* Remove a pnetid from the pnet table.
92 static int smc_pnet_remove_by_pnetid(struct net
*net
, char *pnet_name
)
94 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
95 struct smc_pnettable
*pnettable
;
96 struct smc_ib_device
*ibdev
;
97 struct smcd_dev
*smcd_dev
;
102 /* get pnettable for namespace */
103 sn
= net_generic(net
, smc_net_id
);
104 pnettable
= &sn
->pnettable
;
106 /* remove table entry */
107 write_lock(&pnettable
->lock
);
108 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
,
111 smc_pnet_match(pnetelem
->pnet_name
, pnet_name
)) {
112 list_del(&pnetelem
->list
);
113 if (pnetelem
->type
== SMC_PNET_ETH
&& pnetelem
->ndev
) {
114 dev_put(pnetelem
->ndev
);
115 pr_warn_ratelimited("smc: net device %s "
116 "erased user defined "
119 pnetelem
->pnet_name
);
125 write_unlock(&pnettable
->lock
);
127 /* if this is not the initial namespace, stop here */
128 if (net
!= &init_net
)
131 /* remove ib devices */
132 spin_lock(&smc_ib_devices
.lock
);
133 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
134 for (ibport
= 0; ibport
< SMC_MAX_PORTS
; ibport
++) {
135 if (ibdev
->pnetid_by_user
[ibport
] &&
137 smc_pnet_match(pnet_name
,
138 ibdev
->pnetid
[ibport
]))) {
139 pr_warn_ratelimited("smc: ib device %s ibport "
140 "%d erased user defined "
144 ibdev
->pnetid
[ibport
]);
145 memset(ibdev
->pnetid
[ibport
], 0,
147 ibdev
->pnetid_by_user
[ibport
] = false;
152 spin_unlock(&smc_ib_devices
.lock
);
153 /* remove smcd devices */
154 spin_lock(&smcd_dev_list
.lock
);
155 list_for_each_entry(smcd_dev
, &smcd_dev_list
.list
, list
) {
156 if (smcd_dev
->pnetid_by_user
&&
158 smc_pnet_match(pnet_name
, smcd_dev
->pnetid
))) {
159 pr_warn_ratelimited("smc: smcd device %s "
160 "erased user defined pnetid "
161 "%.16s\n", dev_name(&smcd_dev
->dev
),
163 memset(smcd_dev
->pnetid
, 0, SMC_MAX_PNETID_LEN
);
164 smcd_dev
->pnetid_by_user
= false;
168 spin_unlock(&smcd_dev_list
.lock
);
172 /* Add the reference to a given network device to the pnet table.
174 static int smc_pnet_add_by_ndev(struct net_device
*ndev
)
176 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
177 struct smc_pnettable
*pnettable
;
178 struct net
*net
= dev_net(ndev
);
182 /* get pnettable for namespace */
183 sn
= net_generic(net
, smc_net_id
);
184 pnettable
= &sn
->pnettable
;
186 write_lock(&pnettable
->lock
);
187 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
, list
) {
188 if (pnetelem
->type
== SMC_PNET_ETH
&& !pnetelem
->ndev
&&
189 !strncmp(pnetelem
->eth_name
, ndev
->name
, IFNAMSIZ
)) {
191 pnetelem
->ndev
= ndev
;
193 pr_warn_ratelimited("smc: adding net device %s with "
194 "user defined pnetid %.16s\n",
196 pnetelem
->pnet_name
);
200 write_unlock(&pnettable
->lock
);
204 /* Remove the reference to a given network device from the pnet table.
206 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
208 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
209 struct smc_pnettable
*pnettable
;
210 struct net
*net
= dev_net(ndev
);
214 /* get pnettable for namespace */
215 sn
= net_generic(net
, smc_net_id
);
216 pnettable
= &sn
->pnettable
;
218 write_lock(&pnettable
->lock
);
219 list_for_each_entry_safe(pnetelem
, tmp_pe
, &pnettable
->pnetlist
, list
) {
220 if (pnetelem
->type
== SMC_PNET_ETH
&& pnetelem
->ndev
== ndev
) {
221 dev_put(pnetelem
->ndev
);
222 pnetelem
->ndev
= NULL
;
224 pr_warn_ratelimited("smc: removing net device %s with "
225 "user defined pnetid %.16s\n",
227 pnetelem
->pnet_name
);
231 write_unlock(&pnettable
->lock
);
235 /* Apply pnetid to ib device when no pnetid is set.
237 static bool smc_pnet_apply_ib(struct smc_ib_device
*ib_dev
, u8 ib_port
,
240 u8 pnet_null
[SMC_MAX_PNETID_LEN
] = {0};
241 bool applied
= false;
243 spin_lock(&smc_ib_devices
.lock
);
244 if (smc_pnet_match(ib_dev
->pnetid
[ib_port
- 1], pnet_null
)) {
245 memcpy(ib_dev
->pnetid
[ib_port
- 1], pnet_name
,
247 ib_dev
->pnetid_by_user
[ib_port
- 1] = true;
250 spin_unlock(&smc_ib_devices
.lock
);
254 /* Apply pnetid to smcd device when no pnetid is set.
256 static bool smc_pnet_apply_smcd(struct smcd_dev
*smcd_dev
, char *pnet_name
)
258 u8 pnet_null
[SMC_MAX_PNETID_LEN
] = {0};
259 bool applied
= false;
261 spin_lock(&smcd_dev_list
.lock
);
262 if (smc_pnet_match(smcd_dev
->pnetid
, pnet_null
)) {
263 memcpy(smcd_dev
->pnetid
, pnet_name
, SMC_MAX_PNETID_LEN
);
264 smcd_dev
->pnetid_by_user
= true;
267 spin_unlock(&smcd_dev_list
.lock
);
271 /* The limit for pnetid is 16 characters.
272 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
273 * Lower case letters are converted to upper case.
274 * Interior blanks should not be used.
276 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
278 char *bf
= skip_spaces(pnet_name
);
279 size_t len
= strlen(bf
);
280 char *end
= bf
+ len
;
284 while (--end
>= bf
&& isspace(*end
))
286 if (end
- bf
>= SMC_MAX_PNETID_LEN
)
291 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
298 /* Find an infiniband device by a given name. The device might not exist. */
299 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
301 struct smc_ib_device
*ibdev
;
303 spin_lock(&smc_ib_devices
.lock
);
304 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
305 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
306 sizeof(ibdev
->ibdev
->name
)) ||
307 !strncmp(dev_name(ibdev
->ibdev
->dev
.parent
), ib_name
,
308 IB_DEVICE_NAME_MAX
- 1)) {
314 spin_unlock(&smc_ib_devices
.lock
);
318 /* Find an smcd device by a given name. The device might not exist. */
319 static struct smcd_dev
*smc_pnet_find_smcd(char *smcd_name
)
321 struct smcd_dev
*smcd_dev
;
323 spin_lock(&smcd_dev_list
.lock
);
324 list_for_each_entry(smcd_dev
, &smcd_dev_list
.list
, list
) {
325 if (!strncmp(dev_name(&smcd_dev
->dev
), smcd_name
,
326 IB_DEVICE_NAME_MAX
- 1))
331 spin_unlock(&smcd_dev_list
.lock
);
335 static int smc_pnet_add_eth(struct smc_pnettable
*pnettable
, struct net
*net
,
336 char *eth_name
, char *pnet_name
)
338 struct smc_pnetentry
*tmp_pe
, *new_pe
;
339 struct net_device
*ndev
, *base_ndev
;
340 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
344 /* check if (base) netdev already has a pnetid. If there is one, we do
345 * not want to add a pnet table entry
348 ndev
= dev_get_by_name(net
, eth_name
); /* dev_hold() */
350 base_ndev
= pnet_find_base_ndev(ndev
);
351 if (!smc_pnetid_by_dev_port(base_ndev
->dev
.parent
,
352 base_ndev
->dev_port
, ndev_pnetid
))
356 /* add a new netdev entry to the pnet table if there isn't one */
358 new_pe
= kzalloc(sizeof(*new_pe
), GFP_KERNEL
);
361 new_pe
->type
= SMC_PNET_ETH
;
362 memcpy(new_pe
->pnet_name
, pnet_name
, SMC_MAX_PNETID_LEN
);
363 strncpy(new_pe
->eth_name
, eth_name
, IFNAMSIZ
);
368 write_lock(&pnettable
->lock
);
369 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
370 if (tmp_pe
->type
== SMC_PNET_ETH
&&
371 !strncmp(tmp_pe
->eth_name
, eth_name
, IFNAMSIZ
)) {
377 list_add_tail(&new_pe
->list
, &pnettable
->pnetlist
);
378 write_unlock(&pnettable
->lock
);
380 write_unlock(&pnettable
->lock
);
385 pr_warn_ratelimited("smc: net device %s "
386 "applied user defined pnetid %.16s\n",
387 new_pe
->eth_name
, new_pe
->pnet_name
);
396 static int smc_pnet_add_ib(struct smc_pnettable
*pnettable
, char *ib_name
,
397 u8 ib_port
, char *pnet_name
)
399 struct smc_pnetentry
*tmp_pe
, *new_pe
;
400 struct smc_ib_device
*ib_dev
;
401 bool smcddev_applied
= true;
402 bool ibdev_applied
= true;
403 struct smcd_dev
*smcd_dev
;
406 /* try to apply the pnetid to active devices */
407 ib_dev
= smc_pnet_find_ib(ib_name
);
409 ibdev_applied
= smc_pnet_apply_ib(ib_dev
, ib_port
, pnet_name
);
411 pr_warn_ratelimited("smc: ib device %s ibport %d "
412 "applied user defined pnetid "
413 "%.16s\n", ib_dev
->ibdev
->name
,
415 ib_dev
->pnetid
[ib_port
- 1]);
417 smcd_dev
= smc_pnet_find_smcd(ib_name
);
419 smcddev_applied
= smc_pnet_apply_smcd(smcd_dev
, pnet_name
);
421 pr_warn_ratelimited("smc: smcd device %s "
422 "applied user defined pnetid "
423 "%.16s\n", dev_name(&smcd_dev
->dev
),
426 /* Apply fails when a device has a hardware-defined pnetid set, do not
427 * add a pnet table entry in that case.
429 if (!ibdev_applied
|| !smcddev_applied
)
432 /* add a new ib entry to the pnet table if there isn't one */
433 new_pe
= kzalloc(sizeof(*new_pe
), GFP_KERNEL
);
436 new_pe
->type
= SMC_PNET_IB
;
437 memcpy(new_pe
->pnet_name
, pnet_name
, SMC_MAX_PNETID_LEN
);
438 strncpy(new_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
);
439 new_pe
->ib_port
= ib_port
;
442 write_lock(&pnettable
->lock
);
443 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
444 if (tmp_pe
->type
== SMC_PNET_IB
&&
445 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
)) {
451 list_add_tail(&new_pe
->list
, &pnettable
->pnetlist
);
452 write_unlock(&pnettable
->lock
);
454 write_unlock(&pnettable
->lock
);
457 return (new_ibdev
) ? 0 : -EEXIST
;
460 /* Append a pnetid to the end of the pnet table if not already on this list.
462 static int smc_pnet_enter(struct net
*net
, struct nlattr
*tb
[])
464 char pnet_name
[SMC_MAX_PNETID_LEN
+ 1];
465 struct smc_pnettable
*pnettable
;
466 bool new_netdev
= false;
467 bool new_ibdev
= false;
473 /* get pnettable for namespace */
474 sn
= net_generic(net
, smc_net_id
);
475 pnettable
= &sn
->pnettable
;
478 if (!tb
[SMC_PNETID_NAME
])
480 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
481 if (!smc_pnetid_valid(string
, pnet_name
))
484 if (tb
[SMC_PNETID_ETHNAME
]) {
485 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
486 rc
= smc_pnet_add_eth(pnettable
, net
, string
, pnet_name
);
489 else if (rc
!= -EEXIST
)
493 /* if this is not the initial namespace, stop here */
494 if (net
!= &init_net
)
495 return new_netdev
? 0 : -EEXIST
;
498 if (tb
[SMC_PNETID_IBNAME
]) {
499 string
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
500 string
= strim(string
);
501 if (tb
[SMC_PNETID_IBPORT
]) {
502 ibport
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
503 if (ibport
< 1 || ibport
> SMC_MAX_PORTS
)
506 rc
= smc_pnet_add_ib(pnettable
, string
, ibport
, pnet_name
);
509 else if (rc
!= -EEXIST
)
512 return (new_netdev
|| new_ibdev
) ? 0 : -EEXIST
;
518 /* Convert an smc_pnetentry to a netlink attribute sequence */
519 static int smc_pnet_set_nla(struct sk_buff
*msg
,
520 struct smc_pnetentry
*pnetelem
)
522 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
))
524 if (pnetelem
->type
== SMC_PNET_ETH
) {
525 if (nla_put_string(msg
, SMC_PNETID_ETHNAME
,
529 if (nla_put_string(msg
, SMC_PNETID_ETHNAME
, "n/a"))
532 if (pnetelem
->type
== SMC_PNET_IB
) {
533 if (nla_put_string(msg
, SMC_PNETID_IBNAME
, pnetelem
->ib_name
) ||
534 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
537 if (nla_put_string(msg
, SMC_PNETID_IBNAME
, "n/a") ||
538 nla_put_u8(msg
, SMC_PNETID_IBPORT
, 0xff))
545 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
547 struct net
*net
= genl_info_net(info
);
549 return smc_pnet_enter(net
, info
->attrs
);
552 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
554 struct net
*net
= genl_info_net(info
);
556 if (!info
->attrs
[SMC_PNETID_NAME
])
558 return smc_pnet_remove_by_pnetid(net
,
559 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
562 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
568 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
569 u32 portid
, u32 seq
, u32 flags
,
570 struct smc_pnetentry
*pnetelem
)
574 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
575 flags
, SMC_PNETID_GET
);
578 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
579 genlmsg_cancel(skb
, hdr
);
582 genlmsg_end(skb
, hdr
);
586 static int _smc_pnet_dump(struct net
*net
, struct sk_buff
*skb
, u32 portid
,
587 u32 seq
, u8
*pnetid
, int start_idx
)
589 struct smc_pnettable
*pnettable
;
590 struct smc_pnetentry
*pnetelem
;
594 /* get pnettable for namespace */
595 sn
= net_generic(net
, smc_net_id
);
596 pnettable
= &sn
->pnettable
;
598 /* dump pnettable entries */
599 read_lock(&pnettable
->lock
);
600 list_for_each_entry(pnetelem
, &pnettable
->pnetlist
, list
) {
601 if (pnetid
&& !smc_pnet_match(pnetelem
->pnet_name
, pnetid
))
603 if (idx
++ < start_idx
)
605 /* if this is not the initial namespace, dump only netdev */
606 if (net
!= &init_net
&& pnetelem
->type
!= SMC_PNET_ETH
)
608 if (smc_pnet_dumpinfo(skb
, portid
, seq
, NLM_F_MULTI
,
614 read_unlock(&pnettable
->lock
);
618 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
620 struct net
*net
= sock_net(skb
->sk
);
623 idx
= _smc_pnet_dump(net
, skb
, NETLINK_CB(cb
->skb
).portid
,
624 cb
->nlh
->nlmsg_seq
, NULL
, cb
->args
[0]);
630 /* Retrieve one PNETID entry */
631 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
633 struct net
*net
= genl_info_net(info
);
637 if (!info
->attrs
[SMC_PNETID_NAME
])
640 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
644 _smc_pnet_dump(net
, msg
, info
->snd_portid
, info
->snd_seq
,
645 nla_data(info
->attrs
[SMC_PNETID_NAME
]), 0);
647 /* finish multi part message and send it */
648 hdr
= nlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
, NLMSG_DONE
, 0,
654 return genlmsg_reply(msg
, info
);
657 /* Remove and delete all pnetids from pnet table.
659 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
661 struct net
*net
= genl_info_net(info
);
663 smc_pnet_remove_by_pnetid(net
, NULL
);
667 /* SMC_PNETID generic netlink operation definition */
668 static const struct genl_ops smc_pnet_ops
[] = {
670 .cmd
= SMC_PNETID_GET
,
671 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
672 /* can be retrieved by unprivileged users */
673 .doit
= smc_pnet_get
,
674 .dumpit
= smc_pnet_dump
,
675 .start
= smc_pnet_dump_start
678 .cmd
= SMC_PNETID_ADD
,
679 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
680 .flags
= GENL_ADMIN_PERM
,
684 .cmd
= SMC_PNETID_DEL
,
685 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
686 .flags
= GENL_ADMIN_PERM
,
690 .cmd
= SMC_PNETID_FLUSH
,
691 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
692 .flags
= GENL_ADMIN_PERM
,
693 .doit
= smc_pnet_flush
697 /* SMC_PNETID family definition */
698 static struct genl_family smc_pnet_nl_family __ro_after_init
= {
700 .name
= SMCR_GENL_FAMILY_NAME
,
701 .version
= SMCR_GENL_FAMILY_VERSION
,
702 .maxattr
= SMC_PNETID_MAX
,
703 .policy
= smc_pnet_policy
,
705 .module
= THIS_MODULE
,
707 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
710 static int smc_pnet_netdev_event(struct notifier_block
*this,
711 unsigned long event
, void *ptr
)
713 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
717 case NETDEV_UNREGISTER
:
718 smc_pnet_remove_by_ndev(event_dev
);
720 case NETDEV_REGISTER
:
721 smc_pnet_add_by_ndev(event_dev
);
728 static struct notifier_block smc_netdev_notifier
= {
729 .notifier_call
= smc_pnet_netdev_event
732 /* init network namespace */
733 int smc_pnet_net_init(struct net
*net
)
735 struct smc_net
*sn
= net_generic(net
, smc_net_id
);
736 struct smc_pnettable
*pnettable
= &sn
->pnettable
;
738 INIT_LIST_HEAD(&pnettable
->pnetlist
);
739 rwlock_init(&pnettable
->lock
);
744 int __init
smc_pnet_init(void)
748 rc
= genl_register_family(&smc_pnet_nl_family
);
751 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
753 genl_unregister_family(&smc_pnet_nl_family
);
757 /* exit network namespace */
758 void smc_pnet_net_exit(struct net
*net
)
760 /* flush pnet table */
761 smc_pnet_remove_by_pnetid(net
, NULL
);
764 void smc_pnet_exit(void)
766 unregister_netdevice_notifier(&smc_netdev_notifier
);
767 genl_unregister_family(&smc_pnet_nl_family
);
770 /* Determine one base device for stacked net devices.
771 * If the lower device level contains more than one devices
772 * (for instance with bonding slaves), just the first device
773 * is used to reach a base device.
775 static struct net_device
*pnet_find_base_ndev(struct net_device
*ndev
)
780 nest_lvl
= ndev
->lower_level
;
781 for (i
= 0; i
< nest_lvl
; i
++) {
782 struct list_head
*lower
= &ndev
->adj_list
.lower
;
784 if (list_empty(lower
))
787 ndev
= netdev_lower_get_next(ndev
, &lower
);
793 static int smc_pnet_find_ndev_pnetid_by_table(struct net_device
*ndev
,
796 struct smc_pnettable
*pnettable
;
797 struct net
*net
= dev_net(ndev
);
798 struct smc_pnetentry
*pnetelem
;
802 /* get pnettable for namespace */
803 sn
= net_generic(net
, smc_net_id
);
804 pnettable
= &sn
->pnettable
;
806 read_lock(&pnettable
->lock
);
807 list_for_each_entry(pnetelem
, &pnettable
->pnetlist
, list
) {
808 if (pnetelem
->type
== SMC_PNET_ETH
&& ndev
== pnetelem
->ndev
) {
809 /* get pnetid of netdev device */
810 memcpy(pnetid
, pnetelem
->pnet_name
, SMC_MAX_PNETID_LEN
);
815 read_unlock(&pnettable
->lock
);
819 /* find a roce device for the given pnetid */
820 static void _smc_pnet_find_roce_by_pnetid(u8
*pnet_id
,
821 struct smc_init_info
*ini
,
822 struct smc_ib_device
*known_dev
)
824 struct smc_ib_device
*ibdev
;
828 spin_lock(&smc_ib_devices
.lock
);
829 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
830 if (ibdev
== known_dev
)
832 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
833 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
835 if (smc_pnet_match(ibdev
->pnetid
[i
- 1], pnet_id
) &&
836 smc_ib_port_active(ibdev
, i
) &&
837 !test_bit(i
- 1, ibdev
->ports_going_away
) &&
838 !smc_ib_determine_gid(ibdev
, i
, ini
->vlan_id
,
839 ini
->ib_gid
, NULL
)) {
847 spin_unlock(&smc_ib_devices
.lock
);
850 /* find alternate roce device with same pnet_id and vlan_id */
851 void smc_pnet_find_alt_roce(struct smc_link_group
*lgr
,
852 struct smc_init_info
*ini
,
853 struct smc_ib_device
*known_dev
)
855 _smc_pnet_find_roce_by_pnetid(lgr
->pnet_id
, ini
, known_dev
);
858 /* if handshake network device belongs to a roce device, return its
861 static void smc_pnet_find_rdma_dev(struct net_device
*netdev
,
862 struct smc_init_info
*ini
)
864 struct smc_ib_device
*ibdev
;
866 spin_lock(&smc_ib_devices
.lock
);
867 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
868 struct net_device
*ndev
;
871 for (i
= 1; i
<= SMC_MAX_PORTS
; i
++) {
872 if (!rdma_is_port_valid(ibdev
->ibdev
, i
))
874 if (!ibdev
->ibdev
->ops
.get_netdev
)
876 ndev
= ibdev
->ibdev
->ops
.get_netdev(ibdev
->ibdev
, i
);
880 if (netdev
== ndev
&&
881 smc_ib_port_active(ibdev
, i
) &&
882 !test_bit(i
- 1, ibdev
->ports_going_away
) &&
883 !smc_ib_determine_gid(ibdev
, i
, ini
->vlan_id
,
884 ini
->ib_gid
, NULL
)) {
891 spin_unlock(&smc_ib_devices
.lock
);
894 /* Determine the corresponding IB device port based on the hardware PNETID.
895 * Searching stops at the first matching active IB device port with vlan_id
897 * If nothing found, check pnetid table.
898 * If nothing found, try to use handshake device
900 static void smc_pnet_find_roce_by_pnetid(struct net_device
*ndev
,
901 struct smc_init_info
*ini
)
903 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
905 ndev
= pnet_find_base_ndev(ndev
);
906 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
908 smc_pnet_find_ndev_pnetid_by_table(ndev
, ndev_pnetid
)) {
909 smc_pnet_find_rdma_dev(ndev
, ini
);
910 return; /* pnetid could not be determined */
912 _smc_pnet_find_roce_by_pnetid(ndev_pnetid
, ini
, NULL
);
915 static void smc_pnet_find_ism_by_pnetid(struct net_device
*ndev
,
916 struct smc_init_info
*ini
)
918 u8 ndev_pnetid
[SMC_MAX_PNETID_LEN
];
919 struct smcd_dev
*ismdev
;
921 ndev
= pnet_find_base_ndev(ndev
);
922 if (smc_pnetid_by_dev_port(ndev
->dev
.parent
, ndev
->dev_port
,
924 smc_pnet_find_ndev_pnetid_by_table(ndev
, ndev_pnetid
))
925 return; /* pnetid could not be determined */
927 spin_lock(&smcd_dev_list
.lock
);
928 list_for_each_entry(ismdev
, &smcd_dev_list
.list
, list
) {
929 if (smc_pnet_match(ismdev
->pnetid
, ndev_pnetid
) &&
930 !ismdev
->going_away
) {
931 ini
->ism_dev
= ismdev
;
935 spin_unlock(&smcd_dev_list
.lock
);
938 /* PNET table analysis for a given sock:
939 * determine ib_device and port belonging to used internal TCP socket
940 * ethernet interface.
942 void smc_pnet_find_roce_resource(struct sock
*sk
, struct smc_init_info
*ini
)
944 struct dst_entry
*dst
= sk_dst_get(sk
);
953 smc_pnet_find_roce_by_pnetid(dst
->dev
, ini
);
961 void smc_pnet_find_ism_resource(struct sock
*sk
, struct smc_init_info
*ini
)
963 struct dst_entry
*dst
= sk_dst_get(sk
);
971 smc_pnet_find_ism_by_pnetid(dst
->dev
, ini
);
979 /* Lookup and apply a pnet table entry to the given ib device.
981 int smc_pnetid_by_table_ib(struct smc_ib_device
*smcibdev
, u8 ib_port
)
983 char *ib_name
= smcibdev
->ibdev
->name
;
984 struct smc_pnettable
*pnettable
;
985 struct smc_pnetentry
*tmp_pe
;
989 /* get pnettable for init namespace */
990 sn
= net_generic(&init_net
, smc_net_id
);
991 pnettable
= &sn
->pnettable
;
993 read_lock(&pnettable
->lock
);
994 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
995 if (tmp_pe
->type
== SMC_PNET_IB
&&
996 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
) &&
997 tmp_pe
->ib_port
== ib_port
) {
998 smc_pnet_apply_ib(smcibdev
, ib_port
, tmp_pe
->pnet_name
);
1003 read_unlock(&pnettable
->lock
);
1008 /* Lookup and apply a pnet table entry to the given smcd device.
1010 int smc_pnetid_by_table_smcd(struct smcd_dev
*smcddev
)
1012 const char *ib_name
= dev_name(&smcddev
->dev
);
1013 struct smc_pnettable
*pnettable
;
1014 struct smc_pnetentry
*tmp_pe
;
1018 /* get pnettable for init namespace */
1019 sn
= net_generic(&init_net
, smc_net_id
);
1020 pnettable
= &sn
->pnettable
;
1022 read_lock(&pnettable
->lock
);
1023 list_for_each_entry(tmp_pe
, &pnettable
->pnetlist
, list
) {
1024 if (tmp_pe
->type
== SMC_PNET_IB
&&
1025 !strncmp(tmp_pe
->ib_name
, ib_name
, IB_DEVICE_NAME_MAX
)) {
1026 smc_pnet_apply_smcd(smcddev
, tmp_pe
->pnet_name
);
1031 read_unlock(&pnettable
->lock
);