1 // SPDX-License-Identifier: GPL-2.0
3 * Shared Memory Communications over RDMA (SMC-R) and RoCE
5 * Generic netlink support functions to configure an SMC-R PNET table
7 * Copyright IBM Corp. 2016
9 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
12 #include <linux/module.h>
13 #include <linux/list.h>
14 #include <linux/ctype.h>
15 #include <net/netlink.h>
16 #include <net/genetlink.h>
18 #include <uapi/linux/if.h>
19 #include <uapi/linux/smc.h>
21 #include <rdma/ib_verbs.h>
26 #define SMC_MAX_PNET_ID_LEN 16 /* Max. length of PNET id */
28 static struct nla_policy smc_pnet_policy
[SMC_PNETID_MAX
+ 1] = {
30 .type
= NLA_NUL_STRING
,
31 .len
= SMC_MAX_PNET_ID_LEN
- 1
33 [SMC_PNETID_ETHNAME
] = {
34 .type
= NLA_NUL_STRING
,
37 [SMC_PNETID_IBNAME
] = {
38 .type
= NLA_NUL_STRING
,
39 .len
= IB_DEVICE_NAME_MAX
- 1
41 [SMC_PNETID_IBPORT
] = { .type
= NLA_U8
}
44 static struct genl_family smc_pnet_nl_family
;
47 * struct smc_pnettable - SMC PNET table anchor
48 * @lock: Lock for list action
49 * @pnetlist: List of PNETIDs
51 static struct smc_pnettable
{
53 struct list_head pnetlist
;
55 .pnetlist
= LIST_HEAD_INIT(smc_pnettable
.pnetlist
),
56 .lock
= __RW_LOCK_UNLOCKED(smc_pnettable
.lock
)
60 * struct smc_pnetentry - pnet identifier name entry
62 * @pnet_name: Pnet identifier name
63 * @ndev: pointer to network device.
64 * @smcibdev: Pointer to IB device.
66 struct smc_pnetentry
{
67 struct list_head list
;
68 char pnet_name
[SMC_MAX_PNET_ID_LEN
+ 1];
69 struct net_device
*ndev
;
70 struct smc_ib_device
*smcibdev
;
74 /* Check if two RDMA device entries are identical. Use device name and port
75 * number for comparison.
77 static bool smc_pnet_same_ibname(struct smc_pnetentry
*pnetelem
, char *ibname
,
80 return pnetelem
->ib_port
== ibport
&&
81 !strncmp(pnetelem
->smcibdev
->ibdev
->name
, ibname
,
82 sizeof(pnetelem
->smcibdev
->ibdev
->name
));
85 /* Find a pnetid in the pnet table.
87 static struct smc_pnetentry
*smc_pnet_find_pnetid(char *pnet_name
)
89 struct smc_pnetentry
*pnetelem
, *found_pnetelem
= NULL
;
91 read_lock(&smc_pnettable
.lock
);
92 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
93 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
94 sizeof(pnetelem
->pnet_name
))) {
95 found_pnetelem
= pnetelem
;
99 read_unlock(&smc_pnettable
.lock
);
100 return found_pnetelem
;
103 /* Remove a pnetid from the pnet table.
105 static int smc_pnet_remove_by_pnetid(char *pnet_name
)
107 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
110 write_lock(&smc_pnettable
.lock
);
111 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
113 if (!strncmp(pnetelem
->pnet_name
, pnet_name
,
114 sizeof(pnetelem
->pnet_name
))) {
115 list_del(&pnetelem
->list
);
116 dev_put(pnetelem
->ndev
);
122 write_unlock(&smc_pnettable
.lock
);
126 /* Remove a pnet entry mentioning a given network device from the pnet table.
128 static int smc_pnet_remove_by_ndev(struct net_device
*ndev
)
130 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
133 write_lock(&smc_pnettable
.lock
);
134 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
136 if (pnetelem
->ndev
== ndev
) {
137 list_del(&pnetelem
->list
);
138 dev_put(pnetelem
->ndev
);
144 write_unlock(&smc_pnettable
.lock
);
148 /* Remove a pnet entry mentioning a given ib device from the pnet table.
150 int smc_pnet_remove_by_ibdev(struct smc_ib_device
*ibdev
)
152 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
155 write_lock(&smc_pnettable
.lock
);
156 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
158 if (pnetelem
->smcibdev
== ibdev
) {
159 list_del(&pnetelem
->list
);
160 dev_put(pnetelem
->ndev
);
166 write_unlock(&smc_pnettable
.lock
);
170 /* Append a pnetid to the end of the pnet table if not already on this list.
172 static int smc_pnet_enter(struct smc_pnetentry
*new_pnetelem
)
174 struct smc_pnetentry
*pnetelem
;
177 write_lock(&smc_pnettable
.lock
);
178 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
179 if (!strncmp(pnetelem
->pnet_name
, new_pnetelem
->pnet_name
,
180 sizeof(new_pnetelem
->pnet_name
)) ||
181 !strncmp(pnetelem
->ndev
->name
, new_pnetelem
->ndev
->name
,
182 sizeof(new_pnetelem
->ndev
->name
)) ||
183 smc_pnet_same_ibname(pnetelem
,
184 new_pnetelem
->smcibdev
->ibdev
->name
,
185 new_pnetelem
->ib_port
)) {
186 dev_put(pnetelem
->ndev
);
190 list_add_tail(&new_pnetelem
->list
, &smc_pnettable
.pnetlist
);
193 write_unlock(&smc_pnettable
.lock
);
197 /* The limit for pnetid is 16 characters.
198 * Valid characters should be (single-byte character set) a-z, A-Z, 0-9.
199 * Lower case letters are converted to upper case.
200 * Interior blanks should not be used.
202 static bool smc_pnetid_valid(const char *pnet_name
, char *pnetid
)
204 char *bf
= skip_spaces(pnet_name
);
205 size_t len
= strlen(bf
);
206 char *end
= bf
+ len
;
210 while (--end
>= bf
&& isspace(*end
))
212 if (end
- bf
>= SMC_MAX_PNET_ID_LEN
)
217 *pnetid
++ = islower(*bf
) ? toupper(*bf
) : *bf
;
224 /* Find an infiniband device by a given name. The device might not exist. */
225 static struct smc_ib_device
*smc_pnet_find_ib(char *ib_name
)
227 struct smc_ib_device
*ibdev
;
229 spin_lock(&smc_ib_devices
.lock
);
230 list_for_each_entry(ibdev
, &smc_ib_devices
.list
, list
) {
231 if (!strncmp(ibdev
->ibdev
->name
, ib_name
,
232 sizeof(ibdev
->ibdev
->name
))) {
238 spin_unlock(&smc_ib_devices
.lock
);
242 /* Parse the supplied netlink attributes and fill a pnetentry structure.
243 * For ethernet and infiniband device names verify that the devices exist.
245 static int smc_pnet_fill_entry(struct net
*net
, struct smc_pnetentry
*pnetelem
,
248 char *string
, *ibname
;
251 memset(pnetelem
, 0, sizeof(*pnetelem
));
252 INIT_LIST_HEAD(&pnetelem
->list
);
255 if (!tb
[SMC_PNETID_NAME
])
257 string
= (char *)nla_data(tb
[SMC_PNETID_NAME
]);
258 if (!smc_pnetid_valid(string
, pnetelem
->pnet_name
))
262 if (!tb
[SMC_PNETID_ETHNAME
])
265 string
= (char *)nla_data(tb
[SMC_PNETID_ETHNAME
]);
266 pnetelem
->ndev
= dev_get_by_name(net
, string
);
271 if (!tb
[SMC_PNETID_IBNAME
])
274 ibname
= (char *)nla_data(tb
[SMC_PNETID_IBNAME
]);
275 ibname
= strim(ibname
);
276 pnetelem
->smcibdev
= smc_pnet_find_ib(ibname
);
277 if (!pnetelem
->smcibdev
)
281 if (!tb
[SMC_PNETID_IBPORT
])
283 pnetelem
->ib_port
= nla_get_u8(tb
[SMC_PNETID_IBPORT
]);
284 if (pnetelem
->ib_port
< 1 || pnetelem
->ib_port
> SMC_MAX_PORTS
)
291 dev_put(pnetelem
->ndev
);
295 /* Convert an smc_pnetentry to a netlink attribute sequence */
296 static int smc_pnet_set_nla(struct sk_buff
*msg
, struct smc_pnetentry
*pnetelem
)
298 if (nla_put_string(msg
, SMC_PNETID_NAME
, pnetelem
->pnet_name
) ||
299 nla_put_string(msg
, SMC_PNETID_ETHNAME
, pnetelem
->ndev
->name
) ||
300 nla_put_string(msg
, SMC_PNETID_IBNAME
,
301 pnetelem
->smcibdev
->ibdev
->name
) ||
302 nla_put_u8(msg
, SMC_PNETID_IBPORT
, pnetelem
->ib_port
))
307 /* Retrieve one PNETID entry */
308 static int smc_pnet_get(struct sk_buff
*skb
, struct genl_info
*info
)
310 struct smc_pnetentry
*pnetelem
;
315 if (!info
->attrs
[SMC_PNETID_NAME
])
317 pnetelem
= smc_pnet_find_pnetid(
318 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
321 msg
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_KERNEL
);
325 hdr
= genlmsg_put(msg
, info
->snd_portid
, info
->snd_seq
,
326 &smc_pnet_nl_family
, 0, SMC_PNETID_GET
);
332 if (smc_pnet_set_nla(msg
, pnetelem
)) {
337 genlmsg_end(msg
, hdr
);
338 return genlmsg_reply(msg
, info
);
345 static int smc_pnet_add(struct sk_buff
*skb
, struct genl_info
*info
)
347 struct net
*net
= genl_info_net(info
);
348 struct smc_pnetentry
*pnetelem
;
351 pnetelem
= kzalloc(sizeof(*pnetelem
), GFP_KERNEL
);
354 rc
= smc_pnet_fill_entry(net
, pnetelem
, info
->attrs
);
356 rc
= smc_pnet_enter(pnetelem
);
361 rc
= smc_ib_remember_port_attr(pnetelem
->smcibdev
, pnetelem
->ib_port
);
363 smc_pnet_remove_by_pnetid(pnetelem
->pnet_name
);
367 static int smc_pnet_del(struct sk_buff
*skb
, struct genl_info
*info
)
369 if (!info
->attrs
[SMC_PNETID_NAME
])
371 return smc_pnet_remove_by_pnetid(
372 (char *)nla_data(info
->attrs
[SMC_PNETID_NAME
]));
375 static int smc_pnet_dump_start(struct netlink_callback
*cb
)
381 static int smc_pnet_dumpinfo(struct sk_buff
*skb
,
382 u32 portid
, u32 seq
, u32 flags
,
383 struct smc_pnetentry
*pnetelem
)
387 hdr
= genlmsg_put(skb
, portid
, seq
, &smc_pnet_nl_family
,
388 flags
, SMC_PNETID_GET
);
391 if (smc_pnet_set_nla(skb
, pnetelem
) < 0) {
392 genlmsg_cancel(skb
, hdr
);
395 genlmsg_end(skb
, hdr
);
399 static int smc_pnet_dump(struct sk_buff
*skb
, struct netlink_callback
*cb
)
401 struct smc_pnetentry
*pnetelem
;
404 read_lock(&smc_pnettable
.lock
);
405 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
406 if (idx
++ < cb
->args
[0])
408 if (smc_pnet_dumpinfo(skb
, NETLINK_CB(cb
->skb
).portid
,
409 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
,
416 read_unlock(&smc_pnettable
.lock
);
420 /* Remove and delete all pnetids from pnet table.
422 static int smc_pnet_flush(struct sk_buff
*skb
, struct genl_info
*info
)
424 struct smc_pnetentry
*pnetelem
, *tmp_pe
;
426 write_lock(&smc_pnettable
.lock
);
427 list_for_each_entry_safe(pnetelem
, tmp_pe
, &smc_pnettable
.pnetlist
,
429 list_del(&pnetelem
->list
);
430 dev_put(pnetelem
->ndev
);
433 write_unlock(&smc_pnettable
.lock
);
437 /* SMC_PNETID generic netlink operation definition */
438 static const struct genl_ops smc_pnet_ops
[] = {
440 .cmd
= SMC_PNETID_GET
,
441 .flags
= GENL_ADMIN_PERM
,
442 .policy
= smc_pnet_policy
,
443 .doit
= smc_pnet_get
,
444 .dumpit
= smc_pnet_dump
,
445 .start
= smc_pnet_dump_start
448 .cmd
= SMC_PNETID_ADD
,
449 .flags
= GENL_ADMIN_PERM
,
450 .policy
= smc_pnet_policy
,
454 .cmd
= SMC_PNETID_DEL
,
455 .flags
= GENL_ADMIN_PERM
,
456 .policy
= smc_pnet_policy
,
460 .cmd
= SMC_PNETID_FLUSH
,
461 .flags
= GENL_ADMIN_PERM
,
462 .policy
= smc_pnet_policy
,
463 .doit
= smc_pnet_flush
467 /* SMC_PNETID family definition */
468 static struct genl_family smc_pnet_nl_family
= {
470 .name
= SMCR_GENL_FAMILY_NAME
,
471 .version
= SMCR_GENL_FAMILY_VERSION
,
472 .maxattr
= SMC_PNETID_MAX
,
474 .module
= THIS_MODULE
,
476 .n_ops
= ARRAY_SIZE(smc_pnet_ops
)
479 static int smc_pnet_netdev_event(struct notifier_block
*this,
480 unsigned long event
, void *ptr
)
482 struct net_device
*event_dev
= netdev_notifier_info_to_dev(ptr
);
486 case NETDEV_UNREGISTER
:
487 smc_pnet_remove_by_ndev(event_dev
);
494 static struct notifier_block smc_netdev_notifier
= {
495 .notifier_call
= smc_pnet_netdev_event
498 int __init
smc_pnet_init(void)
502 rc
= genl_register_family(&smc_pnet_nl_family
);
505 rc
= register_netdevice_notifier(&smc_netdev_notifier
);
507 genl_unregister_family(&smc_pnet_nl_family
);
511 void smc_pnet_exit(void)
513 smc_pnet_flush(NULL
, NULL
);
514 unregister_netdevice_notifier(&smc_netdev_notifier
);
515 genl_unregister_family(&smc_pnet_nl_family
);
518 /* PNET table analysis for a given sock:
519 * determine ib_device and port belonging to used internal TCP socket
520 * ethernet interface.
522 void smc_pnet_find_roce_resource(struct sock
*sk
,
523 struct smc_ib_device
**smcibdev
, u8
*ibport
)
525 struct dst_entry
*dst
= sk_dst_get(sk
);
526 struct smc_pnetentry
*pnetelem
;
535 read_lock(&smc_pnettable
.lock
);
536 list_for_each_entry(pnetelem
, &smc_pnettable
.pnetlist
, list
) {
537 if (dst
->dev
== pnetelem
->ndev
) {
538 if (smc_ib_port_active(pnetelem
->smcibdev
,
539 pnetelem
->ib_port
)) {
540 *smcibdev
= pnetelem
->smcibdev
;
541 *ibport
= pnetelem
->ib_port
;
546 read_unlock(&smc_pnettable
.lock
);