2 * Copyright (c) 2004 Topspin Communications. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation. All rights reserved.
4 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
5 * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
7 * This software is available to you under a choice of one of two
8 * licenses. You may choose to be licensed under the terms of the GNU
9 * General Public License (GPL) Version 2, available from the file
10 * COPYING in the main directory of this source tree, or the
11 * OpenIB.org BSD license below:
13 * Redistribution and use in source and binary forms, with or
14 * without modification, are permitted provided that the following
17 * - Redistributions of source code must retain the above
18 * copyright notice, this list of conditions and the following
21 * - Redistributions in binary form must reproduce the above
22 * copyright notice, this list of conditions and the following
23 * disclaimer in the documentation and/or other materials
24 * provided with the distribution.
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 #include <linux/module.h>
37 #include <linux/errno.h>
38 #include <linux/slab.h>
39 #include <linux/workqueue.h>
40 #include <linux/netdevice.h>
41 #include <net/addrconf.h>
43 #include <rdma/ib_cache.h>
45 #include "core_priv.h"
47 struct ib_pkey_cache
{
52 struct ib_update_work
{
53 struct work_struct work
;
54 struct ib_device
*device
;
56 bool enforce_security
;
/*
 * Selector bits for find_gid(): each bit enables one comparison between a
 * table entry and the caller-supplied search values.
 */
enum gid_attr_find_mask {
	/* Compare the entry's GID bytes against the requested GID. */
	GID_ATTR_FIND_MASK_GID		= 1UL << 0,
	/* Compare the entry's ndev pointer against the requested ndev. */
	GID_ATTR_FIND_MASK_NETDEV	= 1UL << 1,
	/* Require the entry's index to match the requested default-ness. */
	GID_ATTR_FIND_MASK_DEFAULT	= 1UL << 2,
	/* Compare the entry's gid_type against the requested gid_type. */
	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
};
/* Lifecycle state stored in struct ib_gid_table_entry::state. */
enum gid_table_entry_state {
	/* Initial state assigned by alloc_gid_entry(). */
	GID_TABLE_ENTRY_INVALID		= 1,
	/* Set by store_gid_entry() when the entry is placed in the table. */
	GID_TABLE_ENTRY_VALID		= 2,
	/*
	 * Set by del_gid(): the entry is pending removal and may still
	 * have active users. Once the last user drops its reference the
	 * entry is detached from the table.
	 */
	GID_TABLE_ENTRY_PENDING_DEL	= 3,
};
81 struct ib_gid_table_entry
{
83 struct work_struct del_work
;
84 struct ib_gid_attr attr
;
86 enum gid_table_entry_state state
;
91 /* In RoCE, adding a GID to the table requires:
92 * (a) Find if this GID already exists.
93 * (b) Find a free space.
94 * (c) Write the new GID
96 * Delete requires different set of operations:
101 /* Any writer to data_vec must hold this lock and the write side of
102 * rwlock. Readers must hold only rwlock. All writers must be in a
106 /* rwlock protects data_vec[ix]->state and entry pointer.
109 struct ib_gid_table_entry
**data_vec
;
110 /* bit field, each bit indicates the index of default GID */
111 u32 default_gid_indices
;
114 static void dispatch_gid_change_event(struct ib_device
*ib_dev
, u8 port
)
116 struct ib_event event
;
118 event
.device
= ib_dev
;
119 event
.element
.port_num
= port
;
120 event
.event
= IB_EVENT_GID_CHANGE
;
122 ib_dispatch_event(&event
);
125 static const char * const gid_type_str
[] = {
126 [IB_GID_TYPE_IB
] = "IB/RoCE v1",
127 [IB_GID_TYPE_ROCE_UDP_ENCAP
] = "RoCE v2",
130 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type
)
132 if (gid_type
< ARRAY_SIZE(gid_type_str
) && gid_type_str
[gid_type
])
133 return gid_type_str
[gid_type
];
135 return "Invalid GID type";
137 EXPORT_SYMBOL(ib_cache_gid_type_str
);
139 /** rdma_is_zero_gid - Check if given GID is zero or not.
141 * Returns true if given GID is zero, returns false otherwise.
143 bool rdma_is_zero_gid(const union ib_gid
*gid
)
145 return !memcmp(gid
, &zgid
, sizeof(*gid
));
147 EXPORT_SYMBOL(rdma_is_zero_gid
);
149 /** is_gid_index_default - Check if a given index belongs to
150 * reserved default GIDs or not.
151 * @table: GID table pointer
152 * @index: Index to check in GID table
153 * Returns true if index is one of the reserved default GID index otherwise
156 static bool is_gid_index_default(const struct ib_gid_table
*table
,
159 return index
< 32 && (BIT(index
) & table
->default_gid_indices
);
162 int ib_cache_gid_parse_type_str(const char *buf
)
172 if (buf
[len
- 1] == '\n')
175 for (i
= 0; i
< ARRAY_SIZE(gid_type_str
); ++i
)
176 if (gid_type_str
[i
] && !strncmp(buf
, gid_type_str
[i
], len
) &&
177 len
== strlen(gid_type_str
[i
])) {
184 EXPORT_SYMBOL(ib_cache_gid_parse_type_str
);
186 static struct ib_gid_table
*rdma_gid_table(struct ib_device
*device
, u8 port
)
188 return device
->port_data
[port
].cache
.gid
;
191 static bool is_gid_entry_free(const struct ib_gid_table_entry
*entry
)
196 static bool is_gid_entry_valid(const struct ib_gid_table_entry
*entry
)
198 return entry
&& entry
->state
== GID_TABLE_ENTRY_VALID
;
201 static void schedule_free_gid(struct kref
*kref
)
203 struct ib_gid_table_entry
*entry
=
204 container_of(kref
, struct ib_gid_table_entry
, kref
);
206 queue_work(ib_wq
, &entry
->del_work
);
209 static void free_gid_entry_locked(struct ib_gid_table_entry
*entry
)
211 struct ib_device
*device
= entry
->attr
.device
;
212 u8 port_num
= entry
->attr
.port_num
;
213 struct ib_gid_table
*table
= rdma_gid_table(device
, port_num
);
215 dev_dbg(&device
->dev
, "%s port=%d index=%d gid %pI6\n", __func__
,
216 port_num
, entry
->attr
.index
, entry
->attr
.gid
.raw
);
218 write_lock_irq(&table
->rwlock
);
221 * The only way to avoid overwriting NULL in table is
222 * by comparing if it is same entry in table or not!
223 * If new entry in table is added by the time we free here,
224 * don't overwrite the table entry.
226 if (entry
== table
->data_vec
[entry
->attr
.index
])
227 table
->data_vec
[entry
->attr
.index
] = NULL
;
228 /* Now this index is ready to be allocated */
229 write_unlock_irq(&table
->rwlock
);
231 if (entry
->attr
.ndev
)
232 dev_put(entry
->attr
.ndev
);
236 static void free_gid_entry(struct kref
*kref
)
238 struct ib_gid_table_entry
*entry
=
239 container_of(kref
, struct ib_gid_table_entry
, kref
);
241 free_gid_entry_locked(entry
);
245 * free_gid_work - Release reference to the GID entry
246 * @work: Work structure to refer to GID entry which needs to be
249 * free_gid_work() frees the entry from the HCA's hardware table
250 * if provider supports it. It releases reference to netdevice.
252 static void free_gid_work(struct work_struct
*work
)
254 struct ib_gid_table_entry
*entry
=
255 container_of(work
, struct ib_gid_table_entry
, del_work
);
256 struct ib_device
*device
= entry
->attr
.device
;
257 u8 port_num
= entry
->attr
.port_num
;
258 struct ib_gid_table
*table
= rdma_gid_table(device
, port_num
);
260 mutex_lock(&table
->lock
);
261 free_gid_entry_locked(entry
);
262 mutex_unlock(&table
->lock
);
265 static struct ib_gid_table_entry
*
266 alloc_gid_entry(const struct ib_gid_attr
*attr
)
268 struct ib_gid_table_entry
*entry
;
270 entry
= kzalloc(sizeof(*entry
), GFP_KERNEL
);
273 kref_init(&entry
->kref
);
274 memcpy(&entry
->attr
, attr
, sizeof(*attr
));
275 if (entry
->attr
.ndev
)
276 dev_hold(entry
->attr
.ndev
);
277 INIT_WORK(&entry
->del_work
, free_gid_work
);
278 entry
->state
= GID_TABLE_ENTRY_INVALID
;
282 static void store_gid_entry(struct ib_gid_table
*table
,
283 struct ib_gid_table_entry
*entry
)
285 entry
->state
= GID_TABLE_ENTRY_VALID
;
287 dev_dbg(&entry
->attr
.device
->dev
, "%s port=%d index=%d gid %pI6\n",
288 __func__
, entry
->attr
.port_num
, entry
->attr
.index
,
289 entry
->attr
.gid
.raw
);
291 lockdep_assert_held(&table
->lock
);
292 write_lock_irq(&table
->rwlock
);
293 table
->data_vec
[entry
->attr
.index
] = entry
;
294 write_unlock_irq(&table
->rwlock
);
297 static void get_gid_entry(struct ib_gid_table_entry
*entry
)
299 kref_get(&entry
->kref
);
302 static void put_gid_entry(struct ib_gid_table_entry
*entry
)
304 kref_put(&entry
->kref
, schedule_free_gid
);
307 static void put_gid_entry_locked(struct ib_gid_table_entry
*entry
)
309 kref_put(&entry
->kref
, free_gid_entry
);
312 static int add_roce_gid(struct ib_gid_table_entry
*entry
)
314 const struct ib_gid_attr
*attr
= &entry
->attr
;
318 dev_err(&attr
->device
->dev
, "%s NULL netdev port=%d index=%d\n",
319 __func__
, attr
->port_num
, attr
->index
);
322 if (rdma_cap_roce_gid_table(attr
->device
, attr
->port_num
)) {
323 ret
= attr
->device
->ops
.add_gid(attr
, &entry
->context
);
325 dev_err(&attr
->device
->dev
,
326 "%s GID add failed port=%d index=%d\n",
327 __func__
, attr
->port_num
, attr
->index
);
335 * del_gid - Delete GID table entry
337 * @ib_dev: IB device whose GID entry to be deleted
338 * @port: Port number of the IB device
339 * @table: GID table of the IB device for a port
340 * @ix: GID entry index to delete
343 static void del_gid(struct ib_device
*ib_dev
, u8 port
,
344 struct ib_gid_table
*table
, int ix
)
346 struct ib_gid_table_entry
*entry
;
348 lockdep_assert_held(&table
->lock
);
350 dev_dbg(&ib_dev
->dev
, "%s port=%d index=%d gid %pI6\n", __func__
, port
,
351 ix
, table
->data_vec
[ix
]->attr
.gid
.raw
);
353 write_lock_irq(&table
->rwlock
);
354 entry
= table
->data_vec
[ix
];
355 entry
->state
= GID_TABLE_ENTRY_PENDING_DEL
;
357 * For non RoCE protocol, GID entry slot is ready to use.
359 if (!rdma_protocol_roce(ib_dev
, port
))
360 table
->data_vec
[ix
] = NULL
;
361 write_unlock_irq(&table
->rwlock
);
363 if (rdma_cap_roce_gid_table(ib_dev
, port
))
364 ib_dev
->ops
.del_gid(&entry
->attr
, &entry
->context
);
366 put_gid_entry_locked(entry
);
370 * add_modify_gid - Add or modify GID table entry
372 * @table: GID table in which GID to be added or modified
373 * @attr: Attributes of the GID
375 * Returns 0 on success or appropriate error code. It accepts zero
376 * GID addition for non RoCE ports for HCAs that report them as valid
377 * GID. However such zero GIDs are not added to the cache.
379 static int add_modify_gid(struct ib_gid_table
*table
,
380 const struct ib_gid_attr
*attr
)
382 struct ib_gid_table_entry
*entry
;
386 * Invalidate any old entry in the table to make it safe to write to
389 if (is_gid_entry_valid(table
->data_vec
[attr
->index
]))
390 del_gid(attr
->device
, attr
->port_num
, table
, attr
->index
);
393 * Some HCAs report multiple GID entries with only one valid GID, and
394 * leave other unused entries as the zero GID. Convert zero GIDs to
395 * empty table entries instead of storing them.
397 if (rdma_is_zero_gid(&attr
->gid
))
400 entry
= alloc_gid_entry(attr
);
404 if (rdma_protocol_roce(attr
->device
, attr
->port_num
)) {
405 ret
= add_roce_gid(entry
);
410 store_gid_entry(table
, entry
);
414 put_gid_entry(entry
);
418 /* rwlock should be read locked, or lock should be held */
419 static int find_gid(struct ib_gid_table
*table
, const union ib_gid
*gid
,
420 const struct ib_gid_attr
*val
, bool default_gid
,
421 unsigned long mask
, int *pempty
)
425 int empty
= pempty
? -1 : 0;
427 while (i
< table
->sz
&& (found
< 0 || empty
< 0)) {
428 struct ib_gid_table_entry
*data
= table
->data_vec
[i
];
429 struct ib_gid_attr
*attr
;
434 /* find_gid() is used during GID addition where it is expected
435 * to return a free entry slot which is not duplicate.
436 * Free entry slot is requested and returned if pempty is set,
437 * so lookup free slot only if requested.
439 if (pempty
&& empty
< 0) {
440 if (is_gid_entry_free(data
) &&
442 is_gid_index_default(table
, curr_index
)) {
444 * Found an invalid (free) entry; allocate it.
445 * If default GID is requested, then our
446 * found slot must be one of the DEFAULT
447 * reserved slots or we fail.
448 * This ensures that only DEFAULT reserved
449 * slots are used for default property GIDs.
456 * Additionally find_gid() is used to find valid entry during
457 * lookup operation; so ignore the entries which are marked as
458 * pending for removal and the entries which are marked as
461 if (!is_gid_entry_valid(data
))
468 if (mask
& GID_ATTR_FIND_MASK_GID_TYPE
&&
469 attr
->gid_type
!= val
->gid_type
)
472 if (mask
& GID_ATTR_FIND_MASK_GID
&&
473 memcmp(gid
, &data
->attr
.gid
, sizeof(*gid
)))
476 if (mask
& GID_ATTR_FIND_MASK_NETDEV
&&
477 attr
->ndev
!= val
->ndev
)
480 if (mask
& GID_ATTR_FIND_MASK_DEFAULT
&&
481 is_gid_index_default(table
, curr_index
) != default_gid
)
493 static void make_default_gid(struct net_device
*dev
, union ib_gid
*gid
)
495 gid
->global
.subnet_prefix
= cpu_to_be64(0xfe80000000000000LL
);
496 addrconf_ifid_eui48(&gid
->raw
[8], dev
);
499 static int __ib_cache_gid_add(struct ib_device
*ib_dev
, u8 port
,
500 union ib_gid
*gid
, struct ib_gid_attr
*attr
,
501 unsigned long mask
, bool default_gid
)
503 struct ib_gid_table
*table
;
508 /* Do not allow adding zero GID in support of
509 * IB spec version 1.3 section 4.1.1 point (6) and
510 * section 12.7.10 and section 12.7.20
512 if (rdma_is_zero_gid(gid
))
515 table
= rdma_gid_table(ib_dev
, port
);
517 mutex_lock(&table
->lock
);
519 ix
= find_gid(table
, gid
, attr
, default_gid
, mask
, &empty
);
527 attr
->device
= ib_dev
;
529 attr
->port_num
= port
;
531 ret
= add_modify_gid(table
, attr
);
533 dispatch_gid_change_event(ib_dev
, port
);
536 mutex_unlock(&table
->lock
);
538 pr_warn("%s: unable to add gid %pI6 error=%d\n",
539 __func__
, gid
->raw
, ret
);
543 int ib_cache_gid_add(struct ib_device
*ib_dev
, u8 port
,
544 union ib_gid
*gid
, struct ib_gid_attr
*attr
)
546 struct net_device
*idev
;
550 idev
= ib_device_get_netdev(ib_dev
, port
);
551 if (idev
&& attr
->ndev
!= idev
) {
552 union ib_gid default_gid
;
554 /* Adding default GIDs is not permitted */
555 make_default_gid(idev
, &default_gid
);
556 if (!memcmp(gid
, &default_gid
, sizeof(*gid
))) {
564 mask
= GID_ATTR_FIND_MASK_GID
|
565 GID_ATTR_FIND_MASK_GID_TYPE
|
566 GID_ATTR_FIND_MASK_NETDEV
;
568 ret
= __ib_cache_gid_add(ib_dev
, port
, gid
, attr
, mask
, false);
573 _ib_cache_gid_del(struct ib_device
*ib_dev
, u8 port
,
574 union ib_gid
*gid
, struct ib_gid_attr
*attr
,
575 unsigned long mask
, bool default_gid
)
577 struct ib_gid_table
*table
;
581 table
= rdma_gid_table(ib_dev
, port
);
583 mutex_lock(&table
->lock
);
585 ix
= find_gid(table
, gid
, attr
, default_gid
, mask
, NULL
);
591 del_gid(ib_dev
, port
, table
, ix
);
592 dispatch_gid_change_event(ib_dev
, port
);
595 mutex_unlock(&table
->lock
);
597 pr_debug("%s: can't delete gid %pI6 error=%d\n",
598 __func__
, gid
->raw
, ret
);
602 int ib_cache_gid_del(struct ib_device
*ib_dev
, u8 port
,
603 union ib_gid
*gid
, struct ib_gid_attr
*attr
)
605 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
606 GID_ATTR_FIND_MASK_GID_TYPE
|
607 GID_ATTR_FIND_MASK_DEFAULT
|
608 GID_ATTR_FIND_MASK_NETDEV
;
610 return _ib_cache_gid_del(ib_dev
, port
, gid
, attr
, mask
, false);
613 int ib_cache_gid_del_all_netdev_gids(struct ib_device
*ib_dev
, u8 port
,
614 struct net_device
*ndev
)
616 struct ib_gid_table
*table
;
618 bool deleted
= false;
620 table
= rdma_gid_table(ib_dev
, port
);
622 mutex_lock(&table
->lock
);
624 for (ix
= 0; ix
< table
->sz
; ix
++) {
625 if (is_gid_entry_valid(table
->data_vec
[ix
]) &&
626 table
->data_vec
[ix
]->attr
.ndev
== ndev
) {
627 del_gid(ib_dev
, port
, table
, ix
);
632 mutex_unlock(&table
->lock
);
635 dispatch_gid_change_event(ib_dev
, port
);
641 * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
642 * a valid GID entry for given search parameters. It searches for the specified
643 * GID value in the local software cache.
644 * @device: The device to query.
645 * @gid: The GID value to search for.
646 * @gid_type: The GID type to search for.
647 * @port_num: The port number of the device where the GID value should be
649 * @ndev: In RoCE, the net device of the device. NULL means ignore.
651 * Returns sgid attributes if the GID is found with valid reference or
652 * returns ERR_PTR for the error.
653 * The caller must invoke rdma_put_gid_attr() to release the reference.
655 const struct ib_gid_attr
*
656 rdma_find_gid_by_port(struct ib_device
*ib_dev
,
657 const union ib_gid
*gid
,
658 enum ib_gid_type gid_type
,
659 u8 port
, struct net_device
*ndev
)
662 struct ib_gid_table
*table
;
663 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
664 GID_ATTR_FIND_MASK_GID_TYPE
;
665 struct ib_gid_attr val
= {.ndev
= ndev
, .gid_type
= gid_type
};
666 const struct ib_gid_attr
*attr
;
669 if (!rdma_is_port_valid(ib_dev
, port
))
670 return ERR_PTR(-ENOENT
);
672 table
= rdma_gid_table(ib_dev
, port
);
675 mask
|= GID_ATTR_FIND_MASK_NETDEV
;
677 read_lock_irqsave(&table
->rwlock
, flags
);
678 local_index
= find_gid(table
, gid
, &val
, false, mask
, NULL
);
679 if (local_index
>= 0) {
680 get_gid_entry(table
->data_vec
[local_index
]);
681 attr
= &table
->data_vec
[local_index
]->attr
;
682 read_unlock_irqrestore(&table
->rwlock
, flags
);
686 read_unlock_irqrestore(&table
->rwlock
, flags
);
687 return ERR_PTR(-ENOENT
);
689 EXPORT_SYMBOL(rdma_find_gid_by_port
);
692 * rdma_find_gid_by_filter - Returns the GID table attribute where a
693 * specified GID value occurs
694 * @device: The device to query.
695 * @gid: The GID value to search for.
696 * @port: The port number of the device where the GID value could be
698 * @filter: The filter function is executed on any matching GID in the table.
699 * If the filter function returns true, the corresponding index is returned,
700 * otherwise, we continue searching the GID table. It's guaranteed that
701 * while filter is executed, ndev field is valid and the structure won't
702 * change. filter is executed in an atomic context. filter must not be NULL.
704 * rdma_find_gid_by_filter() searches for the specified GID value
705 * of which the filter function returns true in the port's GID table.
708 const struct ib_gid_attr
*rdma_find_gid_by_filter(
709 struct ib_device
*ib_dev
, const union ib_gid
*gid
, u8 port
,
710 bool (*filter
)(const union ib_gid
*gid
, const struct ib_gid_attr
*,
714 const struct ib_gid_attr
*res
= ERR_PTR(-ENOENT
);
715 struct ib_gid_table
*table
;
719 if (!rdma_is_port_valid(ib_dev
, port
))
720 return ERR_PTR(-EINVAL
);
722 table
= rdma_gid_table(ib_dev
, port
);
724 read_lock_irqsave(&table
->rwlock
, flags
);
725 for (i
= 0; i
< table
->sz
; i
++) {
726 struct ib_gid_table_entry
*entry
= table
->data_vec
[i
];
728 if (!is_gid_entry_valid(entry
))
731 if (memcmp(gid
, &entry
->attr
.gid
, sizeof(*gid
)))
734 if (filter(gid
, &entry
->attr
, context
)) {
735 get_gid_entry(entry
);
740 read_unlock_irqrestore(&table
->rwlock
, flags
);
744 static struct ib_gid_table
*alloc_gid_table(int sz
)
746 struct ib_gid_table
*table
= kzalloc(sizeof(*table
), GFP_KERNEL
);
751 table
->data_vec
= kcalloc(sz
, sizeof(*table
->data_vec
), GFP_KERNEL
);
752 if (!table
->data_vec
)
755 mutex_init(&table
->lock
);
758 rwlock_init(&table
->rwlock
);
766 static void release_gid_table(struct ib_device
*device
,
767 struct ib_gid_table
*table
)
775 for (i
= 0; i
< table
->sz
; i
++) {
776 if (is_gid_entry_free(table
->data_vec
[i
]))
778 if (kref_read(&table
->data_vec
[i
]->kref
) > 1) {
779 dev_err(&device
->dev
,
780 "GID entry ref leak for index %d ref=%d\n", i
,
781 kref_read(&table
->data_vec
[i
]->kref
));
788 kfree(table
->data_vec
);
792 static void cleanup_gid_table_port(struct ib_device
*ib_dev
, u8 port
,
793 struct ib_gid_table
*table
)
796 bool deleted
= false;
801 mutex_lock(&table
->lock
);
802 for (i
= 0; i
< table
->sz
; ++i
) {
803 if (is_gid_entry_valid(table
->data_vec
[i
])) {
804 del_gid(ib_dev
, port
, table
, i
);
808 mutex_unlock(&table
->lock
);
811 dispatch_gid_change_event(ib_dev
, port
);
814 void ib_cache_gid_set_default_gid(struct ib_device
*ib_dev
, u8 port
,
815 struct net_device
*ndev
,
816 unsigned long gid_type_mask
,
817 enum ib_cache_gid_default_mode mode
)
819 union ib_gid gid
= { };
820 struct ib_gid_attr gid_attr
;
821 unsigned int gid_type
;
824 mask
= GID_ATTR_FIND_MASK_GID_TYPE
|
825 GID_ATTR_FIND_MASK_DEFAULT
|
826 GID_ATTR_FIND_MASK_NETDEV
;
827 memset(&gid_attr
, 0, sizeof(gid_attr
));
828 gid_attr
.ndev
= ndev
;
830 for (gid_type
= 0; gid_type
< IB_GID_TYPE_SIZE
; ++gid_type
) {
831 if (1UL << gid_type
& ~gid_type_mask
)
834 gid_attr
.gid_type
= gid_type
;
836 if (mode
== IB_CACHE_GID_DEFAULT_MODE_SET
) {
837 make_default_gid(ndev
, &gid
);
838 __ib_cache_gid_add(ib_dev
, port
, &gid
,
839 &gid_attr
, mask
, true);
840 } else if (mode
== IB_CACHE_GID_DEFAULT_MODE_DELETE
) {
841 _ib_cache_gid_del(ib_dev
, port
, &gid
,
842 &gid_attr
, mask
, true);
847 static void gid_table_reserve_default(struct ib_device
*ib_dev
, u8 port
,
848 struct ib_gid_table
*table
)
851 unsigned long roce_gid_type_mask
;
852 unsigned int num_default_gids
;
854 roce_gid_type_mask
= roce_gid_type_mask_support(ib_dev
, port
);
855 num_default_gids
= hweight_long(roce_gid_type_mask
);
856 /* Reserve starting indices for default GIDs */
857 for (i
= 0; i
< num_default_gids
&& i
< table
->sz
; i
++)
858 table
->default_gid_indices
|= BIT(i
);
862 static void gid_table_release_one(struct ib_device
*ib_dev
)
866 rdma_for_each_port (ib_dev
, p
) {
867 release_gid_table(ib_dev
, ib_dev
->port_data
[p
].cache
.gid
);
868 ib_dev
->port_data
[p
].cache
.gid
= NULL
;
872 static int _gid_table_setup_one(struct ib_device
*ib_dev
)
874 struct ib_gid_table
*table
;
875 unsigned int rdma_port
;
877 rdma_for_each_port (ib_dev
, rdma_port
) {
878 table
= alloc_gid_table(
879 ib_dev
->port_data
[rdma_port
].immutable
.gid_tbl_len
);
881 goto rollback_table_setup
;
883 gid_table_reserve_default(ib_dev
, rdma_port
, table
);
884 ib_dev
->port_data
[rdma_port
].cache
.gid
= table
;
888 rollback_table_setup
:
889 gid_table_release_one(ib_dev
);
893 static void gid_table_cleanup_one(struct ib_device
*ib_dev
)
897 rdma_for_each_port (ib_dev
, p
)
898 cleanup_gid_table_port(ib_dev
, p
,
899 ib_dev
->port_data
[p
].cache
.gid
);
902 static int gid_table_setup_one(struct ib_device
*ib_dev
)
906 err
= _gid_table_setup_one(ib_dev
);
911 rdma_roce_rescan_device(ib_dev
);
917 * rdma_query_gid - Read the GID content from the GID software cache
918 * @device: Device to query the GID
919 * @port_num: Port number of the device
920 * @index: Index of the GID table entry to read
921 * @gid: Pointer to GID where to store the entry's GID
923 * rdma_query_gid() only reads the GID entry content for requested device,
924 * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
925 * hold any reference to the GID table entry in the HCA or software cache.
927 * Returns 0 on success or appropriate error code.
930 int rdma_query_gid(struct ib_device
*device
, u8 port_num
,
931 int index
, union ib_gid
*gid
)
933 struct ib_gid_table
*table
;
937 if (!rdma_is_port_valid(device
, port_num
))
940 table
= rdma_gid_table(device
, port_num
);
941 read_lock_irqsave(&table
->rwlock
, flags
);
943 if (index
< 0 || index
>= table
->sz
||
944 !is_gid_entry_valid(table
->data_vec
[index
]))
947 memcpy(gid
, &table
->data_vec
[index
]->attr
.gid
, sizeof(*gid
));
951 read_unlock_irqrestore(&table
->rwlock
, flags
);
954 EXPORT_SYMBOL(rdma_query_gid
);
957 * rdma_find_gid - Returns SGID attributes if the matching GID is found.
958 * @device: The device to query.
959 * @gid: The GID value to search for.
960 * @gid_type: The GID type to search for.
961 * @ndev: In RoCE, the net device of the device. NULL means ignore.
963 * rdma_find_gid() searches for the specified GID value in the software cache.
965 * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
966 * error. The caller must invoke rdma_put_gid_attr() to release the reference.
969 const struct ib_gid_attr
*rdma_find_gid(struct ib_device
*device
,
970 const union ib_gid
*gid
,
971 enum ib_gid_type gid_type
,
972 struct net_device
*ndev
)
974 unsigned long mask
= GID_ATTR_FIND_MASK_GID
|
975 GID_ATTR_FIND_MASK_GID_TYPE
;
976 struct ib_gid_attr gid_attr_val
= {.ndev
= ndev
, .gid_type
= gid_type
};
980 mask
|= GID_ATTR_FIND_MASK_NETDEV
;
982 rdma_for_each_port(device
, p
) {
983 struct ib_gid_table
*table
;
987 table
= device
->port_data
[p
].cache
.gid
;
988 read_lock_irqsave(&table
->rwlock
, flags
);
989 index
= find_gid(table
, gid
, &gid_attr_val
, false, mask
, NULL
);
991 const struct ib_gid_attr
*attr
;
993 get_gid_entry(table
->data_vec
[index
]);
994 attr
= &table
->data_vec
[index
]->attr
;
995 read_unlock_irqrestore(&table
->rwlock
, flags
);
998 read_unlock_irqrestore(&table
->rwlock
, flags
);
1001 return ERR_PTR(-ENOENT
);
1003 EXPORT_SYMBOL(rdma_find_gid
);
1005 int ib_get_cached_pkey(struct ib_device
*device
,
1010 struct ib_pkey_cache
*cache
;
1011 unsigned long flags
;
1014 if (!rdma_is_port_valid(device
, port_num
))
1017 read_lock_irqsave(&device
->cache
.lock
, flags
);
1019 cache
= device
->port_data
[port_num
].cache
.pkey
;
1021 if (index
< 0 || index
>= cache
->table_len
)
1024 *pkey
= cache
->table
[index
];
1026 read_unlock_irqrestore(&device
->cache
.lock
, flags
);
1030 EXPORT_SYMBOL(ib_get_cached_pkey
);
1032 int ib_get_cached_subnet_prefix(struct ib_device
*device
,
1036 unsigned long flags
;
1038 if (!rdma_is_port_valid(device
, port_num
))
1041 read_lock_irqsave(&device
->cache
.lock
, flags
);
1042 *sn_pfx
= device
->port_data
[port_num
].cache
.subnet_prefix
;
1043 read_unlock_irqrestore(&device
->cache
.lock
, flags
);
1047 EXPORT_SYMBOL(ib_get_cached_subnet_prefix
);
1049 int ib_find_cached_pkey(struct ib_device
*device
,
1054 struct ib_pkey_cache
*cache
;
1055 unsigned long flags
;
1058 int partial_ix
= -1;
1060 if (!rdma_is_port_valid(device
, port_num
))
1063 read_lock_irqsave(&device
->cache
.lock
, flags
);
1065 cache
= device
->port_data
[port_num
].cache
.pkey
;
1069 for (i
= 0; i
< cache
->table_len
; ++i
)
1070 if ((cache
->table
[i
] & 0x7fff) == (pkey
& 0x7fff)) {
1071 if (cache
->table
[i
] & 0x8000) {
1079 if (ret
&& partial_ix
>= 0) {
1080 *index
= partial_ix
;
1084 read_unlock_irqrestore(&device
->cache
.lock
, flags
);
1088 EXPORT_SYMBOL(ib_find_cached_pkey
);
1090 int ib_find_exact_cached_pkey(struct ib_device
*device
,
1095 struct ib_pkey_cache
*cache
;
1096 unsigned long flags
;
1100 if (!rdma_is_port_valid(device
, port_num
))
1103 read_lock_irqsave(&device
->cache
.lock
, flags
);
1105 cache
= device
->port_data
[port_num
].cache
.pkey
;
1109 for (i
= 0; i
< cache
->table_len
; ++i
)
1110 if (cache
->table
[i
] == pkey
) {
1116 read_unlock_irqrestore(&device
->cache
.lock
, flags
);
1120 EXPORT_SYMBOL(ib_find_exact_cached_pkey
);
1122 int ib_get_cached_lmc(struct ib_device
*device
,
1126 unsigned long flags
;
1129 if (!rdma_is_port_valid(device
, port_num
))
1132 read_lock_irqsave(&device
->cache
.lock
, flags
);
1133 *lmc
= device
->port_data
[port_num
].cache
.lmc
;
1134 read_unlock_irqrestore(&device
->cache
.lock
, flags
);
1138 EXPORT_SYMBOL(ib_get_cached_lmc
);
1140 int ib_get_cached_port_state(struct ib_device
*device
,
1142 enum ib_port_state
*port_state
)
1144 unsigned long flags
;
1147 if (!rdma_is_port_valid(device
, port_num
))
1150 read_lock_irqsave(&device
->cache
.lock
, flags
);
1151 *port_state
= device
->port_data
[port_num
].cache
.port_state
;
1152 read_unlock_irqrestore(&device
->cache
.lock
, flags
);
1156 EXPORT_SYMBOL(ib_get_cached_port_state
);
1159 * rdma_get_gid_attr - Returns GID attributes for a port of a device
1160 * at a requested gid_index, if a valid GID entry exists.
1161 * @device: The device to query.
1162 * @port_num: The port number on the device where the GID value
1164 * @index: Index of the GID table entry whose attributes are to
1167 * rdma_get_gid_attr() acquires reference count of gid attributes from the
1168 * cached GID table. Caller must invoke rdma_put_gid_attr() to release
1169 * reference to gid attribute regardless of link layer.
1171 * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
1174 const struct ib_gid_attr
*
1175 rdma_get_gid_attr(struct ib_device
*device
, u8 port_num
, int index
)
1177 const struct ib_gid_attr
*attr
= ERR_PTR(-EINVAL
);
1178 struct ib_gid_table
*table
;
1179 unsigned long flags
;
1181 if (!rdma_is_port_valid(device
, port_num
))
1182 return ERR_PTR(-EINVAL
);
1184 table
= rdma_gid_table(device
, port_num
);
1185 if (index
< 0 || index
>= table
->sz
)
1186 return ERR_PTR(-EINVAL
);
1188 read_lock_irqsave(&table
->rwlock
, flags
);
1189 if (!is_gid_entry_valid(table
->data_vec
[index
]))
1192 get_gid_entry(table
->data_vec
[index
]);
1193 attr
= &table
->data_vec
[index
]->attr
;
1195 read_unlock_irqrestore(&table
->rwlock
, flags
);
1198 EXPORT_SYMBOL(rdma_get_gid_attr
);
1201 * rdma_put_gid_attr - Release reference to the GID attribute
1202 * @attr: Pointer to the GID attribute whose reference
1203 * needs to be released.
1205 * rdma_put_gid_attr() must be used to release reference whose
1206 * reference is acquired using rdma_get_gid_attr() or any APIs
1207 * which returns a pointer to the ib_gid_attr regardless of link layer
1211 void rdma_put_gid_attr(const struct ib_gid_attr
*attr
)
1213 struct ib_gid_table_entry
*entry
=
1214 container_of(attr
, struct ib_gid_table_entry
, attr
);
1216 put_gid_entry(entry
);
1218 EXPORT_SYMBOL(rdma_put_gid_attr
);
1221 * rdma_hold_gid_attr - Get reference to existing GID attribute
1223 * @attr: Pointer to the GID attribute whose reference
1224 * needs to be taken.
1226 * Increase the reference count to a GID attribute to keep it from being
1227 * freed. Callers are required to already be holding a reference to attribute.
1230 void rdma_hold_gid_attr(const struct ib_gid_attr
*attr
)
1232 struct ib_gid_table_entry
*entry
=
1233 container_of(attr
, struct ib_gid_table_entry
, attr
);
1235 get_gid_entry(entry
);
1237 EXPORT_SYMBOL(rdma_hold_gid_attr
);
1240 * rdma_read_gid_attr_ndev_rcu - Read GID attribute netdevice
1241 * which must be in UP state.
1243 * @attr:Pointer to the GID attribute
1245 * Returns pointer to netdevice if the netdevice was attached to GID and
1246 * netdevice is in UP state. Caller must hold RCU lock as this API
1247 * reads the netdev flags which can change while netdevice migrates to
1248 * different net namespace. Returns ERR_PTR with error code otherwise.
1251 struct net_device
*rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr
*attr
)
1253 struct ib_gid_table_entry
*entry
=
1254 container_of(attr
, struct ib_gid_table_entry
, attr
);
1255 struct ib_device
*device
= entry
->attr
.device
;
1256 struct net_device
*ndev
= ERR_PTR(-ENODEV
);
1257 u8 port_num
= entry
->attr
.port_num
;
1258 struct ib_gid_table
*table
;
1259 unsigned long flags
;
1262 table
= rdma_gid_table(device
, port_num
);
1264 read_lock_irqsave(&table
->rwlock
, flags
);
1265 valid
= is_gid_entry_valid(table
->data_vec
[attr
->index
]);
1266 if (valid
&& attr
->ndev
&& (READ_ONCE(attr
->ndev
->flags
) & IFF_UP
))
1268 read_unlock_irqrestore(&table
->rwlock
, flags
);
1272 static int config_non_roce_gid_cache(struct ib_device
*device
,
1273 u8 port
, int gid_tbl_len
)
1275 struct ib_gid_attr gid_attr
= {};
1276 struct ib_gid_table
*table
;
1280 gid_attr
.device
= device
;
1281 gid_attr
.port_num
= port
;
1282 table
= rdma_gid_table(device
, port
);
1284 mutex_lock(&table
->lock
);
1285 for (i
= 0; i
< gid_tbl_len
; ++i
) {
1286 if (!device
->ops
.query_gid
)
1288 ret
= device
->ops
.query_gid(device
, port
, i
, &gid_attr
.gid
);
1290 dev_warn(&device
->dev
,
1291 "query_gid failed (%d) for index %d\n", ret
,
1296 add_modify_gid(table
, &gid_attr
);
1299 mutex_unlock(&table
->lock
);
1303 static void ib_cache_update(struct ib_device
*device
,
1305 bool enforce_security
)
1307 struct ib_port_attr
*tprops
= NULL
;
1308 struct ib_pkey_cache
*pkey_cache
= NULL
, *old_pkey_cache
;
1312 if (!rdma_is_port_valid(device
, port
))
1315 tprops
= kmalloc(sizeof *tprops
, GFP_KERNEL
);
1319 ret
= ib_query_port(device
, port
, tprops
);
1321 dev_warn(&device
->dev
, "ib_query_port failed (%d)\n", ret
);
1325 if (!rdma_protocol_roce(device
, port
)) {
1326 ret
= config_non_roce_gid_cache(device
, port
,
1327 tprops
->gid_tbl_len
);
1332 pkey_cache
= kmalloc(struct_size(pkey_cache
, table
,
1333 tprops
->pkey_tbl_len
),
1338 pkey_cache
->table_len
= tprops
->pkey_tbl_len
;
1340 for (i
= 0; i
< pkey_cache
->table_len
; ++i
) {
1341 ret
= ib_query_pkey(device
, port
, i
, pkey_cache
->table
+ i
);
1343 dev_warn(&device
->dev
,
1344 "ib_query_pkey failed (%d) for index %d\n",
1350 write_lock_irq(&device
->cache
.lock
);
1352 old_pkey_cache
= device
->port_data
[port
].cache
.pkey
;
1354 device
->port_data
[port
].cache
.pkey
= pkey_cache
;
1355 device
->port_data
[port
].cache
.lmc
= tprops
->lmc
;
1356 device
->port_data
[port
].cache
.port_state
= tprops
->state
;
1358 device
->port_data
[port
].cache
.subnet_prefix
= tprops
->subnet_prefix
;
1359 write_unlock_irq(&device
->cache
.lock
);
1361 if (enforce_security
)
1362 ib_security_cache_change(device
,
1364 tprops
->subnet_prefix
);
1366 kfree(old_pkey_cache
);
1375 static void ib_cache_task(struct work_struct
*_work
)
1377 struct ib_update_work
*work
=
1378 container_of(_work
, struct ib_update_work
, work
);
1380 ib_cache_update(work
->device
,
1382 work
->enforce_security
);
1386 static void ib_cache_event(struct ib_event_handler
*handler
,
1387 struct ib_event
*event
)
1389 struct ib_update_work
*work
;
1391 if (event
->event
== IB_EVENT_PORT_ERR
||
1392 event
->event
== IB_EVENT_PORT_ACTIVE
||
1393 event
->event
== IB_EVENT_LID_CHANGE
||
1394 event
->event
== IB_EVENT_PKEY_CHANGE
||
1395 event
->event
== IB_EVENT_SM_CHANGE
||
1396 event
->event
== IB_EVENT_CLIENT_REREGISTER
||
1397 event
->event
== IB_EVENT_GID_CHANGE
) {
1398 work
= kmalloc(sizeof *work
, GFP_ATOMIC
);
1400 INIT_WORK(&work
->work
, ib_cache_task
);
1401 work
->device
= event
->device
;
1402 work
->port_num
= event
->element
.port_num
;
1403 if (event
->event
== IB_EVENT_PKEY_CHANGE
||
1404 event
->event
== IB_EVENT_GID_CHANGE
)
1405 work
->enforce_security
= true;
1407 work
->enforce_security
= false;
1409 queue_work(ib_wq
, &work
->work
);
1414 int ib_cache_setup_one(struct ib_device
*device
)
1419 rwlock_init(&device
->cache
.lock
);
1421 err
= gid_table_setup_one(device
);
1425 rdma_for_each_port (device
, p
)
1426 ib_cache_update(device
, p
, true);
1428 INIT_IB_EVENT_HANDLER(&device
->cache
.event_handler
,
1429 device
, ib_cache_event
);
1430 ib_register_event_handler(&device
->cache
.event_handler
);
1434 void ib_cache_release_one(struct ib_device
*device
)
1439 * The release function frees all the cache elements.
1440 * This function should be called as part of freeing
1441 * all the device's resources when the cache could no
1442 * longer be accessed.
1444 rdma_for_each_port (device
, p
)
1445 kfree(device
->port_data
[p
].cache
.pkey
);
1447 gid_table_release_one(device
);
1450 void ib_cache_cleanup_one(struct ib_device
*device
)
1452 /* The cleanup function unregisters the event handler,
1453 * waits for all in-progress workqueue elements and cleans
1454 * up the GID cache. This function should be called after
1455 * the device was removed from the devices list and all
1456 * clients were removed, so the cache exists but is
1457 * non-functional and shouldn't be updated anymore.
1459 ib_unregister_event_handler(&device
->cache
.event_handler
);
1460 flush_workqueue(ib_wq
);
1461 gid_table_cleanup_one(device
);
1464 * Flush the wq second time for any pending GID delete work.
1466 flush_workqueue(ib_wq
);