]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - drivers/infiniband/hw/mlx5/mem.c
IB/mlx5: Add support for big MRs
[mirror_ubuntu-focal-kernel.git] / drivers / infiniband / hw / mlx5 / mem.c
CommitLineData
e126ba97 1/*
6cf0a15f 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
e126ba97
EC
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/module.h>
34#include <rdma/ib_umem.h>
cc149f75 35#include <rdma/ib_umem_odp.h>
e126ba97
EC
36#include "mlx5_ib.h"
37
38/* @umem: umem object to scan
39 * @addr: ib virtual address requested by the user
762f899a 40 * @max_page_shift: high limit for page_shift - 0 means no limit
e126ba97
EC
41 * @count: number of PAGE_SIZE pages covered by umem
42 * @shift: page shift for the compound pages found in the region
43 * @ncont: number of compund pages
44 * @order: log2 of the number of compound pages
45 */
762f899a
MD
46void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
47 unsigned long max_page_shift,
48 int *count, int *shift,
e126ba97
EC
49 int *ncont, int *order)
50{
e126ba97
EC
51 unsigned long tmp;
52 unsigned long m;
eeb8461e 53 int i, k;
e126ba97
EC
54 u64 base = 0;
55 int p = 0;
56 int skip;
57 int mask;
58 u64 len;
59 u64 pfn;
eeb8461e
YH
60 struct scatterlist *sg;
61 int entry;
f39f8697 62 unsigned long page_shift = ilog2(umem->page_size);
e126ba97 63
cc149f75
HE
64 /* With ODP we must always match OS page size. */
65 if (umem->odp_data) {
66 *count = ib_umem_page_count(umem);
67 *shift = PAGE_SHIFT;
68 *ncont = *count;
69 if (order)
70 *order = ilog2(roundup_pow_of_two(*count));
71
72 return;
73 }
74
f39f8697 75 addr = addr >> page_shift;
e126ba97 76 tmp = (unsigned long)addr;
fffd6873 77 m = find_first_bit(&tmp, BITS_PER_LONG);
762f899a
MD
78 if (max_page_shift)
79 m = min_t(unsigned long, max_page_shift - page_shift, m);
e126ba97
EC
80 skip = 1 << m;
81 mask = skip - 1;
82 i = 0;
eeb8461e 83 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
f39f8697
YH
84 len = sg_dma_len(sg) >> page_shift;
85 pfn = sg_dma_address(sg) >> page_shift;
eeb8461e
YH
86 for (k = 0; k < len; k++) {
87 if (!(i & mask)) {
88 tmp = (unsigned long)pfn;
fffd6873 89 m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
eeb8461e
YH
90 skip = 1 << m;
91 mask = skip - 1;
92 base = pfn;
93 p = 0;
94 } else {
95 if (base + p != pfn) {
96 tmp = (unsigned long)p;
fffd6873 97 m = find_first_bit(&tmp, BITS_PER_LONG);
e126ba97
EC
98 skip = 1 << m;
99 mask = skip - 1;
100 base = pfn;
101 p = 0;
e126ba97 102 }
e126ba97 103 }
eeb8461e
YH
104 p++;
105 i++;
e126ba97 106 }
eeb8461e 107 }
e126ba97
EC
108
109 if (i) {
110 m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m);
111
112 if (order)
113 *order = ilog2(roundup_pow_of_two(i) >> m);
114
115 *ncont = DIV_ROUND_UP(i, (1 << m));
116 } else {
117 m = 0;
118
119 if (order)
120 *order = 0;
121
122 *ncont = 0;
123 }
f39f8697 124 *shift = page_shift + m;
e126ba97
EC
125 *count = i;
126}
127
cc149f75
HE
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/*
 * Translate one ODP dma_list entry into an MTT entry: the address bits
 * are kept and the ODP read/write permission bits are mapped onto the
 * corresponding MLX5_IB_MTT_* access flags.
 */
static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
	u64 addr_bits = umem_dma & ODP_DMA_ADDR_MASK;
	u64 access = 0;

	if (umem_dma & ODP_READ_ALLOWED_BIT)
		access |= MLX5_IB_MTT_READ;

	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
		access |= MLX5_IB_MTT_WRITE;

	return addr_bits | access;
}
#endif
141
142/*
143 * Populate the given array with bus addresses from the umem.
144 *
145 * dev - mlx5_ib device
146 * umem - umem to use to fill the pages
147 * page_shift - determines the page size used in the resulting array
832a6b06
HE
148 * offset - offset into the umem to start from,
149 * only implemented for ODP umems
150 * num_pages - total number of pages to fill
cc149f75
HE
151 * pas - bus addresses array to fill
152 * access_flags - access flags to set on all present pages.
153 use enum mlx5_ib_mtt_access_flags for this.
154 */
832a6b06
HE
155void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
156 int page_shift, size_t offset, size_t num_pages,
157 __be64 *pas, int access_flags)
e126ba97 158{
f39f8697
YH
159 unsigned long umem_page_shift = ilog2(umem->page_size);
160 int shift = page_shift - umem_page_shift;
e126ba97 161 int mask = (1 << shift) - 1;
eeb8461e 162 int i, k;
e126ba97
EC
163 u64 cur = 0;
164 u64 base;
165 int len;
eeb8461e
YH
166 struct scatterlist *sg;
167 int entry;
cc149f75
HE
168#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
169 const bool odp = umem->odp_data != NULL;
170
171 if (odp) {
cc149f75
HE
172 WARN_ON(shift != 0);
173 WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
174
175 for (i = 0; i < num_pages; ++i) {
832a6b06 176 dma_addr_t pa = umem->odp_data->dma_list[offset + i];
cc149f75
HE
177
178 pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
179 }
180 return;
181 }
182#endif
e126ba97
EC
183
184 i = 0;
eeb8461e 185 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
f39f8697 186 len = sg_dma_len(sg) >> umem_page_shift;
eeb8461e
YH
187 base = sg_dma_address(sg);
188 for (k = 0; k < len; k++) {
189 if (!(i & mask)) {
f39f8697 190 cur = base + (k << umem_page_shift);
cc149f75 191 cur |= access_flags;
e126ba97 192
eeb8461e
YH
193 pas[i >> shift] = cpu_to_be64(cur);
194 mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
195 i >> shift, be64_to_cpu(pas[i >> shift]));
196 } else
197 mlx5_ib_dbg(dev, "=====> 0x%llx\n",
f39f8697 198 base + (k << umem_page_shift));
eeb8461e 199 i++;
e126ba97 200 }
eeb8461e 201 }
e126ba97
EC
202}
203
832a6b06
HE
204void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
205 int page_shift, __be64 *pas, int access_flags)
206{
207 return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
208 ib_umem_num_pages(umem), pas,
209 access_flags);
210}
e126ba97
EC
211int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
212{
213 u64 page_size;
214 u64 page_mask;
215 u64 off_size;
216 u64 off_mask;
217 u64 buf_off;
218
f241e749 219 page_size = (u64)1 << page_shift;
e126ba97
EC
220 page_mask = page_size - 1;
221 buf_off = addr & page_mask;
222 off_size = page_size >> 6;
223 off_mask = off_size - 1;
224
225 if (buf_off & off_mask)
226 return -EINVAL;
227
228 *offset = buf_off >> ilog2(off_size);
229 return 0;
230}