]>
Commit | Line | Data |
---|---|---|
47da7e62 VP |
1 | /* |
2 | * cxgb4_ppm.c: Chelsio common library for T4/T5 iSCSI PagePod Manager | |
3 | * | |
4 | * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | * | |
10 | * Written by: Karen Xie (kxie@chelsio.com) | |
11 | */ | |
12 | ||
13 | #include <linux/kernel.h> | |
14 | #include <linux/version.h> | |
15 | #include <linux/module.h> | |
16 | #include <linux/errno.h> | |
17 | #include <linux/types.h> | |
18 | #include <linux/debugfs.h> | |
19 | #include <linux/export.h> | |
20 | #include <linux/list.h> | |
21 | #include <linux/skbuff.h> | |
22 | #include <linux/pci.h> | |
23 | #include <linux/scatterlist.h> | |
24 | ||
25 | #include "cxgb4_ppm.h" | |
26 | ||
27 | /* Direct Data Placement - | |
28 | * Directly place the iSCSI Data-In or Data-Out PDU's payload into | |
29 | * pre-posted final destination host-memory buffers based on the | |
30 | * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) | |
31 | * in Data-Out PDUs. The host memory address is programmed into | |
32 | * h/w in the format of pagepod entries. The location of the | |
33 | * pagepod entry is encoded into ddp tag which is used as the base | |
34 | * for ITT/TTT. | |
35 | */ | |
36 | ||
37 | /* Direct-Data Placement page size adjustment | |
38 | */ | |
39 | int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz) | |
40 | { | |
41 | struct cxgbi_tag_format *tformat = &ppm->tformat; | |
42 | int i; | |
43 | ||
44 | for (i = 0; i < DDP_PGIDX_MAX; i++) { | |
45 | if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT + | |
46 | tformat->pgsz_order[i])) { | |
47 | pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n", | |
48 | __func__, ppm->ndev->name, pgsz, i); | |
49 | return i; | |
50 | } | |
51 | } | |
52 | pr_info("ippm: ddp page size %lu not supported.\n", pgsz); | |
53 | return DDP_PGIDX_MAX; | |
54 | } | |
55 | ||
56 | /* DDP setup & teardown | |
57 | */ | |
58 | static int ppm_find_unused_entries(unsigned long *bmap, | |
59 | unsigned int max_ppods, | |
60 | unsigned int start, | |
61 | unsigned int nr, | |
62 | unsigned int align_mask) | |
63 | { | |
64 | unsigned long i; | |
65 | ||
66 | i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask); | |
67 | ||
68 | if (unlikely(i >= max_ppods) && (start > nr)) | |
69 | i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1, | |
70 | align_mask); | |
71 | if (unlikely(i >= max_ppods)) | |
72 | return -ENOSPC; | |
73 | ||
74 | bitmap_set(bmap, i, nr); | |
75 | return (int)i; | |
76 | } | |
77 | ||
78 | static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count, | |
79 | unsigned long caller_data) | |
80 | { | |
81 | struct cxgbi_ppod_data *pdata = ppm->ppod_data + i; | |
82 | ||
83 | pdata->caller_data = caller_data; | |
84 | pdata->npods = count; | |
85 | ||
86 | if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1)) | |
87 | pdata->color = 0; | |
88 | else | |
89 | pdata->color++; | |
90 | } | |
91 | ||
92 | static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, | |
93 | unsigned long caller_data) | |
94 | { | |
95 | struct cxgbi_ppm_pool *pool; | |
96 | unsigned int cpu; | |
97 | int i; | |
98 | ||
99 | cpu = get_cpu(); | |
100 | pool = per_cpu_ptr(ppm->pool, cpu); | |
101 | spin_lock_bh(&pool->lock); | |
102 | put_cpu(); | |
103 | ||
104 | i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max, | |
105 | pool->next, count, 0); | |
106 | if (i < 0) { | |
107 | pool->next = 0; | |
108 | spin_unlock_bh(&pool->lock); | |
109 | return -ENOSPC; | |
110 | } | |
111 | ||
112 | pool->next = i + count; | |
113 | if (pool->next >= ppm->pool_index_max) | |
114 | pool->next = 0; | |
115 | ||
116 | spin_unlock_bh(&pool->lock); | |
117 | ||
118 | pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n", | |
119 | __func__, cpu, i, count, i + cpu * ppm->pool_index_max, | |
120 | pool->next); | |
121 | ||
122 | i += cpu * ppm->pool_index_max; | |
123 | ppm_mark_entries(ppm, i, count, caller_data); | |
124 | ||
125 | return i; | |
126 | } | |
127 | ||
128 | static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, | |
129 | unsigned long caller_data) | |
130 | { | |
131 | int i; | |
132 | ||
133 | spin_lock_bh(&ppm->map_lock); | |
134 | i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max, | |
135 | ppm->next, count, 0); | |
136 | if (i < 0) { | |
137 | ppm->next = 0; | |
138 | spin_unlock_bh(&ppm->map_lock); | |
139 | pr_debug("ippm: NO suitable entries %u available.\n", | |
140 | count); | |
141 | return -ENOSPC; | |
142 | } | |
143 | ||
144 | ppm->next = i + count; | |
145 | if (ppm->next >= ppm->bmap_index_max) | |
146 | ppm->next = 0; | |
147 | ||
148 | spin_unlock_bh(&ppm->map_lock); | |
149 | ||
150 | pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n", | |
151 | __func__, i, count, i + ppm->pool_rsvd, ppm->next, | |
152 | caller_data); | |
153 | ||
154 | i += ppm->pool_rsvd; | |
155 | ppm_mark_entries(ppm, i, count, caller_data); | |
156 | ||
157 | return i; | |
158 | } | |
159 | ||
160 | static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count) | |
161 | { | |
162 | pr_debug("%s: idx %d + %d.\n", __func__, i, count); | |
163 | ||
164 | if (i < ppm->pool_rsvd) { | |
165 | unsigned int cpu; | |
166 | struct cxgbi_ppm_pool *pool; | |
167 | ||
168 | cpu = i / ppm->pool_index_max; | |
169 | i %= ppm->pool_index_max; | |
170 | ||
171 | pool = per_cpu_ptr(ppm->pool, cpu); | |
172 | spin_lock_bh(&pool->lock); | |
173 | bitmap_clear(pool->bmap, i, count); | |
174 | ||
175 | if (i < pool->next) | |
176 | pool->next = i; | |
177 | spin_unlock_bh(&pool->lock); | |
178 | ||
179 | pr_debug("%s: cpu %u, idx %d, next %u.\n", | |
180 | __func__, cpu, i, pool->next); | |
181 | } else { | |
182 | spin_lock_bh(&ppm->map_lock); | |
183 | ||
184 | i -= ppm->pool_rsvd; | |
185 | bitmap_clear(ppm->ppod_bmap, i, count); | |
186 | ||
187 | if (i < ppm->next) | |
188 | ppm->next = i; | |
189 | spin_unlock_bh(&ppm->map_lock); | |
190 | ||
191 | pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next); | |
192 | } | |
193 | } | |
194 | ||
195 | void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx) | |
196 | { | |
197 | struct cxgbi_ppod_data *pdata; | |
198 | ||
199 | if (idx >= ppm->ppmax) { | |
200 | pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax); | |
201 | return; | |
202 | } | |
203 | ||
204 | pdata = ppm->ppod_data + idx; | |
205 | if (!pdata->npods) { | |
206 | pr_warn("ippm: idx %u, npods 0.\n", idx); | |
207 | return; | |
208 | } | |
209 | ||
210 | pr_debug("release idx %u, npods %u.\n", idx, pdata->npods); | |
211 | ppm_unmark_entries(ppm, idx, pdata->npods); | |
212 | } | |
213 | EXPORT_SYMBOL(cxgbi_ppm_ppod_release); | |
214 | ||
215 | int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages, | |
216 | u32 per_tag_pg_idx, u32 *ppod_idx, | |
217 | u32 *ddp_tag, unsigned long caller_data) | |
218 | { | |
219 | struct cxgbi_ppod_data *pdata; | |
220 | unsigned int npods; | |
221 | int idx = -1; | |
222 | unsigned int hwidx; | |
223 | u32 tag; | |
224 | ||
225 | npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; | |
226 | if (!npods) { | |
227 | pr_warn("%s: pages %u -> npods %u, full.\n", | |
228 | __func__, nr_pages, npods); | |
229 | return -EINVAL; | |
230 | } | |
231 | ||
232 | /* grab from cpu pool first */ | |
233 | idx = ppm_get_cpu_entries(ppm, npods, caller_data); | |
234 | /* try the general pool */ | |
235 | if (idx < 0) | |
236 | idx = ppm_get_entries(ppm, npods, caller_data); | |
237 | if (idx < 0) { | |
238 | pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n", | |
239 | nr_pages, npods, ppm->next, caller_data); | |
240 | return idx; | |
241 | } | |
242 | ||
243 | pdata = ppm->ppod_data + idx; | |
244 | hwidx = ppm->base_idx + idx; | |
245 | ||
246 | tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color); | |
247 | ||
248 | if (per_tag_pg_idx) | |
249 | tag |= (per_tag_pg_idx << 30) & 0xC0000000; | |
250 | ||
251 | *ppod_idx = idx; | |
252 | *ddp_tag = tag; | |
253 | ||
254 | pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n", | |
255 | nr_pages, tag, idx, npods, caller_data); | |
256 | ||
257 | return npods; | |
258 | } | |
259 | EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve); | |
260 | ||
261 | void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag, | |
262 | unsigned int tid, unsigned int offset, | |
263 | unsigned int length, | |
264 | struct cxgbi_pagepod_hdr *hdr) | |
265 | { | |
266 | /* The ddp tag in pagepod should be with bit 31:30 set to 0. | |
267 | * The ddp Tag on the wire should be with non-zero 31:30 to the peer | |
268 | */ | |
269 | tag &= 0x3FFFFFFF; | |
270 | ||
271 | hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid)); | |
272 | ||
273 | hdr->rsvd = 0; | |
274 | hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask); | |
275 | hdr->max_offset = htonl(length); | |
276 | hdr->page_offset = htonl(offset); | |
277 | ||
278 | pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n", | |
279 | tag, tid, length, offset); | |
280 | } | |
281 | EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr); | |
282 | ||
/* Free the memory of a manager allocated by cxgbi_ppm_init().  This releases
 * only the vmalloc'ed block itself; the per-cpu pool is freed separately by
 * the callers.
 */
static void ppm_free(struct cxgbi_ppm *ppm)
{
	vfree(ppm);
}
287 | ||
288 | static void ppm_destroy(struct kref *kref) | |
289 | { | |
290 | struct cxgbi_ppm *ppm = container_of(kref, | |
291 | struct cxgbi_ppm, | |
292 | refcnt); | |
293 | pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n", | |
294 | ppm->ndev->name, ppm); | |
295 | ||
296 | *ppm->ppm_pp = NULL; | |
297 | ||
298 | free_percpu(ppm->pool); | |
299 | ppm_free(ppm); | |
300 | } | |
301 | ||
302 | int cxgbi_ppm_release(struct cxgbi_ppm *ppm) | |
303 | { | |
304 | if (ppm) { | |
305 | int rv; | |
306 | ||
307 | rv = kref_put(&ppm->refcnt, ppm_destroy); | |
308 | return rv; | |
309 | } | |
310 | return 1; | |
311 | } | |
312 | ||
313 | static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, | |
314 | unsigned int *pcpu_ppmax) | |
315 | { | |
316 | struct cxgbi_ppm_pool *pools; | |
317 | unsigned int ppmax = (*total) / num_possible_cpus(); | |
318 | unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3; | |
319 | unsigned int bmap; | |
320 | unsigned int alloc_sz; | |
321 | unsigned int count = 0; | |
322 | unsigned int cpu; | |
323 | ||
324 | /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */ | |
325 | if (ppmax > max) | |
326 | ppmax = max; | |
327 | ||
328 | /* pool size must be multiple of unsigned long */ | |
329 | bmap = BITS_TO_LONGS(ppmax); | |
330 | ppmax = (bmap * sizeof(unsigned long)) << 3; | |
331 | ||
332 | alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; | |
333 | pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool)); | |
334 | ||
335 | if (!pools) | |
336 | return NULL; | |
337 | ||
338 | for_each_possible_cpu(cpu) { | |
339 | struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu); | |
340 | ||
341 | memset(ppool, 0, alloc_sz); | |
342 | spin_lock_init(&ppool->lock); | |
343 | count += ppmax; | |
344 | } | |
345 | ||
346 | *total = count; | |
347 | *pcpu_ppmax = ppmax; | |
348 | ||
349 | return pools; | |
350 | } | |
351 | ||
352 | int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, | |
353 | struct pci_dev *pdev, void *lldev, | |
354 | struct cxgbi_tag_format *tformat, | |
355 | unsigned int ppmax, | |
356 | unsigned int llimit, | |
357 | unsigned int start, | |
358 | unsigned int reserve_factor) | |
359 | { | |
360 | struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); | |
361 | struct cxgbi_ppm_pool *pool = NULL; | |
362 | unsigned int ppmax_pool = 0; | |
363 | unsigned int pool_index_max = 0; | |
364 | unsigned int alloc_sz; | |
365 | unsigned int ppod_bmap_size; | |
366 | ||
367 | if (ppm) { | |
368 | pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", | |
369 | ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax); | |
370 | kref_get(&ppm->refcnt); | |
371 | return 1; | |
372 | } | |
373 | ||
374 | if (reserve_factor) { | |
375 | ppmax_pool = ppmax / reserve_factor; | |
376 | pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max); | |
377 | ||
378 | pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n", | |
379 | ndev->name, ppmax, ppmax_pool, pool_index_max); | |
380 | } | |
381 | ||
382 | ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool); | |
383 | alloc_sz = sizeof(struct cxgbi_ppm) + | |
384 | ppmax * (sizeof(struct cxgbi_ppod_data)) + | |
385 | ppod_bmap_size * sizeof(unsigned long); | |
386 | ||
387 | ppm = vmalloc(alloc_sz); | |
388 | if (!ppm) | |
389 | goto release_ppm_pool; | |
390 | ||
391 | memset(ppm, 0, alloc_sz); | |
392 | ||
393 | ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]); | |
394 | ||
395 | if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) { | |
396 | unsigned int start = ppmax - ppmax_pool; | |
397 | unsigned int end = ppod_bmap_size >> 3; | |
398 | ||
399 | bitmap_set(ppm->ppod_bmap, ppmax, end - start); | |
400 | pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n", | |
401 | __func__, ppmax, ppmax_pool, ppod_bmap_size, start, | |
402 | end); | |
403 | } | |
404 | ||
405 | spin_lock_init(&ppm->map_lock); | |
406 | kref_init(&ppm->refcnt); | |
407 | ||
408 | memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format)); | |
409 | ||
410 | ppm->ppm_pp = ppm_pp; | |
411 | ppm->ndev = ndev; | |
412 | ppm->pdev = pdev; | |
413 | ppm->lldev = lldev; | |
414 | ppm->ppmax = ppmax; | |
415 | ppm->next = 0; | |
416 | ppm->llimit = llimit; | |
417 | ppm->base_idx = start > llimit ? | |
418 | (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0; | |
419 | ppm->bmap_index_max = ppmax - ppmax_pool; | |
420 | ||
421 | ppm->pool = pool; | |
422 | ppm->pool_rsvd = ppmax_pool; | |
423 | ppm->pool_index_max = pool_index_max; | |
424 | ||
425 | /* check one more time */ | |
426 | if (*ppm_pp) { | |
427 | ppm_free(ppm); | |
428 | ppm = (struct cxgbi_ppm *)(*ppm_pp); | |
429 | ||
430 | pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", | |
431 | ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax); | |
432 | ||
433 | kref_get(&ppm->refcnt); | |
434 | return 1; | |
435 | } | |
436 | *ppm_pp = ppm; | |
437 | ||
438 | ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE); | |
439 | ||
440 | pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n", | |
441 | ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE, | |
442 | ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd, | |
443 | ppm->pool_index_max); | |
444 | ||
445 | return 0; | |
446 | ||
447 | release_ppm_pool: | |
448 | free_percpu(pool); | |
449 | return -ENOMEM; | |
450 | } | |
451 | EXPORT_SYMBOL(cxgbi_ppm_init); | |
452 | ||
453 | unsigned int cxgbi_tagmask_set(unsigned int ppmax) | |
454 | { | |
455 | unsigned int bits = fls(ppmax); | |
456 | ||
457 | if (bits > PPOD_IDX_MAX_SIZE) | |
458 | bits = PPOD_IDX_MAX_SIZE; | |
459 | ||
460 | pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n", | |
461 | ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT)); | |
462 | ||
463 | return 1 << (bits + PPOD_IDX_SHIFT); | |
464 | } |