]>
Commit | Line | Data |
---|---|---|
67f4addb FH |
1 | /** |
2 | * IBM Accelerator Family 'GenWQE' | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2013 | |
5 | * | |
6 | * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> | |
7 | * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> | |
26d8f6f1 | 8 | * Author: Michael Jung <mijung@gmx.net> |
67f4addb FH |
9 | * Author: Michael Ruettger <michael@ibmra.de> |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of the GNU General Public License (version 2 only) | |
13 | * as published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, | |
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | * GNU General Public License for more details. | |
19 | */ | |
20 | ||
21 | /* | |
22 | * Miscellaneous functionality used in the other GenWQE driver parts. | |
23 | */ | |
24 | ||
25 | #include <linux/kernel.h> | |
26 | #include <linux/dma-mapping.h> | |
27 | #include <linux/sched.h> | |
28 | #include <linux/vmalloc.h> | |
29 | #include <linux/page-flags.h> | |
30 | #include <linux/scatterlist.h> | |
31 | #include <linux/hugetlb.h> | |
32 | #include <linux/iommu.h> | |
33 | #include <linux/delay.h> | |
34 | #include <linux/pci.h> | |
35 | #include <linux/dma-mapping.h> | |
36 | #include <linux/ctype.h> | |
37 | #include <linux/module.h> | |
38 | #include <linux/platform_device.h> | |
39 | #include <linux/delay.h> | |
40 | #include <asm/pgtable.h> | |
41 | ||
42 | #include "genwqe_driver.h" | |
43 | #include "card_base.h" | |
44 | #include "card_ddcb.h" | |
45 | ||
46 | /** | |
47 | * __genwqe_writeq() - Write 64-bit register | |
48 | * @cd: genwqe device descriptor | |
49 | * @byte_offs: byte offset within BAR | |
50 | * @val: 64-bit value | |
51 | * | |
52 | * Return: 0 if success; < 0 if error | |
53 | */ | |
54 | int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) | |
55 | { | |
fb145456 KSS |
56 | struct pci_dev *pci_dev = cd->pci_dev; |
57 | ||
67f4addb FH |
58 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) |
59 | return -EIO; | |
60 | ||
61 | if (cd->mmio == NULL) | |
62 | return -EIO; | |
63 | ||
fb145456 KSS |
64 | if (pci_channel_offline(pci_dev)) |
65 | return -EIO; | |
66 | ||
a45a0258 | 67 | __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); |
67f4addb FH |
68 | return 0; |
69 | } | |
70 | ||
71 | /** | |
72 | * __genwqe_readq() - Read 64-bit register | |
73 | * @cd: genwqe device descriptor | |
74 | * @byte_offs: offset within BAR | |
75 | * | |
76 | * Return: value from register | |
77 | */ | |
78 | u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) | |
79 | { | |
67f4addb FH |
80 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) |
81 | return 0xffffffffffffffffull; | |
82 | ||
83 | if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && | |
84 | (byte_offs == IO_SLC_CFGREG_GFIR)) | |
85 | return 0x000000000000ffffull; | |
86 | ||
87 | if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && | |
88 | (byte_offs == IO_SLC_CFGREG_GFIR)) | |
89 | return 0x00000000ffff0000ull; | |
90 | ||
91 | if (cd->mmio == NULL) | |
92 | return 0xffffffffffffffffull; | |
93 | ||
58d66ce7 | 94 | return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs)); |
67f4addb FH |
95 | } |
96 | ||
97 | /** | |
98 | * __genwqe_writel() - Write 32-bit register | |
99 | * @cd: genwqe device descriptor | |
100 | * @byte_offs: byte offset within BAR | |
101 | * @val: 32-bit value | |
102 | * | |
103 | * Return: 0 if success; < 0 if error | |
104 | */ | |
105 | int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) | |
106 | { | |
fb145456 KSS |
107 | struct pci_dev *pci_dev = cd->pci_dev; |
108 | ||
67f4addb FH |
109 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) |
110 | return -EIO; | |
111 | ||
112 | if (cd->mmio == NULL) | |
113 | return -EIO; | |
114 | ||
fb145456 KSS |
115 | if (pci_channel_offline(pci_dev)) |
116 | return -EIO; | |
117 | ||
58d66ce7 | 118 | __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); |
67f4addb FH |
119 | return 0; |
120 | } | |
121 | ||
122 | /** | |
123 | * __genwqe_readl() - Read 32-bit register | |
124 | * @cd: genwqe device descriptor | |
125 | * @byte_offs: offset within BAR | |
126 | * | |
127 | * Return: Value from register | |
128 | */ | |
129 | u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) | |
130 | { | |
131 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) | |
132 | return 0xffffffff; | |
133 | ||
134 | if (cd->mmio == NULL) | |
135 | return 0xffffffff; | |
136 | ||
58d66ce7 | 137 | return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs)); |
67f4addb FH |
138 | } |
139 | ||
140 | /** | |
141 | * genwqe_read_app_id() - Extract app_id | |
142 | * | |
143 | * app_unitcfg need to be filled with valid data first | |
144 | */ | |
145 | int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) | |
146 | { | |
147 | int i, j; | |
148 | u32 app_id = (u32)cd->app_unitcfg; | |
149 | ||
150 | memset(app_name, 0, len); | |
151 | for (i = 0, j = 0; j < min(len, 4); j++) { | |
152 | char ch = (char)((app_id >> (24 - j*8)) & 0xff); | |
d9c11d45 | 153 | |
67f4addb FH |
154 | if (ch == ' ') |
155 | continue; | |
156 | app_name[i++] = isprint(ch) ? ch : 'X'; | |
157 | } | |
158 | return i; | |
159 | } | |
160 | ||
161 | /** | |
162 | * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations | |
163 | * | |
164 | * Existing kernel functions seem to use a different polynom, | |
165 | * therefore we could not use them here. | |
166 | * | |
167 | * Genwqe's Polynomial = 0x20044009 | |
168 | */ | |
169 | #define CRC32_POLYNOMIAL 0x20044009 | |
170 | static u32 crc32_tab[256]; /* crc32 lookup table */ | |
171 | ||
172 | void genwqe_init_crc32(void) | |
173 | { | |
174 | int i, j; | |
175 | u32 crc; | |
176 | ||
177 | for (i = 0; i < 256; i++) { | |
178 | crc = i << 24; | |
179 | for (j = 0; j < 8; j++) { | |
180 | if (crc & 0x80000000) | |
181 | crc = (crc << 1) ^ CRC32_POLYNOMIAL; | |
182 | else | |
183 | crc = (crc << 1); | |
184 | } | |
185 | crc32_tab[i] = crc; | |
186 | } | |
187 | } | |
188 | ||
189 | /** | |
190 | * genwqe_crc32() - Generate 32-bit crc as required for DDCBs | |
191 | * @buff: pointer to data buffer | |
192 | * @len: length of data for calculation | |
193 | * @init: initial crc (0xffffffff at start) | |
194 | * | |
195 | * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) | |
196 | ||
197 | * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should | |
198 | * result in a crc32 of 0xf33cb7d3. | |
199 | * | |
200 | * The existing kernel crc functions did not cover this polynom yet. | |
201 | * | |
202 | * Return: crc32 checksum. | |
203 | */ | |
204 | u32 genwqe_crc32(u8 *buff, size_t len, u32 init) | |
205 | { | |
206 | int i; | |
207 | u32 crc; | |
208 | ||
209 | crc = init; | |
210 | while (len--) { | |
211 | i = ((crc >> 24) ^ *buff++) & 0xFF; | |
212 | crc = (crc << 8) ^ crc32_tab[i]; | |
213 | } | |
214 | return crc; | |
215 | } | |
216 | ||
217 | void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, | |
218 | dma_addr_t *dma_handle) | |
219 | { | |
220 | if (get_order(size) > MAX_ORDER) | |
221 | return NULL; | |
222 | ||
13decfb4 SO |
223 | return dma_zalloc_coherent(&cd->pci_dev->dev, size, dma_handle, |
224 | GFP_KERNEL); | |
67f4addb FH |
225 | } |
226 | ||
227 | void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, | |
228 | void *vaddr, dma_addr_t dma_handle) | |
229 | { | |
230 | if (vaddr == NULL) | |
231 | return; | |
232 | ||
19f7767e | 233 | dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle); |
67f4addb FH |
234 | } |
235 | ||
236 | static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, | |
237 | int num_pages) | |
238 | { | |
239 | int i; | |
240 | struct pci_dev *pci_dev = cd->pci_dev; | |
241 | ||
242 | for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { | |
243 | pci_unmap_page(pci_dev, dma_list[i], | |
244 | PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); | |
245 | dma_list[i] = 0x0; | |
246 | } | |
247 | } | |
248 | ||
249 | static int genwqe_map_pages(struct genwqe_dev *cd, | |
250 | struct page **page_list, int num_pages, | |
251 | dma_addr_t *dma_list) | |
252 | { | |
253 | int i; | |
254 | struct pci_dev *pci_dev = cd->pci_dev; | |
255 | ||
256 | /* establish DMA mapping for requested pages */ | |
257 | for (i = 0; i < num_pages; i++) { | |
258 | dma_addr_t daddr; | |
259 | ||
260 | dma_list[i] = 0x0; | |
261 | daddr = pci_map_page(pci_dev, page_list[i], | |
262 | 0, /* map_offs */ | |
263 | PAGE_SIZE, | |
264 | PCI_DMA_BIDIRECTIONAL); /* FIXME rd/rw */ | |
265 | ||
266 | if (pci_dma_mapping_error(pci_dev, daddr)) { | |
267 | dev_err(&pci_dev->dev, | |
268 | "[%s] err: no dma addr daddr=%016llx!\n", | |
269 | __func__, (long long)daddr); | |
270 | goto err; | |
271 | } | |
272 | ||
273 | dma_list[i] = daddr; | |
274 | } | |
275 | return 0; | |
276 | ||
277 | err: | |
278 | genwqe_unmap_pages(cd, dma_list, num_pages); | |
279 | return -EIO; | |
280 | } | |
281 | ||
282 | static int genwqe_sgl_size(int num_pages) | |
283 | { | |
284 | int len, num_tlb = num_pages / 7; | |
285 | ||
286 | len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); | |
287 | return roundup(len, PAGE_SIZE); | |
288 | } | |
289 | ||
718f762e FH |
290 | /** |
291 | * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages | |
292 | * | |
293 | * Allocates memory for sgl and overlapping pages. Pages which might | |
294 | * overlap other user-space memory blocks are being cached for DMAs, | |
295 | * such that we do not run into syncronization issues. Data is copied | |
296 | * from user-space into the cached pages. | |
297 | */ | |
298 | int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, | |
299 | void __user *user_addr, size_t user_size) | |
67f4addb | 300 | { |
718f762e | 301 | int rc; |
67f4addb | 302 | struct pci_dev *pci_dev = cd->pci_dev; |
67f4addb | 303 | |
718f762e FH |
304 | sgl->fpage_offs = offset_in_page((unsigned long)user_addr); |
305 | sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size); | |
306 | sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE); | |
307 | sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE; | |
308 | ||
d9c11d45 | 309 | dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n", |
718f762e FH |
310 | __func__, user_addr, user_size, sgl->nr_pages, |
311 | sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size); | |
312 | ||
313 | sgl->user_addr = user_addr; | |
314 | sgl->user_size = user_size; | |
315 | sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); | |
316 | ||
317 | if (get_order(sgl->sgl_size) > MAX_ORDER) { | |
67f4addb FH |
318 | dev_err(&pci_dev->dev, |
319 | "[%s] err: too much memory requested!\n", __func__); | |
718f762e | 320 | return -ENOMEM; |
67f4addb FH |
321 | } |
322 | ||
718f762e FH |
323 | sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, |
324 | &sgl->sgl_dma_addr); | |
325 | if (sgl->sgl == NULL) { | |
67f4addb FH |
326 | dev_err(&pci_dev->dev, |
327 | "[%s] err: no memory available!\n", __func__); | |
718f762e | 328 | return -ENOMEM; |
67f4addb FH |
329 | } |
330 | ||
718f762e FH |
331 | /* Only use buffering on incomplete pages */ |
332 | if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) { | |
333 | sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, | |
334 | &sgl->fpage_dma_addr); | |
335 | if (sgl->fpage == NULL) | |
336 | goto err_out; | |
337 | ||
338 | /* Sync with user memory */ | |
339 | if (copy_from_user(sgl->fpage + sgl->fpage_offs, | |
340 | user_addr, sgl->fpage_size)) { | |
341 | rc = -EFAULT; | |
342 | goto err_out; | |
343 | } | |
344 | } | |
345 | if (sgl->lpage_size != 0) { | |
346 | sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, | |
347 | &sgl->lpage_dma_addr); | |
348 | if (sgl->lpage == NULL) | |
349 | goto err_out1; | |
350 | ||
351 | /* Sync with user memory */ | |
352 | if (copy_from_user(sgl->lpage, user_addr + user_size - | |
353 | sgl->lpage_size, sgl->lpage_size)) { | |
354 | rc = -EFAULT; | |
a7a7aeef | 355 | goto err_out2; |
718f762e FH |
356 | } |
357 | } | |
358 | return 0; | |
359 | ||
a7a7aeef GS |
360 | err_out2: |
361 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, | |
362 | sgl->lpage_dma_addr); | |
363 | sgl->lpage = NULL; | |
364 | sgl->lpage_dma_addr = 0; | |
718f762e FH |
365 | err_out1: |
366 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, | |
367 | sgl->fpage_dma_addr); | |
a7a7aeef GS |
368 | sgl->fpage = NULL; |
369 | sgl->fpage_dma_addr = 0; | |
718f762e FH |
370 | err_out: |
371 | __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, | |
372 | sgl->sgl_dma_addr); | |
a7a7aeef GS |
373 | sgl->sgl = NULL; |
374 | sgl->sgl_dma_addr = 0; | |
375 | sgl->sgl_size = 0; | |
718f762e | 376 | return -ENOMEM; |
67f4addb FH |
377 | } |
378 | ||
718f762e FH |
379 | int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, |
380 | dma_addr_t *dma_list) | |
67f4addb FH |
381 | { |
382 | int i = 0, j = 0, p; | |
383 | unsigned long dma_offs, map_offs; | |
67f4addb FH |
384 | dma_addr_t prev_daddr = 0; |
385 | struct sg_entry *s, *last_s = NULL; | |
718f762e | 386 | size_t size = sgl->user_size; |
67f4addb FH |
387 | |
388 | dma_offs = 128; /* next block if needed/dma_offset */ | |
718f762e | 389 | map_offs = sgl->fpage_offs; /* offset in first page */ |
67f4addb | 390 | |
718f762e | 391 | s = &sgl->sgl[0]; /* first set of 8 entries */ |
67f4addb | 392 | p = 0; /* page */ |
718f762e | 393 | while (p < sgl->nr_pages) { |
67f4addb FH |
394 | dma_addr_t daddr; |
395 | unsigned int size_to_map; | |
396 | ||
397 | /* always write the chaining entry, cleanup is done later */ | |
398 | j = 0; | |
718f762e | 399 | s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs); |
67f4addb FH |
400 | s[j].len = cpu_to_be32(128); |
401 | s[j].flags = cpu_to_be32(SG_CHAINED); | |
402 | j++; | |
403 | ||
404 | while (j < 8) { | |
405 | /* DMA mapping for requested page, offs, size */ | |
406 | size_to_map = min(size, PAGE_SIZE - map_offs); | |
718f762e FH |
407 | |
408 | if ((p == 0) && (sgl->fpage != NULL)) { | |
409 | daddr = sgl->fpage_dma_addr + map_offs; | |
410 | ||
411 | } else if ((p == sgl->nr_pages - 1) && | |
412 | (sgl->lpage != NULL)) { | |
413 | daddr = sgl->lpage_dma_addr; | |
414 | } else { | |
415 | daddr = dma_list[p] + map_offs; | |
416 | } | |
417 | ||
67f4addb FH |
418 | size -= size_to_map; |
419 | map_offs = 0; | |
420 | ||
421 | if (prev_daddr == daddr) { | |
422 | u32 prev_len = be32_to_cpu(last_s->len); | |
423 | ||
424 | /* pr_info("daddr combining: " | |
425 | "%016llx/%08x -> %016llx\n", | |
426 | prev_daddr, prev_len, daddr); */ | |
427 | ||
428 | last_s->len = cpu_to_be32(prev_len + | |
429 | size_to_map); | |
430 | ||
431 | p++; /* process next page */ | |
718f762e | 432 | if (p == sgl->nr_pages) |
67f4addb FH |
433 | goto fixup; /* nothing to do */ |
434 | ||
435 | prev_daddr = daddr + size_to_map; | |
436 | continue; | |
437 | } | |
438 | ||
439 | /* start new entry */ | |
440 | s[j].target_addr = cpu_to_be64(daddr); | |
441 | s[j].len = cpu_to_be32(size_to_map); | |
442 | s[j].flags = cpu_to_be32(SG_DATA); | |
443 | prev_daddr = daddr + size_to_map; | |
444 | last_s = &s[j]; | |
445 | j++; | |
446 | ||
447 | p++; /* process next page */ | |
718f762e | 448 | if (p == sgl->nr_pages) |
67f4addb FH |
449 | goto fixup; /* nothing to do */ |
450 | } | |
451 | dma_offs += 128; | |
452 | s += 8; /* continue 8 elements further */ | |
453 | } | |
454 | fixup: | |
455 | if (j == 1) { /* combining happend on last entry! */ | |
456 | s -= 8; /* full shift needed on previous sgl block */ | |
457 | j = 7; /* shift all elements */ | |
458 | } | |
459 | ||
460 | for (i = 0; i < j; i++) /* move elements 1 up */ | |
461 | s[i] = s[i + 1]; | |
462 | ||
463 | s[i].target_addr = cpu_to_be64(0); | |
464 | s[i].len = cpu_to_be32(0); | |
465 | s[i].flags = cpu_to_be32(SG_END_LIST); | |
466 | return 0; | |
467 | } | |
468 | ||
718f762e FH |
469 | /** |
470 | * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages | |
471 | * | |
472 | * After the DMA transfer has been completed we free the memory for | |
473 | * the sgl and the cached pages. Data is being transfered from cached | |
474 | * pages into user-space buffers. | |
475 | */ | |
476 | int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) | |
67f4addb | 477 | { |
63fa80cd | 478 | int rc = 0; |
718f762e FH |
479 | struct pci_dev *pci_dev = cd->pci_dev; |
480 | ||
481 | if (sgl->fpage) { | |
482 | if (copy_to_user(sgl->user_addr, sgl->fpage + sgl->fpage_offs, | |
483 | sgl->fpage_size)) { | |
484 | dev_err(&pci_dev->dev, "[%s] err: copying fpage!\n", | |
485 | __func__); | |
486 | rc = -EFAULT; | |
487 | } | |
488 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, | |
489 | sgl->fpage_dma_addr); | |
490 | sgl->fpage = NULL; | |
491 | sgl->fpage_dma_addr = 0; | |
492 | } | |
493 | if (sgl->lpage) { | |
494 | if (copy_to_user(sgl->user_addr + sgl->user_size - | |
495 | sgl->lpage_size, sgl->lpage, | |
496 | sgl->lpage_size)) { | |
497 | dev_err(&pci_dev->dev, "[%s] err: copying lpage!\n", | |
498 | __func__); | |
499 | rc = -EFAULT; | |
500 | } | |
501 | __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, | |
502 | sgl->lpage_dma_addr); | |
503 | sgl->lpage = NULL; | |
504 | sgl->lpage_dma_addr = 0; | |
505 | } | |
506 | __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, | |
507 | sgl->sgl_dma_addr); | |
508 | ||
509 | sgl->sgl = NULL; | |
510 | sgl->sgl_dma_addr = 0x0; | |
511 | sgl->sgl_size = 0; | |
512 | return rc; | |
67f4addb FH |
513 | } |
514 | ||
515 | /** | |
516 | * free_user_pages() - Give pinned pages back | |
517 | * | |
518 | * Documentation of get_user_pages is in mm/memory.c: | |
519 | * | |
520 | * If the page is written to, set_page_dirty (or set_page_dirty_lock, | |
521 | * as appropriate) must be called after the page is finished with, and | |
522 | * before put_page is called. | |
523 | * | |
524 | * FIXME Could be of use to others and might belong in the generic | |
525 | * code, if others agree. E.g. | |
526 | * ll_free_user_pages in drivers/staging/lustre/lustre/llite/rw26.c | |
527 | * ceph_put_page_vector in net/ceph/pagevec.c | |
528 | * maybe more? | |
529 | */ | |
530 | static int free_user_pages(struct page **page_list, unsigned int nr_pages, | |
531 | int dirty) | |
532 | { | |
533 | unsigned int i; | |
534 | ||
535 | for (i = 0; i < nr_pages; i++) { | |
536 | if (page_list[i] != NULL) { | |
537 | if (dirty) | |
538 | set_page_dirty_lock(page_list[i]); | |
539 | put_page(page_list[i]); | |
540 | } | |
541 | } | |
542 | return 0; | |
543 | } | |
544 | ||
545 | /** | |
546 | * genwqe_user_vmap() - Map user-space memory to virtual kernel memory | |
547 | * @cd: pointer to genwqe device | |
548 | * @m: mapping params | |
549 | * @uaddr: user virtual address | |
550 | * @size: size of memory to be mapped | |
551 | * | |
552 | * We need to think about how we could speed this up. Of course it is | |
553 | * not a good idea to do this over and over again, like we are | |
554 | * currently doing it. Nevertheless, I am curious where on the path | |
555 | * the performance is spend. Most probably within the memory | |
556 | * allocation functions, but maybe also in the DMA mapping code. | |
557 | * | |
558 | * Restrictions: The maximum size of the possible mapping currently depends | |
559 | * on the amount of memory we can get using kzalloc() for the | |
560 | * page_list and pci_alloc_consistent for the sg_list. | |
561 | * The sg_list is currently itself not scattered, which could | |
562 | * be fixed with some effort. The page_list must be split into | |
563 | * PAGE_SIZE chunks too. All that will make the complicated | |
564 | * code more complicated. | |
565 | * | |
566 | * Return: 0 if success | |
567 | */ | |
568 | int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, | |
569 | unsigned long size, struct ddcb_requ *req) | |
570 | { | |
571 | int rc = -EINVAL; | |
572 | unsigned long data, offs; | |
573 | struct pci_dev *pci_dev = cd->pci_dev; | |
574 | ||
575 | if ((uaddr == NULL) || (size == 0)) { | |
576 | m->size = 0; /* mark unused and not added */ | |
577 | return -EINVAL; | |
578 | } | |
579 | m->u_vaddr = uaddr; | |
580 | m->size = size; | |
581 | ||
582 | /* determine space needed for page_list. */ | |
583 | data = (unsigned long)uaddr; | |
584 | offs = offset_in_page(data); | |
585 | m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); | |
586 | ||
587 | m->page_list = kcalloc(m->nr_pages, | |
588 | sizeof(struct page *) + sizeof(dma_addr_t), | |
589 | GFP_KERNEL); | |
590 | if (!m->page_list) { | |
591 | dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); | |
592 | m->nr_pages = 0; | |
593 | m->u_vaddr = NULL; | |
594 | m->size = 0; /* mark unused and not added */ | |
595 | return -ENOMEM; | |
596 | } | |
597 | m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); | |
598 | ||
599 | /* pin user pages in memory */ | |
600 | rc = get_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ | |
601 | m->nr_pages, | |
602 | 1, /* write by caller */ | |
603 | m->page_list); /* ptrs to pages */ | |
cf35d6e0 IA |
604 | if (rc < 0) |
605 | goto fail_get_user_pages; | |
67f4addb FH |
606 | |
607 | /* assumption: get_user_pages can be killed by signals. */ | |
608 | if (rc < m->nr_pages) { | |
609 | free_user_pages(m->page_list, rc, 0); | |
610 | rc = -EFAULT; | |
611 | goto fail_get_user_pages; | |
612 | } | |
613 | ||
614 | rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); | |
615 | if (rc != 0) | |
616 | goto fail_free_user_pages; | |
617 | ||
618 | return 0; | |
619 | ||
620 | fail_free_user_pages: | |
621 | free_user_pages(m->page_list, m->nr_pages, 0); | |
622 | ||
623 | fail_get_user_pages: | |
624 | kfree(m->page_list); | |
625 | m->page_list = NULL; | |
626 | m->dma_list = NULL; | |
627 | m->nr_pages = 0; | |
628 | m->u_vaddr = NULL; | |
629 | m->size = 0; /* mark unused and not added */ | |
630 | return rc; | |
631 | } | |
632 | ||
633 | /** | |
634 | * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel | |
635 | * memory | |
636 | * @cd: pointer to genwqe device | |
637 | * @m: mapping params | |
638 | */ | |
639 | int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m, | |
640 | struct ddcb_requ *req) | |
641 | { | |
642 | struct pci_dev *pci_dev = cd->pci_dev; | |
643 | ||
644 | if (!dma_mapping_used(m)) { | |
645 | dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", | |
646 | __func__, m); | |
647 | return -EINVAL; | |
648 | } | |
649 | ||
650 | if (m->dma_list) | |
651 | genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); | |
652 | ||
653 | if (m->page_list) { | |
654 | free_user_pages(m->page_list, m->nr_pages, 1); | |
655 | ||
656 | kfree(m->page_list); | |
657 | m->page_list = NULL; | |
658 | m->dma_list = NULL; | |
659 | m->nr_pages = 0; | |
660 | } | |
661 | ||
662 | m->u_vaddr = NULL; | |
663 | m->size = 0; /* mark as unused and not added */ | |
664 | return 0; | |
665 | } | |
666 | ||
667 | /** | |
668 | * genwqe_card_type() - Get chip type SLU Configuration Register | |
669 | * @cd: pointer to the genwqe device descriptor | |
670 | * Return: 0: Altera Stratix-IV 230 | |
671 | * 1: Altera Stratix-IV 530 | |
672 | * 2: Altera Stratix-V A4 | |
673 | * 3: Altera Stratix-V A7 | |
674 | */ | |
675 | u8 genwqe_card_type(struct genwqe_dev *cd) | |
676 | { | |
677 | u64 card_type = cd->slu_unitcfg; | |
d9c11d45 | 678 | |
67f4addb FH |
679 | return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); |
680 | } | |
681 | ||
682 | /** | |
683 | * genwqe_card_reset() - Reset the card | |
684 | * @cd: pointer to the genwqe device descriptor | |
685 | */ | |
686 | int genwqe_card_reset(struct genwqe_dev *cd) | |
687 | { | |
688 | u64 softrst; | |
689 | struct pci_dev *pci_dev = cd->pci_dev; | |
690 | ||
691 | if (!genwqe_is_privileged(cd)) | |
692 | return -ENODEV; | |
693 | ||
694 | /* new SL */ | |
695 | __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull); | |
696 | msleep(1000); | |
697 | __genwqe_readq(cd, IO_HSU_FIR_CLR); | |
698 | __genwqe_readq(cd, IO_APP_FIR_CLR); | |
699 | __genwqe_readq(cd, IO_SLU_FIR_CLR); | |
700 | ||
701 | /* | |
702 | * Read-modify-write to preserve the stealth bits | |
703 | * | |
704 | * For SL >= 039, Stealth WE bit allows removing | |
705 | * the read-modify-wrote. | |
706 | * r-m-w may require a mask 0x3C to avoid hitting hard | |
707 | * reset again for error reset (should be 0, chicken). | |
708 | */ | |
709 | softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull; | |
710 | __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull); | |
711 | ||
712 | /* give ERRORRESET some time to finish */ | |
713 | msleep(50); | |
714 | ||
715 | if (genwqe_need_err_masking(cd)) { | |
716 | dev_info(&pci_dev->dev, | |
717 | "[%s] masking errors for old bitstreams\n", __func__); | |
718 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); | |
719 | } | |
720 | return 0; | |
721 | } | |
722 | ||
723 | int genwqe_read_softreset(struct genwqe_dev *cd) | |
724 | { | |
725 | u64 bitstream; | |
726 | ||
727 | if (!genwqe_is_privileged(cd)) | |
728 | return -ENODEV; | |
729 | ||
730 | bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; | |
731 | cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull; | |
732 | return 0; | |
733 | } | |
734 | ||
735 | /** | |
736 | * genwqe_set_interrupt_capability() - Configure MSI capability structure | |
737 | * @cd: pointer to the device | |
738 | * Return: 0 if no error | |
739 | */ | |
740 | int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) | |
741 | { | |
742 | int rc; | |
67f4addb | 743 | |
d3f45647 | 744 | rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI); |
7276883f SO |
745 | if (rc < 0) |
746 | return rc; | |
7276883f | 747 | return 0; |
67f4addb FH |
748 | } |
749 | ||
750 | /** | |
751 | * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() | |
752 | * @cd: pointer to the device | |
753 | */ | |
754 | void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) | |
755 | { | |
d3f45647 | 756 | pci_free_irq_vectors(cd->pci_dev); |
67f4addb FH |
757 | } |
758 | ||
759 | /** | |
760 | * set_reg_idx() - Fill array with data. Ignore illegal offsets. | |
761 | * @cd: card device | |
762 | * @r: debug register array | |
763 | * @i: index to desired entry | |
764 | * @m: maximum possible entries | |
765 | * @addr: addr which is read | |
766 | * @index: index in debug array | |
767 | * @val: read value | |
768 | */ | |
769 | static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, | |
770 | unsigned int *i, unsigned int m, u32 addr, u32 idx, | |
771 | u64 val) | |
772 | { | |
773 | if (WARN_ON_ONCE(*i >= m)) | |
774 | return -EFAULT; | |
775 | ||
776 | r[*i].addr = addr; | |
777 | r[*i].idx = idx; | |
778 | r[*i].val = val; | |
779 | ++*i; | |
780 | return 0; | |
781 | } | |
782 | ||
783 | static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, | |
784 | unsigned int *i, unsigned int m, u32 addr, u64 val) | |
785 | { | |
786 | return set_reg_idx(cd, r, i, m, addr, 0, val); | |
787 | } | |
788 | ||
789 | int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, | |
790 | unsigned int max_regs, int all) | |
791 | { | |
792 | unsigned int i, j, idx = 0; | |
793 | u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr; | |
794 | u64 gfir, sluid, appid, ufir, ufec, sfir, sfec; | |
795 | ||
796 | /* Global FIR */ | |
797 | gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); | |
798 | set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir); | |
799 | ||
800 | /* UnitCfg for SLU */ | |
801 | sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */ | |
802 | set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid); | |
803 | ||
804 | /* UnitCfg for APP */ | |
805 | appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */ | |
806 | set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid); | |
807 | ||
808 | /* Check all chip Units */ | |
809 | for (i = 0; i < GENWQE_MAX_UNITS; i++) { | |
810 | ||
811 | /* Unit FIR */ | |
812 | ufir_addr = (i << 24) | 0x008; | |
813 | ufir = __genwqe_readq(cd, ufir_addr); | |
814 | set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir); | |
815 | ||
816 | /* Unit FEC */ | |
817 | ufec_addr = (i << 24) | 0x018; | |
818 | ufec = __genwqe_readq(cd, ufec_addr); | |
819 | set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec); | |
820 | ||
821 | for (j = 0; j < 64; j++) { | |
822 | /* wherever there is a primary 1, read the 2ndary */ | |
823 | if (!all && (!(ufir & (1ull << j)))) | |
824 | continue; | |
825 | ||
826 | sfir_addr = (i << 24) | (0x100 + 8 * j); | |
827 | sfir = __genwqe_readq(cd, sfir_addr); | |
828 | set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir); | |
829 | ||
830 | sfec_addr = (i << 24) | (0x300 + 8 * j); | |
831 | sfec = __genwqe_readq(cd, sfec_addr); | |
832 | set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec); | |
833 | } | |
834 | } | |
835 | ||
836 | /* fill with invalid data until end */ | |
837 | for (i = idx; i < max_regs; i++) { | |
838 | regs[i].addr = 0xffffffff; | |
839 | regs[i].val = 0xffffffffffffffffull; | |
840 | } | |
841 | return idx; | |
842 | } | |
843 | ||
844 | /** | |
845 | * genwqe_ffdc_buff_size() - Calculates the number of dump registers | |
846 | */ | |
847 | int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) | |
848 | { | |
849 | int entries = 0, ring, traps, traces, trace_entries; | |
850 | u32 eevptr_addr, l_addr, d_len, d_type; | |
851 | u64 eevptr, val, addr; | |
852 | ||
853 | eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; | |
854 | eevptr = __genwqe_readq(cd, eevptr_addr); | |
855 | ||
856 | if ((eevptr != 0x0) && (eevptr != -1ull)) { | |
857 | l_addr = GENWQE_UID_OFFS(uid) | eevptr; | |
858 | ||
859 | while (1) { | |
860 | val = __genwqe_readq(cd, l_addr); | |
861 | ||
862 | if ((val == 0x0) || (val == -1ull)) | |
863 | break; | |
864 | ||
865 | /* 38:24 */ | |
866 | d_len = (val & 0x0000007fff000000ull) >> 24; | |
867 | ||
868 | /* 39 */ | |
869 | d_type = (val & 0x0000008000000000ull) >> 36; | |
870 | ||
871 | if (d_type) { /* repeat */ | |
872 | entries += d_len; | |
873 | } else { /* size in bytes! */ | |
874 | entries += d_len >> 3; | |
875 | } | |
876 | ||
877 | l_addr += 8; | |
878 | } | |
879 | } | |
880 | ||
881 | for (ring = 0; ring < 8; ring++) { | |
882 | addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); | |
883 | val = __genwqe_readq(cd, addr); | |
884 | ||
885 | if ((val == 0x0ull) || (val == -1ull)) | |
886 | continue; | |
887 | ||
888 | traps = (val >> 24) & 0xff; | |
889 | traces = (val >> 16) & 0xff; | |
890 | trace_entries = val & 0xffff; | |
891 | ||
892 | entries += traps + (traces * trace_entries); | |
893 | } | |
894 | return entries; | |
895 | } | |
896 | ||
897 | /** | |
898 | * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure | |
899 | */ | |
900 | int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, | |
901 | struct genwqe_reg *regs, unsigned int max_regs) | |
902 | { | |
903 | int i, traps, traces, trace, trace_entries, trace_entry, ring; | |
904 | unsigned int idx = 0; | |
905 | u32 eevptr_addr, l_addr, d_addr, d_len, d_type; | |
906 | u64 eevptr, e, val, addr; | |
907 | ||
908 | eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; | |
909 | eevptr = __genwqe_readq(cd, eevptr_addr); | |
910 | ||
911 | if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) { | |
912 | l_addr = GENWQE_UID_OFFS(uid) | eevptr; | |
913 | while (1) { | |
914 | e = __genwqe_readq(cd, l_addr); | |
915 | if ((e == 0x0) || (e == 0xffffffffffffffffull)) | |
916 | break; | |
917 | ||
918 | d_addr = (e & 0x0000000000ffffffull); /* 23:0 */ | |
919 | d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */ | |
920 | d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */ | |
921 | d_addr |= GENWQE_UID_OFFS(uid); | |
922 | ||
923 | if (d_type) { | |
924 | for (i = 0; i < (int)d_len; i++) { | |
925 | val = __genwqe_readq(cd, d_addr); | |
926 | set_reg_idx(cd, regs, &idx, max_regs, | |
927 | d_addr, i, val); | |
928 | } | |
929 | } else { | |
930 | d_len >>= 3; /* Size in bytes! */ | |
931 | for (i = 0; i < (int)d_len; i++, d_addr += 8) { | |
932 | val = __genwqe_readq(cd, d_addr); | |
933 | set_reg_idx(cd, regs, &idx, max_regs, | |
934 | d_addr, 0, val); | |
935 | } | |
936 | } | |
937 | l_addr += 8; | |
938 | } | |
939 | } | |
940 | ||
941 | /* | |
942 | * To save time, there are only 6 traces poplulated on Uid=2, | |
943 | * Ring=1. each with iters=512. | |
944 | */ | |
945 | for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds, | |
946 | 2...7 are ASI rings */ | |
947 | addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); | |
948 | val = __genwqe_readq(cd, addr); | |
949 | ||
950 | if ((val == 0x0ull) || (val == -1ull)) | |
951 | continue; | |
952 | ||
953 | traps = (val >> 24) & 0xff; /* Number of Traps */ | |
954 | traces = (val >> 16) & 0xff; /* Number of Traces */ | |
955 | trace_entries = val & 0xffff; /* Entries per trace */ | |
956 | ||
957 | /* Note: This is a combined loop that dumps both the traps */ | |
958 | /* (for the trace == 0 case) as well as the traces 1 to */ | |
959 | /* 'traces'. */ | |
960 | for (trace = 0; trace <= traces; trace++) { | |
961 | u32 diag_sel = | |
962 | GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace); | |
963 | ||
964 | addr = (GENWQE_UID_OFFS(uid) | | |
965 | IO_EXTENDED_DIAG_SELECTOR); | |
966 | __genwqe_writeq(cd, addr, diag_sel); | |
967 | ||
968 | for (trace_entry = 0; | |
969 | trace_entry < (trace ? trace_entries : traps); | |
970 | trace_entry++) { | |
971 | addr = (GENWQE_UID_OFFS(uid) | | |
972 | IO_EXTENDED_DIAG_READ_MBX); | |
973 | val = __genwqe_readq(cd, addr); | |
974 | set_reg_idx(cd, regs, &idx, max_regs, addr, | |
975 | (diag_sel<<16) | trace_entry, val); | |
976 | } | |
977 | } | |
978 | } | |
979 | return 0; | |
980 | } | |
981 | ||
982 | /** | |
983 | * genwqe_write_vreg() - Write register in virtual window | |
984 | * | |
985 | * Note, these registers are only accessible to the PF through the | |
986 | * VF-window. It is not intended for the VF to access. | |
987 | */ | |
988 | int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) | |
989 | { | |
990 | __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); | |
991 | __genwqe_writeq(cd, reg, val); | |
992 | return 0; | |
993 | } | |
994 | ||
995 | /** | |
996 | * genwqe_read_vreg() - Read register in virtual window | |
997 | * | |
998 | * Note, these registers are only accessible to the PF through the | |
999 | * VF-window. It is not intended for the VF to access. | |
1000 | */ | |
1001 | u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) | |
1002 | { | |
1003 | __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); | |
1004 | return __genwqe_readq(cd, reg); | |
1005 | } | |
1006 | ||
1007 | /** | |
1008 | * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card | |
1009 | * | |
1010 | * Note: From a design perspective it turned out to be a bad idea to | |
1011 | * use codes here to specifiy the frequency/speed values. An old | |
1012 | * driver cannot understand new codes and is therefore always a | |
1013 | * problem. Better is to measure out the value or put the | |
1014 | * speed/frequency directly into a register which is always a valid | |
1015 | * value for old as well as for new software. | |
1016 | * | |
1017 | * Return: Card clock in MHz | |
1018 | */ | |
1019 | int genwqe_base_clock_frequency(struct genwqe_dev *cd) | |
1020 | { | |
1021 | u16 speed; /* MHz MHz MHz MHz */ | |
1022 | static const int speed_grade[] = { 250, 200, 166, 175 }; | |
1023 | ||
1024 | speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); | |
1025 | if (speed >= ARRAY_SIZE(speed_grade)) | |
1026 | return 0; /* illegal value */ | |
1027 | ||
1028 | return speed_grade[speed]; | |
1029 | } | |
1030 | ||
1031 | /** | |
1032 | * genwqe_stop_traps() - Stop traps | |
1033 | * | |
1034 | * Before reading out the analysis data, we need to stop the traps. | |
1035 | */ | |
1036 | void genwqe_stop_traps(struct genwqe_dev *cd) | |
1037 | { | |
1038 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); | |
1039 | } | |
1040 | ||
1041 | /** | |
1042 | * genwqe_start_traps() - Start traps | |
1043 | * | |
1044 | * After having read the data, we can/must enable the traps again. | |
1045 | */ | |
1046 | void genwqe_start_traps(struct genwqe_dev *cd) | |
1047 | { | |
1048 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); | |
1049 | ||
1050 | if (genwqe_need_err_masking(cd)) | |
1051 | __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); | |
1052 | } |