1 /* SPDX-License-Identifier: BSD-3-Clause
5 #include <rte_memory.h>
7 #include "dpaax_iova_table.h"
8 #include "dpaax_logs.h"
10 /* Global dpaax logger identifier */
13 /* Global table reference */
/* Singleton PA->VA translation table; allocated once by
 * dpaax_iova_table_populate() and released by dpaax_iova_table_depopulate().
 * NOTE(review): this file is a mangled extraction — the leading integers on
 * code lines below are stale original line numbers, not code.
 */
14 struct dpaax_iova_table
*dpaax_iova_table_p
;
/* Forward declaration: walks existing memsegs and registers the
 * memory-event callback (defined near the end of this file).
 */
16 static int dpaax_handle_memevents(void);
18 /* A structure representing the device-tree node available in /proc/device-tree.
/* A ntohll equivalent routine: convert an 8-byte big-endian (device-tree /
 * network order) quantity, addressed as raw bytes, into host order in place.
 * XXX: This is only applicable for 64 bit little-endian environments; on a
 * big-endian host the word swap below would corrupt the value.
 *
 * @param arr pointer to 8 bytes holding a big-endian 64-bit value;
 *            rewritten in place with the host-order representation.
 */
static void
rotate_8(unsigned char *arr)
{
	uint32_t *first_half;
	uint32_t *second_half;
	uint32_t temp;

	first_half = (uint32_t *)(arr);
	second_half = (uint32_t *)(arr + 4);

	/* Swap the two 32-bit words. The value of *first_half must be saved
	 * before it is overwritten — assigning *second_half into *first_half
	 * directly would lose the upper word (the defect in the previous
	 * version of this routine).
	 */
	temp = *first_half;
	*first_half = *second_half;
	*second_half = temp;

	/* Byte-swap each 32-bit word to complete the 64-bit conversion */
	*first_half = ntohl(*first_half);
	*second_half = ntohl(*second_half);
}
47 * Memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088, LX2160)
48 * are populated by Uboot and available in device tree:
49 * /proc/device-tree/memory@<address>/reg <= register.
50 * Entries are of the form:
51 * (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
54 * OUT populate number of entries found in memory node
56 * Pointer to array of reg_node elements, count size
/* read_memory_node()
 * Parse the device-tree memory node (/proc/device-tree/memory@<addr>/reg,
 * located via MEM_NODE_PATH_GLOB) into a malloc'd array of struct reg_node
 * {addr, len} pairs, converting each 8-byte field from big-endian to host
 * order via rotate_8(). The caller owns (and frees) the returned array.
 *
 * @param count OUT: set to the number of 16-byte (addr,len) entries found
 * @return pointer to array of reg_node elements, or NULL on any failure
 *
 * NOTE(review): this chunk is a mangled extraction — several original
 * source lines (error branches, gotos, closing braces) are missing between
 * the fragments below, and the leading integers are stale original line
 * numbers, not code.
 */
58 static struct reg_node
*
59 read_memory_node(unsigned int *count
)
/* Locals: statbuf holds the node file size; file_data is a bounded stack
 * buffer for the raw node contents; nodes is the result (NULL on error).
 */
64 struct stat statbuf
= {0};
65 char file_data
[MEM_NODE_FILE_LEN
];
66 struct reg_node
*nodes
= NULL
;
/* Locate the memory@<addr> node; glob() fills 'result' with the matches */
70 ret
= glob(MEM_NODE_PATH_GLOB
, 0, NULL
, &result
);
72 DPAAX_DEBUG("Unable to glob device-tree memory node: (%s)(%d)",
73 MEM_NODE_PATH_GLOB
, ret
);
/* Exactly one memory node is supported */
77 if (result
.gl_pathc
!= 1) {
78 /* Either more than one memory@<addr> node found, or none.
79 * In either case, cannot work ahead.
81 DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
86 DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
88 fd
= open(result
.gl_pathv
[0], O_RDONLY
);
90 DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
91 MEM_NODE_PATH_GLOB
, fd
);
95 /* Stat to get the file size */
96 ret
= fstat(fd
, &statbuf
);
98 DPAAX_DEBUG("Unable to get device-tree memory node size.");
/* NOTE(review): %lu assumes off_t is unsigned long — confirm on 32-bit
 * builds with 64-bit file offsets.
 */
102 DPAAX_DEBUG("Size of device-tree mem node: %lu", statbuf
.st_size
);
/* Oversized node: entries beyond MEM_NODE_FILE_LEN are truncated by the
 * bounded read() below.
 */
103 if (statbuf
.st_size
> MEM_NODE_FILE_LEN
) {
104 DPAAX_DEBUG("More memory nodes available than assumed.");
105 DPAAX_DEBUG("System may not work properly!");
/* Read at most MEM_NODE_FILE_LEN bytes into the stack buffer */
108 ret
= read(fd
, file_data
, statbuf
.st_size
> MEM_NODE_FILE_LEN
?
109 MEM_NODE_FILE_LEN
: statbuf
.st_size
);
111 DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
116 /* The reg node should be multiple of 16 bytes, 8 bytes each for addr
 * NOTE(review): *count is derived from st_size, not from the bytes
 * actually read — if the file exceeded MEM_NODE_FILE_LEN this would
 * over-count entries relative to the truncated buffer; confirm against
 * the missing lines.
 */
119 *count
= (statbuf
.st_size
/ 16);
120 if ((*count
) <= 0 || (statbuf
.st_size
% 16 != 0)) {
121 DPAAX_DEBUG("Invalid memory node values or count. (size=%lu)",
126 /* each entry is of 16 bytes, and size/16 is total count of entries */
127 nodes
= malloc(sizeof(struct reg_node
) * (*count
));
129 DPAAX_DEBUG("Failure in allocating working memory.");
132 memset(nodes
, 0, sizeof(struct reg_node
) * (*count
));
/* Copy each 16-byte (addr,len) pair out of the raw buffer and convert
 * both 8-byte fields from big-endian (device-tree order) to host order.
 */
134 for (i
= 0, j
= 0; i
< (statbuf
.st_size
) && j
< (*count
); i
+= 16, j
++) {
135 memcpy(&nodes
[j
], file_data
+ i
, 16);
136 /* Rotate (ntohl) each 8 byte entry */
137 rotate_8((unsigned char *)(&(nodes
[j
].addr
)));
138 rotate_8((unsigned char *)(&(nodes
[j
].len
)));
/* Debug dump of the parsed entries (loop header on a missing line) */
141 DPAAX_DEBUG("Device-tree memory node data:");
143 DPAAX_DEBUG("\n %08" PRIx64
" %08zu", nodes
[j
].addr
, nodes
[j
].len
);
/* dpaax_iova_table_populate()
 * Allocate and initialize the global (singleton) PA->VA translation table:
 * reads the device-tree memory nodes, sizes ONE contiguous rte_zmalloc'd
 * area (table header + entries[] + per-entry pages arrays), wires up the
 * entry/pages pointers, and finally installs the memory-event handlers.
 *
 * NOTE(review): mangled extraction — original source lines (returns, the
 * 'if (i > 0)'/'else' guards, closing braces) are missing between the
 * fragments below; the leading integers are stale line numbers, not code.
 */
154 dpaax_iova_table_populate(void)
157 unsigned int i
, node_count
;
158 size_t tot_memory_size
, total_table_size
;
159 struct reg_node
*nodes
;
160 struct dpaax_iovat_element
*entry
;
162 /* dpaax_iova_table_p is a singleton - only one instance should be
165 if (dpaax_iova_table_p
) {
166 DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
168 /* This can be an error case as well - some path not cleaning
169 * up table - but, for now, it is assumed that if IOVA Table
170 * pointer is valid, table is allocated.
/* Parse device-tree; on failure the table is simply not available and
 * PA->VA translation falls back to the slow path.
 */
175 nodes
= read_memory_node(&node_count
);
177 DPAAX_WARN("PA->VA translation not available;");
178 DPAAX_WARN("Expect performance impact.");
/* Sum the lengths of all discovered memory nodes.
 * NOTE(review): tot_memory_size's '= 0' initializer is on a missing
 * line — confirm it exists upstream, otherwise this reads garbage.
 */
183 for (i
= 0; i
< node_count
; i
++)
184 tot_memory_size
+= nodes
[i
].len
;
186 DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size
);
188 /* Total table size = meta data + tot_memory_size/8 */
189 total_table_size
= sizeof(struct dpaax_iova_table
) +
190 (sizeof(struct dpaax_iovat_element
) * node_count
) +
191 ((tot_memory_size
/ DPAAX_MEM_SPLIT
) * sizeof(uint64_t));
193 /* TODO: This memory doesn't need to shared but needs to be always
194 * pinned to RAM (no swap out) - using hugepage rather than malloc
/* Single allocation covering header, entries[] and all pages arrays;
 * dpaax_iova_table_depopulate() must free this base pointer.
 */
196 dpaax_iova_table_p
= rte_zmalloc(NULL
, total_table_size
, 0);
197 if (dpaax_iova_table_p
== NULL
) {
198 DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
199 DPAAX_WARN("PA->VA translation not available;");
200 DPAAX_WARN("Expect performance impact.");
205 /* Initialize table */
206 dpaax_iova_table_p
->count
= node_count
;
207 entry
= dpaax_iova_table_p
->entries
;
209 DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry
);
210 DPAAX_DEBUG("\t(entry),(start),(len),(next)");
212 for (i
= 0; i
< node_count
; i
++) {
213 /* dpaax_iova_table_p
214 * | dpaax_iova_table_p->entries
218 * +------+------+-------+---+----------+---------+---
219 * |iova_ |entry | entry | | pages | pages |
220 * |table | 1 | 2 |...| entry 1 | entry2 |
221 * +-----'+.-----+-------+---+;---------+;--------+---
223 * `~~~~~~|~~~~~>pages /
227 entry
[i
].start
= nodes
[i
].addr
;
228 entry
[i
].len
= nodes
[i
].len
;
/* For i > 0 (guard on a missing line): this entry's pages array
 * starts immediately after the previous entry's pages array.
 */
230 entry
[i
].pages
= entry
[i
-1].pages
+
231 ((entry
[i
-1].len
/DPAAX_MEM_SPLIT
));
/* For i == 0 (else branch on a missing line): the pages arrays begin
 * right after entries[] — offset sizeof(element) * count; the
 * continuation of this expression is on a missing line.
 */
233 entry
[i
].pages
= (uint64_t *)((unsigned char *)entry
+
234 (sizeof(struct dpaax_iovat_element
) *
237 DPAAX_DEBUG("\t(%u),(%8"PRIx64
"),(%8zu),(%8p)",
238 i
, entry
[i
].start
, entry
[i
].len
, entry
[i
].pages
);
241 /* Release memory associated with nodes array - not required now */
244 DPAAX_DEBUG("Adding mem-event handler\n");
245 ret
= dpaax_handle_memevents();
247 DPAAX_ERR("Unable to add mem-event handler");
248 DPAAX_WARN("Cases with non-buffer pool mem won't work!");
255 dpaax_iova_table_depopulate(void)
257 if (dpaax_iova_table_p
== NULL
)
260 rte_free(dpaax_iova_table_p
->entries
);
261 dpaax_iova_table_p
= NULL
;
263 DPAAX_DEBUG("IOVA Table cleanedup");
/* dpaax_iova_table_update()
 * Record (or, with vaddr==0, clear) the PA->VA mapping for a physical
 * range: finds the table entry containing paddr, then fills one pages[]
 * slot per DPAAX_MEM_SPLIT-sized chunk of the request.
 *
 * @param paddr  physical start address of the range
 * @param vaddr  virtual address to record (0 clears the slots on free)
 * @param length length of the range in bytes
 *
 * NOTE(review): mangled extraction — returns, braces and some statements
 * are on missing lines; the leading integers are stale line numbers.
 */
267 dpaax_iova_table_update(phys_addr_t paddr
, void *vaddr
, size_t length
)
271 size_t req_length
= length
, e_offset
;
272 struct dpaax_iovat_element
*entry
;
273 uintptr_t align_vaddr
;
274 phys_addr_t align_paddr
;
/* Table never populated: translation unavailable, nothing to update */
276 if (unlikely(dpaax_iova_table_p
== NULL
))
/* Round both addresses down to the DPAAX_MEM_SPLIT boundary */
279 align_paddr
= paddr
& DPAAX_MEM_SPLIT_MASK
;
280 align_vaddr
= ((uintptr_t)vaddr
& DPAAX_MEM_SPLIT_MASK
);
282 /* Check if paddr is available in table */
283 entry
= dpaax_iova_table_p
->entries
;
284 for (i
= 0; i
< dpaax_iova_table_p
->count
; i
++) {
/* Entries are laid out by ascending start address: an address below
 * this entry's start that wasn't matched earlier cannot exist.
 */
285 if (align_paddr
< entry
[i
].start
) {
286 /* Address lower than start, but not found in previous
287 * iteration shouldn't exist.
289 DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
290 "(%"PRIu64
")", paddr
);
291 DPAAX_ERR("Add: Lowest address: %"PRIu64
"",
/* Beyond this entry's range: try the next entry */
296 if (align_paddr
> (entry
[i
].start
+ entry
[i
].len
))
299 /* align_paddr >= start && align_paddr < (start + len) */
/* Slot index within this entry's pages[] array */
303 e_offset
= ((align_paddr
- entry
[i
].start
) / DPAAX_MEM_SPLIT
);
304 /* TODO: Whatif something already exists at this
305 * location - is that an error? For now, ignoring the
308 entry
[i
].pages
[e_offset
] = align_vaddr
;
309 DPAAX_DEBUG("Added: vaddr=%zu for Phy:%"PRIu64
" at %zu"
310 " remaining len %zu", align_vaddr
,
311 align_paddr
, e_offset
, req_length
);
313 /* Incoming request can be larger than the
314 * DPAAX_MEM_SPLIT size - in which case, multiple
315 * entries in entry->pages[] are filled up.
317 if (req_length
<= DPAAX_MEM_SPLIT
)
/* Advance to the next DPAAX_MEM_SPLIT-sized chunk and loop again
 * (loop-back on a missing line).
 */
319 align_paddr
+= DPAAX_MEM_SPLIT
;
320 align_vaddr
+= DPAAX_MEM_SPLIT
;
321 req_length
-= DPAAX_MEM_SPLIT
;
328 /* There might be case where the incoming physical address is
329 * beyond the address discovered in the memory node of
330 * device-tree. Specially if some malloc'd area is used by EAL
331 * and the memevent handlers passes that across. But, this is
332 * not necessarily an error.
334 DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
/* Success-path trace (reached only when a slot was found) */
340 DPAAX_DEBUG("Add: Found slot at (%"PRIu64
")[(%zu)] for vaddr:(%p),"
341 " phy(%"PRIu64
"), len(%zu)", entry
[i
].start
, e_offset
,
342 vaddr
, paddr
, length
);
346 /* dpaax_iova_table_dump
347 * Dump the table, with its entries, on screen. Only works in Debug Mode
348 * Not for weak hearted - the tables can get quite large
/* NOTE(review): mangled extraction — return statements, braces and some
 * arguments are on missing lines; leading integers are stale line numbers.
 */
351 dpaax_iova_table_dump(void)
354 struct dpaax_iovat_element
*entry
;
356 /* In case DEBUG is not enabled, some 'if' conditions might misbehave
357 * as they have nothing else in them except a DPAAX_DEBUG() which if
358 * tuned out would leave 'if' naked.
360 if (rte_log_get_global_level() < RTE_LOG_DEBUG
) {
361 DPAAX_ERR("Set log level to Debug for PA->Table dump!");
365 DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
366 if (dpaax_iova_table_p
== NULL
)
367 DPAAX_DEBUG("\tNULL");
369 entry
= dpaax_iova_table_p
->entries
;
/* One line per entry, then one line per non-zero pages[] slot */
370 for (i
= 0; i
< dpaax_iova_table_p
->count
; i
++) {
371 DPAAX_DEBUG("\t(%16i),(%16"PRIu64
"),(%16zu),(%16p)",
372 i
, entry
[i
].start
, entry
[i
].len
, entry
[i
].pages
);
373 DPAAX_DEBUG("\t\t (PA), (VA)");
/* NOTE(review): 'entry->len' uses entry[0]'s length for every entry's
 * inner loop — looks like it should be 'entry[i].len'; confirm upstream.
 */
374 for (j
= 0; j
< (entry
->len
/DPAAX_MEM_SPLIT
); j
++) {
/* Skip slots with no recorded VA */
375 if (entry
[i
].pages
[j
] == 0)
/* NOTE(review): the printed PA offset uses j * sizeof(uint64_t),
 * while slots represent DPAAX_MEM_SPLIT-sized chunks (see
 * dpaax_iova_table_update's e_offset) — verify which is intended.
 */
377 DPAAX_DEBUG("\t\t(%16"PRIx64
"),(%16"PRIx64
")",
378 (entry
[i
].start
+ (j
* sizeof(uint64_t))),
382 DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
/* dpaax_memevent_cb()
 * EAL memory-event callback: for each memseg overlapping [addr, addr+len),
 * records its PA->VA mapping in the table on RTE_MEM_EVENT_ALLOC, or clears
 * it (vaddr = 0) on free.
 *
 * @param type RTE_MEM_EVENT_ALLOC or the free event
 * @param addr start of the affected virtual range
 * @param len  length of the affected range
 * @param arg  unused
 *
 * NOTE(review): mangled extraction — the cur_len/map_len advance and some
 * arguments are on missing lines; leading integers are stale line numbers.
 */
386 dpaax_memevent_cb(enum rte_mem_event type
, const void *addr
, size_t len
,
387 void *arg __rte_unused
)
389 struct rte_memseg_list
*msl
;
390 struct rte_memseg
*ms
;
391 size_t cur_len
= 0, map_len
= 0;
392 phys_addr_t phys_addr
;
396 DPAAX_DEBUG("Called with addr=%p, len=%zu", addr
, len
);
398 msl
= rte_mem_virt2memseg_list(addr
);
/* Walk the range one memseg at a time */
400 while (cur_len
< len
) {
401 const void *va
= RTE_PTR_ADD(addr
, cur_len
);
/* Resolve the memseg containing va, then its PA and VA base */
403 ms
= rte_mem_virt2memseg(va
, msl
);
404 phys_addr
= rte_mem_virt2phy(ms
->addr
);
405 virt_addr
= ms
->addr
;
408 DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p,"
409 "iova=%"PRIu64
", map_len=%zu",
410 type
== RTE_MEM_EVENT_ALLOC
?
412 va
, virt_addr
, phys_addr
, map_len
);
414 if (type
== RTE_MEM_EVENT_ALLOC
)
415 ret
= dpaax_iova_table_update(phys_addr
, virt_addr
,
418 /* In case of mem_events for MEM_EVENT_FREE, complete
419 * hugepage is released and its PA entry is set to 0.
421 ret
= dpaax_iova_table_update(phys_addr
, 0, map_len
);
/* Table update failure is logged but not fatal to the callback */
424 DPAAX_DEBUG("PA-Table entry update failed. "
425 "Map=%d, addr=%p, len=%zu, err:(%d)",
426 type
, va
, map_len
, ret
);
/* dpaax_memevent_walk_memsegs()
 * rte_memseg_contig_walk callback: records the PA->VA mapping for one
 * already-allocated contiguous memseg range into the table.
 *
 * @param msl memseg list (unused)
 * @param ms  memseg whose mapping is recorded
 * @param len length of the contiguous range
 * @param arg unused
 *
 * NOTE(review): mangled extraction — the return type/statement are on
 * missing lines; leading integers are stale line numbers, not code.
 */
435 dpaax_memevent_walk_memsegs(const struct rte_memseg_list
*msl __rte_unused
,
436 const struct rte_memseg
*ms
, size_t len
,
437 void *arg __rte_unused
)
439 DPAAX_DEBUG("Walking for %p (pa=%"PRIu64
") and len %zu",
440 ms
->addr
, ms
->phys_addr
, len
);
/* Best-effort insert; return value intentionally not checked here —
 * confirm against upstream (missing lines).
 */
441 dpaax_iova_table_update(rte_mem_virt2phy(ms
->addr
), ms
->addr
, len
);
/* dpaax_handle_memevents()
 * Seed the PA->VA table with all memsegs EAL has already allocated, then
 * register the memory-event callback so future alloc/free events keep the
 * table current.
 *
 * @return result of rte_mem_event_callback_register() (0 on success)
 */
446 dpaax_handle_memevents(void)
448 /* First, walk through all memsegs and pin them, before installing
449 * handler. This assures that all memseg which have already been
450 * identified/allocated by EAL, are already part of PA->VA Table. This
451 * is especially for cases where application allocates memory before
452 * the EAL or this is an externally allocated memory passed to EAL.
454 rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs
, NULL
);
456 return rte_mem_event_callback_register("dpaax_memevents_cb",
457 dpaax_memevent_cb
, NULL
);
/* Constructor-time logger registration (the enclosing RTE_INIT(...) header
 * is on a missing line of this extraction): register the "pmd.common.dpaax"
 * log type and default its level to ERR.
 */
462 dpaax_logger
= rte_log_register("pmd.common.dpaax");
463 if (dpaax_logger
>= 0)
464 rte_log_set_level(dpaax_logger
, RTE_LOG_ERR
);