]>
Commit | Line | Data |
---|---|---|
174de876 FK |
1 | /* |
2 | * Copyright 2015-2017 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
3a87177e HK |
22 | |
23 | #include <linux/pci.h> | |
174de876 FK |
24 | #include <linux/acpi.h> |
25 | #include "kfd_crat.h" | |
520b8fb7 | 26 | #include "kfd_priv.h" |
174de876 | 27 | #include "kfd_topology.h" |
64d1c3a4 | 28 | #include "kfd_iommu.h" |
5b87245f | 29 | #include "amdgpu_amdkfd.h" |
174de876 | 30 | |
3a87177e HK |
/* Base GPU processor ID for dGPUs for which a VCRAT needs to be created.
 * GPU processor IDs are expressed with Bit[31]=1.
 * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
 * used in the CRAT.
 */
static uint32_t gpu_processor_id_low = 0x80001000;

/* get_and_inc_gpu_processor_id - Reserve a range of GPU processor IDs
 *
 *	@total_cu_count - Total CUs present in the GPU including ones
 *			  masked off
 *
 *	Return the next available GPU processor ID, then advance the
 *	counter by @total_cu_count so the next GPU gets a fresh range.
 */
static inline unsigned int get_and_inc_gpu_processor_id(
				unsigned int total_cu_count)
{
	/* IDs have bit 31 set, so keep them in an unsigned type */
	unsigned int current_id = gpu_processor_id_low;

	gpu_processor_id_low += total_cu_count;
	return current_id;
}
50 | ||
/* One row of a static, per-ASIC table describing a GPU cache */
struct kfd_gpu_cache_info {
	/* Size of the cache, copied into the CRAT cache entry as-is */
	uint32_t	cache_size;
	/* Level of the cache (1 for the L1 caches listed in this file) */
	uint32_t	cache_level;
	/* Combination of CRAT_CACHE_FLAGS_* bits */
	uint32_t	flags;
	/* Number of Compute Units sharing this cache.
	 * A value of 1 means the cache is not shared.
	 */
	uint32_t	num_cu_shared;
};
61 | ||
62 | static struct kfd_gpu_cache_info kaveri_cache_info[] = { | |
63 | { | |
64 | /* TCP L1 Cache per CU */ | |
65 | .cache_size = 16, | |
66 | .cache_level = 1, | |
67 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
68 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
69 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
70 | .num_cu_shared = 1, | |
71 | ||
72 | }, | |
73 | { | |
74 | /* Scalar L1 Instruction Cache (in SQC module) per bank */ | |
75 | .cache_size = 16, | |
76 | .cache_level = 1, | |
77 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
78 | CRAT_CACHE_FLAGS_INST_CACHE | | |
79 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
80 | .num_cu_shared = 2, | |
81 | }, | |
82 | { | |
83 | /* Scalar L1 Data Cache (in SQC module) per bank */ | |
84 | .cache_size = 8, | |
85 | .cache_level = 1, | |
86 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
87 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
88 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
89 | .num_cu_shared = 2, | |
90 | }, | |
91 | ||
92 | /* TODO: Add L2 Cache information */ | |
93 | }; | |
94 | ||
95 | ||
96 | static struct kfd_gpu_cache_info carrizo_cache_info[] = { | |
97 | { | |
98 | /* TCP L1 Cache per CU */ | |
99 | .cache_size = 16, | |
100 | .cache_level = 1, | |
101 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
102 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
103 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
104 | .num_cu_shared = 1, | |
105 | }, | |
106 | { | |
107 | /* Scalar L1 Instruction Cache (in SQC module) per bank */ | |
108 | .cache_size = 8, | |
109 | .cache_level = 1, | |
110 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
111 | CRAT_CACHE_FLAGS_INST_CACHE | | |
112 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
113 | .num_cu_shared = 4, | |
114 | }, | |
115 | { | |
116 | /* Scalar L1 Data Cache (in SQC module) per bank. */ | |
117 | .cache_size = 4, | |
118 | .cache_level = 1, | |
119 | .flags = (CRAT_CACHE_FLAGS_ENABLED | | |
120 | CRAT_CACHE_FLAGS_DATA_CACHE | | |
121 | CRAT_CACHE_FLAGS_SIMD_CACHE), | |
122 | .num_cu_shared = 4, | |
123 | }, | |
124 | ||
125 | /* TODO: Add L2 Cache information */ | |
126 | }; | |
127 | ||
/* The ASICs below reuse an existing cache table through an alias.
 * NOTE: If more information is added to struct kfd_gpu_cache_info in
 * future, these ASICs may need a separate table.
 */
#define hawaii_cache_info	kaveri_cache_info
#define tonga_cache_info	carrizo_cache_info
#define fiji_cache_info		carrizo_cache_info
#define polaris10_cache_info	carrizo_cache_info
#define polaris11_cache_info	carrizo_cache_info
#define polaris12_cache_info	carrizo_cache_info
/* TODO - check & update Vega10 cache details */
#define vega10_cache_info	carrizo_cache_info
#define raven_cache_info	carrizo_cache_info
3a87177e | 140 | |
174de876 FK |
141 | static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, |
142 | struct crat_subtype_computeunit *cu) | |
143 | { | |
144 | dev->node_props.cpu_cores_count = cu->num_cpu_cores; | |
145 | dev->node_props.cpu_core_id_base = cu->processor_id_low; | |
146 | if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) | |
147 | dev->node_props.capability |= HSA_CAP_ATS_PRESENT; | |
148 | ||
42aa8793 | 149 | pr_debug("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, |
174de876 FK |
150 | cu->processor_id_low); |
151 | } | |
152 | ||
153 | static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, | |
154 | struct crat_subtype_computeunit *cu) | |
155 | { | |
156 | dev->node_props.simd_id_base = cu->processor_id_low; | |
157 | dev->node_props.simd_count = cu->num_simd_cores; | |
158 | dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; | |
159 | dev->node_props.max_waves_per_simd = cu->max_waves_simd; | |
160 | dev->node_props.wave_front_size = cu->wave_front_size; | |
3a87177e | 161 | dev->node_props.array_count = cu->array_count; |
174de876 FK |
162 | dev->node_props.cu_per_simd_array = cu->num_cu_per_array; |
163 | dev->node_props.simd_per_cu = cu->num_simd_per_cu; | |
164 | dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; | |
165 | if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) | |
166 | dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; | |
42aa8793 | 167 | pr_debug("CU GPU: id_base=%d\n", cu->processor_id_low); |
174de876 FK |
168 | } |
169 | ||
4f449311 HK |
170 | /* kfd_parse_subtype_cu - parse compute unit subtypes and attach it to correct |
171 | * topology device present in the device_list | |
172 | */ | |
173 | static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu, | |
174 | struct list_head *device_list) | |
174de876 FK |
175 | { |
176 | struct kfd_topology_device *dev; | |
174de876 | 177 | |
42aa8793 | 178 | pr_debug("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", |
174de876 | 179 | cu->proximity_domain, cu->hsa_capability); |
4f449311 HK |
180 | list_for_each_entry(dev, device_list, list) { |
181 | if (cu->proximity_domain == dev->proximity_domain) { | |
174de876 FK |
182 | if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) |
183 | kfd_populated_cu_info_cpu(dev, cu); | |
184 | ||
185 | if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) | |
186 | kfd_populated_cu_info_gpu(dev, cu); | |
187 | break; | |
188 | } | |
174de876 FK |
189 | } |
190 | ||
191 | return 0; | |
192 | } | |
193 | ||
f3ed5df8 YZ |
194 | static struct kfd_mem_properties * |
195 | find_subtype_mem(uint32_t heap_type, uint32_t flags, uint32_t width, | |
196 | struct kfd_topology_device *dev) | |
197 | { | |
198 | struct kfd_mem_properties *props; | |
199 | ||
200 | list_for_each_entry(props, &dev->mem_props, list) { | |
201 | if (props->heap_type == heap_type | |
202 | && props->flags == flags | |
203 | && props->width == width) | |
204 | return props; | |
205 | } | |
206 | ||
207 | return NULL; | |
208 | } | |
4f449311 HK |
209 | /* kfd_parse_subtype_mem - parse memory subtypes and attach it to correct |
210 | * topology device present in the device_list | |
174de876 | 211 | */ |
4f449311 HK |
212 | static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, |
213 | struct list_head *device_list) | |
174de876 FK |
214 | { |
215 | struct kfd_mem_properties *props; | |
216 | struct kfd_topology_device *dev; | |
f3ed5df8 YZ |
217 | uint32_t heap_type; |
218 | uint64_t size_in_bytes; | |
219 | uint32_t flags = 0; | |
220 | uint32_t width; | |
174de876 | 221 | |
42aa8793 | 222 | pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n", |
174de876 | 223 | mem->proximity_domain); |
4f449311 HK |
224 | list_for_each_entry(dev, device_list, list) { |
225 | if (mem->proximity_domain == dev->proximity_domain) { | |
3a87177e HK |
226 | /* We're on GPU node */ |
227 | if (dev->node_props.cpu_cores_count == 0) { | |
228 | /* APU */ | |
229 | if (mem->visibility_type == 0) | |
f3ed5df8 | 230 | heap_type = |
3a87177e HK |
231 | HSA_MEM_HEAP_TYPE_FB_PRIVATE; |
232 | /* dGPU */ | |
233 | else | |
f3ed5df8 | 234 | heap_type = mem->visibility_type; |
3a87177e | 235 | } else |
f3ed5df8 | 236 | heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; |
174de876 FK |
237 | |
238 | if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) | |
f3ed5df8 | 239 | flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; |
174de876 | 240 | if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) |
f3ed5df8 | 241 | flags |= HSA_MEM_FLAGS_NON_VOLATILE; |
174de876 | 242 | |
f3ed5df8 | 243 | size_in_bytes = |
174de876 FK |
244 | ((uint64_t)mem->length_high << 32) + |
245 | mem->length_low; | |
f3ed5df8 YZ |
246 | width = mem->width; |
247 | ||
248 | /* Multiple banks of the same type are aggregated into | |
249 | * one. User mode doesn't care about multiple physical | |
250 | * memory segments. It's managed as a single virtual | |
251 | * heap for user mode. | |
252 | */ | |
253 | props = find_subtype_mem(heap_type, flags, width, dev); | |
254 | if (props) { | |
255 | props->size_in_bytes += size_in_bytes; | |
256 | break; | |
257 | } | |
258 | ||
259 | props = kfd_alloc_struct(props); | |
260 | if (!props) | |
261 | return -ENOMEM; | |
262 | ||
263 | props->heap_type = heap_type; | |
264 | props->flags = flags; | |
265 | props->size_in_bytes = size_in_bytes; | |
266 | props->width = width; | |
174de876 | 267 | |
175b9263 | 268 | dev->node_props.mem_banks_count++; |
174de876 FK |
269 | list_add_tail(&props->list, &dev->mem_props); |
270 | ||
271 | break; | |
272 | } | |
174de876 FK |
273 | } |
274 | ||
275 | return 0; | |
276 | } | |
277 | ||
4f449311 HK |
278 | /* kfd_parse_subtype_cache - parse cache subtypes and attach it to correct |
279 | * topology device present in the device_list | |
174de876 | 280 | */ |
4f449311 HK |
281 | static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache, |
282 | struct list_head *device_list) | |
174de876 FK |
283 | { |
284 | struct kfd_cache_properties *props; | |
285 | struct kfd_topology_device *dev; | |
286 | uint32_t id; | |
3a87177e | 287 | uint32_t total_num_of_cu; |
174de876 FK |
288 | |
289 | id = cache->processor_id_low; | |
290 | ||
42aa8793 | 291 | pr_debug("Found cache entry in CRAT table with processor_id=%d\n", id); |
3a87177e HK |
292 | list_for_each_entry(dev, device_list, list) { |
293 | total_num_of_cu = (dev->node_props.array_count * | |
294 | dev->node_props.cu_per_simd_array); | |
295 | ||
296 | /* Cache infomration in CRAT doesn't have proximity_domain | |
297 | * information as it is associated with a CPU core or GPU | |
298 | * Compute Unit. So map the cache using CPU core Id or SIMD | |
299 | * (GPU) ID. | |
300 | * TODO: This works because currently we can safely assume that | |
301 | * Compute Units are parsed before caches are parsed. In | |
302 | * future, remove this dependency | |
303 | */ | |
304 | if ((id >= dev->node_props.cpu_core_id_base && | |
305 | id <= dev->node_props.cpu_core_id_base + | |
306 | dev->node_props.cpu_cores_count) || | |
307 | (id >= dev->node_props.simd_id_base && | |
308 | id < dev->node_props.simd_id_base + | |
309 | total_num_of_cu)) { | |
174de876 FK |
310 | props = kfd_alloc_struct(props); |
311 | if (!props) | |
312 | return -ENOMEM; | |
313 | ||
314 | props->processor_id_low = id; | |
315 | props->cache_level = cache->cache_level; | |
316 | props->cache_size = cache->cache_size; | |
317 | props->cacheline_size = cache->cache_line_size; | |
318 | props->cachelines_per_tag = cache->lines_per_tag; | |
319 | props->cache_assoc = cache->associativity; | |
320 | props->cache_latency = cache->cache_latency; | |
3a87177e HK |
321 | memcpy(props->sibling_map, cache->sibling_map, |
322 | sizeof(props->sibling_map)); | |
174de876 FK |
323 | |
324 | if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) | |
325 | props->cache_type |= HSA_CACHE_TYPE_DATA; | |
326 | if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) | |
327 | props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; | |
328 | if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) | |
329 | props->cache_type |= HSA_CACHE_TYPE_CPU; | |
330 | if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) | |
331 | props->cache_type |= HSA_CACHE_TYPE_HSACU; | |
332 | ||
333 | dev->cache_count++; | |
334 | dev->node_props.caches_count++; | |
335 | list_add_tail(&props->list, &dev->cache_props); | |
336 | ||
337 | break; | |
338 | } | |
3a87177e | 339 | } |
174de876 FK |
340 | |
341 | return 0; | |
342 | } | |
343 | ||
4f449311 HK |
344 | /* kfd_parse_subtype_iolink - parse iolink subtypes and attach it to correct |
345 | * topology device present in the device_list | |
174de876 | 346 | */ |
4f449311 HK |
347 | static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, |
348 | struct list_head *device_list) | |
174de876 | 349 | { |
3a87177e | 350 | struct kfd_iolink_properties *props = NULL, *props2; |
ae9a25ae | 351 | struct kfd_topology_device *dev, *to_dev; |
174de876 FK |
352 | uint32_t id_from; |
353 | uint32_t id_to; | |
354 | ||
355 | id_from = iolink->proximity_domain_from; | |
356 | id_to = iolink->proximity_domain_to; | |
357 | ||
67f7cf9f | 358 | pr_debug("Found IO link entry in CRAT table with id_from=%d, id_to %d\n", |
359 | id_from, id_to); | |
4f449311 HK |
360 | list_for_each_entry(dev, device_list, list) { |
361 | if (id_from == dev->proximity_domain) { | |
174de876 FK |
362 | props = kfd_alloc_struct(props); |
363 | if (!props) | |
364 | return -ENOMEM; | |
365 | ||
366 | props->node_from = id_from; | |
367 | props->node_to = id_to; | |
368 | props->ver_maj = iolink->version_major; | |
369 | props->ver_min = iolink->version_minor; | |
3a87177e | 370 | props->iolink_type = iolink->io_interface_type; |
174de876 | 371 | |
3a87177e HK |
372 | if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) |
373 | props->weight = 20; | |
ae9a25ae SL |
374 | else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) |
375 | props->weight = 15; | |
3a87177e HK |
376 | else |
377 | props->weight = node_distance(id_from, id_to); | |
174de876 FK |
378 | |
379 | props->min_latency = iolink->minimum_latency; | |
380 | props->max_latency = iolink->maximum_latency; | |
381 | props->min_bandwidth = iolink->minimum_bandwidth_mbs; | |
382 | props->max_bandwidth = iolink->maximum_bandwidth_mbs; | |
383 | props->rec_transfer_size = | |
384 | iolink->recommended_transfer_size; | |
385 | ||
386 | dev->io_link_count++; | |
387 | dev->node_props.io_links_count++; | |
388 | list_add_tail(&props->list, &dev->io_link_props); | |
174de876 FK |
389 | break; |
390 | } | |
174de876 FK |
391 | } |
392 | ||
3a87177e HK |
393 | /* CPU topology is created before GPUs are detected, so CPU->GPU |
394 | * links are not built at that time. If a PCIe type is discovered, it | |
395 | * means a GPU is detected and we are adding GPU->CPU to the topology. | |
67f7cf9f | 396 | * At this time, also add the corresponded CPU->GPU link if GPU |
397 | * is large bar. | |
ae9a25ae SL |
398 | * For xGMI, we only added the link with one direction in the crat |
399 | * table, add corresponded reversed direction link now. | |
3a87177e | 400 | */ |
67f7cf9f | 401 | if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { |
ae9a25ae SL |
402 | to_dev = kfd_topology_device_by_proximity_domain(id_to); |
403 | if (!to_dev) | |
3a87177e HK |
404 | return -ENODEV; |
405 | /* same everything but the other direction */ | |
406 | props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); | |
407 | props2->node_from = id_to; | |
408 | props2->node_to = id_from; | |
409 | props2->kobj = NULL; | |
ae9a25ae SL |
410 | to_dev->io_link_count++; |
411 | to_dev->node_props.io_links_count++; | |
412 | list_add_tail(&props2->list, &to_dev->io_link_props); | |
3a87177e HK |
413 | } |
414 | ||
174de876 FK |
415 | return 0; |
416 | } | |
417 | ||
4f449311 HK |
418 | /* kfd_parse_subtype - parse subtypes and attach it to correct topology device |
419 | * present in the device_list | |
420 | * @sub_type_hdr - subtype section of crat_image | |
421 | * @device_list - list of topology devices present in this crat_image | |
422 | */ | |
423 | static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr, | |
424 | struct list_head *device_list) | |
174de876 FK |
425 | { |
426 | struct crat_subtype_computeunit *cu; | |
427 | struct crat_subtype_memory *mem; | |
428 | struct crat_subtype_cache *cache; | |
429 | struct crat_subtype_iolink *iolink; | |
430 | int ret = 0; | |
431 | ||
432 | switch (sub_type_hdr->type) { | |
433 | case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: | |
434 | cu = (struct crat_subtype_computeunit *)sub_type_hdr; | |
4f449311 | 435 | ret = kfd_parse_subtype_cu(cu, device_list); |
174de876 FK |
436 | break; |
437 | case CRAT_SUBTYPE_MEMORY_AFFINITY: | |
438 | mem = (struct crat_subtype_memory *)sub_type_hdr; | |
4f449311 | 439 | ret = kfd_parse_subtype_mem(mem, device_list); |
174de876 FK |
440 | break; |
441 | case CRAT_SUBTYPE_CACHE_AFFINITY: | |
442 | cache = (struct crat_subtype_cache *)sub_type_hdr; | |
4f449311 | 443 | ret = kfd_parse_subtype_cache(cache, device_list); |
174de876 FK |
444 | break; |
445 | case CRAT_SUBTYPE_TLB_AFFINITY: | |
446 | /* | |
447 | * For now, nothing to do here | |
448 | */ | |
42aa8793 | 449 | pr_debug("Found TLB entry in CRAT table (not processing)\n"); |
174de876 FK |
450 | break; |
451 | case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: | |
452 | /* | |
453 | * For now, nothing to do here | |
454 | */ | |
42aa8793 | 455 | pr_debug("Found CCOMPUTE entry in CRAT table (not processing)\n"); |
174de876 FK |
456 | break; |
457 | case CRAT_SUBTYPE_IOLINK_AFFINITY: | |
458 | iolink = (struct crat_subtype_iolink *)sub_type_hdr; | |
4f449311 | 459 | ret = kfd_parse_subtype_iolink(iolink, device_list); |
174de876 FK |
460 | break; |
461 | default: | |
462 | pr_warn("Unknown subtype %d in CRAT\n", | |
463 | sub_type_hdr->type); | |
464 | } | |
465 | ||
466 | return ret; | |
467 | } | |
468 | ||
4f449311 HK |
469 | /* kfd_parse_crat_table - parse CRAT table. For each node present in CRAT |
470 | * create a kfd_topology_device and add in to device_list. Also parse | |
471 | * CRAT subtypes and attach it to appropriate kfd_topology_device | |
472 | * @crat_image - input image containing CRAT | |
473 | * @device_list - [OUT] list of kfd_topology_device generated after | |
474 | * parsing crat_image | |
475 | * @proximity_domain - Proximity domain of the first device in the table | |
476 | * | |
477 | * Return - 0 if successful else -ve value | |
478 | */ | |
479 | int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, | |
480 | uint32_t proximity_domain) | |
174de876 | 481 | { |
520b8fb7 | 482 | struct kfd_topology_device *top_dev = NULL; |
174de876 FK |
483 | struct crat_subtype_generic *sub_type_hdr; |
484 | uint16_t node_id; | |
4f449311 | 485 | int ret = 0; |
174de876 FK |
486 | struct crat_header *crat_table = (struct crat_header *)crat_image; |
487 | uint16_t num_nodes; | |
488 | uint32_t image_len; | |
489 | ||
490 | if (!crat_image) | |
491 | return -EINVAL; | |
492 | ||
4f449311 HK |
493 | if (!list_empty(device_list)) { |
494 | pr_warn("Error device list should be empty\n"); | |
495 | return -EINVAL; | |
496 | } | |
497 | ||
174de876 FK |
498 | num_nodes = crat_table->num_domains; |
499 | image_len = crat_table->length; | |
500 | ||
501 | pr_info("Parsing CRAT table with %d nodes\n", num_nodes); | |
502 | ||
503 | for (node_id = 0; node_id < num_nodes; node_id++) { | |
4f449311 HK |
504 | top_dev = kfd_create_topology_device(device_list); |
505 | if (!top_dev) | |
506 | break; | |
507 | top_dev->proximity_domain = proximity_domain++; | |
508 | } | |
509 | ||
510 | if (!top_dev) { | |
511 | ret = -ENOMEM; | |
512 | goto err; | |
174de876 FK |
513 | } |
514 | ||
520b8fb7 FK |
515 | memcpy(top_dev->oem_id, crat_table->oem_id, CRAT_OEMID_LENGTH); |
516 | memcpy(top_dev->oem_table_id, crat_table->oem_table_id, | |
517 | CRAT_OEMTABLEID_LENGTH); | |
518 | top_dev->oem_revision = crat_table->oem_revision; | |
174de876 FK |
519 | |
520 | sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); | |
521 | while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < | |
522 | ((char *)crat_image) + image_len) { | |
523 | if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { | |
4f449311 HK |
524 | ret = kfd_parse_subtype(sub_type_hdr, device_list); |
525 | if (ret) | |
526 | break; | |
174de876 FK |
527 | } |
528 | ||
529 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
530 | sub_type_hdr->length); | |
531 | } | |
532 | ||
4f449311 HK |
533 | err: |
534 | if (ret) | |
535 | kfd_release_topology_device_list(device_list); | |
174de876 | 536 | |
4f449311 | 537 | return ret; |
174de876 FK |
538 | } |
539 | ||
3a87177e HK |
540 | /* Helper function. See kfd_fill_gpu_cache_info for parameter description */ |
541 | static int fill_in_pcache(struct crat_subtype_cache *pcache, | |
542 | struct kfd_gpu_cache_info *pcache_info, | |
543 | struct kfd_cu_info *cu_info, | |
544 | int mem_available, | |
545 | int cu_bitmask, | |
546 | int cache_type, unsigned int cu_processor_id, | |
547 | int cu_block) | |
548 | { | |
549 | unsigned int cu_sibling_map_mask; | |
550 | int first_active_cu; | |
551 | ||
552 | /* First check if enough memory is available */ | |
553 | if (sizeof(struct crat_subtype_cache) > mem_available) | |
554 | return -ENOMEM; | |
555 | ||
556 | cu_sibling_map_mask = cu_bitmask; | |
557 | cu_sibling_map_mask >>= cu_block; | |
558 | cu_sibling_map_mask &= | |
559 | ((1 << pcache_info[cache_type].num_cu_shared) - 1); | |
560 | first_active_cu = ffs(cu_sibling_map_mask); | |
561 | ||
562 | /* CU could be inactive. In case of shared cache find the first active | |
563 | * CU. and incase of non-shared cache check if the CU is inactive. If | |
564 | * inactive active skip it | |
565 | */ | |
566 | if (first_active_cu) { | |
567 | memset(pcache, 0, sizeof(struct crat_subtype_cache)); | |
568 | pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY; | |
569 | pcache->length = sizeof(struct crat_subtype_cache); | |
570 | pcache->flags = pcache_info[cache_type].flags; | |
571 | pcache->processor_id_low = cu_processor_id | |
572 | + (first_active_cu - 1); | |
573 | pcache->cache_level = pcache_info[cache_type].cache_level; | |
574 | pcache->cache_size = pcache_info[cache_type].cache_size; | |
575 | ||
576 | /* Sibling map is w.r.t processor_id_low, so shift out | |
577 | * inactive CU | |
578 | */ | |
579 | cu_sibling_map_mask = | |
580 | cu_sibling_map_mask >> (first_active_cu - 1); | |
581 | ||
582 | pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF); | |
583 | pcache->sibling_map[1] = | |
584 | (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF); | |
585 | pcache->sibling_map[2] = | |
586 | (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF); | |
587 | pcache->sibling_map[3] = | |
588 | (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF); | |
589 | return 0; | |
590 | } | |
591 | return 1; | |
592 | } | |
593 | ||
594 | /* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info | |
595 | * tables | |
596 | * | |
597 | * @kdev - [IN] GPU device | |
598 | * @gpu_processor_id - [IN] GPU processor ID to which these caches | |
599 | * associate | |
600 | * @available_size - [IN] Amount of memory available in pcache | |
601 | * @cu_info - [IN] Compute Unit info obtained from KGD | |
602 | * @pcache - [OUT] memory into which cache data is to be filled in. | |
603 | * @size_filled - [OUT] amount of data used up in pcache. | |
604 | * @num_of_entries - [OUT] number of caches added | |
605 | */ | |
606 | static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, | |
607 | int gpu_processor_id, | |
608 | int available_size, | |
609 | struct kfd_cu_info *cu_info, | |
610 | struct crat_subtype_cache *pcache, | |
611 | int *size_filled, | |
612 | int *num_of_entries) | |
613 | { | |
614 | struct kfd_gpu_cache_info *pcache_info; | |
615 | int num_of_cache_types = 0; | |
616 | int i, j, k; | |
617 | int ct = 0; | |
618 | int mem_available = available_size; | |
619 | unsigned int cu_processor_id; | |
620 | int ret; | |
621 | ||
622 | switch (kdev->device_info->asic_family) { | |
623 | case CHIP_KAVERI: | |
624 | pcache_info = kaveri_cache_info; | |
625 | num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); | |
626 | break; | |
627 | case CHIP_HAWAII: | |
628 | pcache_info = hawaii_cache_info; | |
629 | num_of_cache_types = ARRAY_SIZE(hawaii_cache_info); | |
630 | break; | |
631 | case CHIP_CARRIZO: | |
632 | pcache_info = carrizo_cache_info; | |
633 | num_of_cache_types = ARRAY_SIZE(carrizo_cache_info); | |
634 | break; | |
635 | case CHIP_TONGA: | |
636 | pcache_info = tonga_cache_info; | |
637 | num_of_cache_types = ARRAY_SIZE(tonga_cache_info); | |
638 | break; | |
639 | case CHIP_FIJI: | |
640 | pcache_info = fiji_cache_info; | |
641 | num_of_cache_types = ARRAY_SIZE(fiji_cache_info); | |
642 | break; | |
643 | case CHIP_POLARIS10: | |
644 | pcache_info = polaris10_cache_info; | |
645 | num_of_cache_types = ARRAY_SIZE(polaris10_cache_info); | |
646 | break; | |
647 | case CHIP_POLARIS11: | |
648 | pcache_info = polaris11_cache_info; | |
649 | num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); | |
650 | break; | |
846a44d7 GB |
651 | case CHIP_POLARIS12: |
652 | pcache_info = polaris12_cache_info; | |
653 | num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); | |
654 | break; | |
389056e5 | 655 | case CHIP_VEGA10: |
846a44d7 | 656 | case CHIP_VEGA12: |
22a3a294 | 657 | case CHIP_VEGA20: |
389056e5 FK |
658 | pcache_info = vega10_cache_info; |
659 | num_of_cache_types = ARRAY_SIZE(vega10_cache_info); | |
660 | break; | |
661 | case CHIP_RAVEN: | |
662 | pcache_info = raven_cache_info; | |
663 | num_of_cache_types = ARRAY_SIZE(raven_cache_info); | |
664 | break; | |
3a87177e HK |
665 | default: |
666 | return -EINVAL; | |
667 | } | |
668 | ||
669 | *size_filled = 0; | |
670 | *num_of_entries = 0; | |
671 | ||
672 | /* For each type of cache listed in the kfd_gpu_cache_info table, | |
673 | * go through all available Compute Units. | |
674 | * The [i,j,k] loop will | |
675 | * if kfd_gpu_cache_info.num_cu_shared = 1 | |
676 | * will parse through all available CU | |
677 | * If (kfd_gpu_cache_info.num_cu_shared != 1) | |
678 | * then it will consider only one CU from | |
679 | * the shared unit | |
680 | */ | |
681 | ||
682 | for (ct = 0; ct < num_of_cache_types; ct++) { | |
683 | cu_processor_id = gpu_processor_id; | |
684 | for (i = 0; i < cu_info->num_shader_engines; i++) { | |
685 | for (j = 0; j < cu_info->num_shader_arrays_per_engine; | |
686 | j++) { | |
687 | for (k = 0; k < cu_info->num_cu_per_sh; | |
688 | k += pcache_info[ct].num_cu_shared) { | |
689 | ||
690 | ret = fill_in_pcache(pcache, | |
691 | pcache_info, | |
692 | cu_info, | |
693 | mem_available, | |
694 | cu_info->cu_bitmap[i][j], | |
695 | ct, | |
696 | cu_processor_id, | |
697 | k); | |
698 | ||
699 | if (ret < 0) | |
700 | break; | |
701 | ||
702 | if (!ret) { | |
703 | pcache++; | |
704 | (*num_of_entries)++; | |
705 | mem_available -= | |
706 | sizeof(*pcache); | |
707 | (*size_filled) += | |
708 | sizeof(*pcache); | |
709 | } | |
710 | ||
711 | /* Move to next CU block */ | |
712 | cu_processor_id += | |
713 | pcache_info[ct].num_cu_shared; | |
714 | } | |
715 | } | |
716 | } | |
717 | } | |
718 | ||
719 | pr_debug("Added [%d] GPU cache entries\n", *num_of_entries); | |
720 | ||
721 | return 0; | |
722 | } | |
723 | ||
8e05247d HK |
724 | /* |
725 | * kfd_create_crat_image_acpi - Allocates memory for CRAT image and | |
726 | * copies CRAT from ACPI (if available). | |
727 | * NOTE: Call kfd_destroy_crat_image to free CRAT image memory | |
728 | * | |
729 | * @crat_image: CRAT read from ACPI. If no CRAT in ACPI then | |
730 | * crat_image will be NULL | |
731 | * @size: [OUT] size of crat_image | |
732 | * | |
733 | * Return 0 if successful else return error code | |
734 | */ | |
735 | int kfd_create_crat_image_acpi(void **crat_image, size_t *size) | |
174de876 FK |
736 | { |
737 | struct acpi_table_header *crat_table; | |
738 | acpi_status status; | |
8e05247d | 739 | void *pcrat_image; |
174de876 | 740 | |
8e05247d | 741 | if (!crat_image) |
174de876 FK |
742 | return -EINVAL; |
743 | ||
8e05247d HK |
744 | *crat_image = NULL; |
745 | ||
746 | /* Fetch the CRAT table from ACPI */ | |
174de876 FK |
747 | status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); |
748 | if (status == AE_NOT_FOUND) { | |
749 | pr_warn("CRAT table not found\n"); | |
750 | return -ENODATA; | |
751 | } else if (ACPI_FAILURE(status)) { | |
752 | const char *err = acpi_format_exception(status); | |
753 | ||
754 | pr_err("CRAT table error: %s\n", err); | |
755 | return -EINVAL; | |
756 | } | |
757 | ||
ebcfd1e2 FK |
758 | if (ignore_crat) { |
759 | pr_info("CRAT table disabled by module option\n"); | |
760 | return -ENODATA; | |
761 | } | |
762 | ||
6dfeb11a | 763 | pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL); |
8e05247d HK |
764 | if (!pcrat_image) |
765 | return -ENOMEM; | |
766 | ||
8e05247d | 767 | *crat_image = pcrat_image; |
174de876 FK |
768 | *size = crat_table->length; |
769 | ||
770 | return 0; | |
771 | } | |
8e05247d | 772 | |
520b8fb7 FK |
/* Memory required to create Virtual CRAT.
 * There is no easy way to predict how much memory a Virtual CRAT needs,
 * so the fixed amounts below are allocated for the CPU and the GPU
 * Virtual CRAT. They are expected to cover all known conditions; to be
 * safe, the code filling the image additionally checks the remaining
 * space so that it doesn't overwrite.
 */
#define VCRAT_SIZE_FOR_CPU	(2 * PAGE_SIZE)
#define VCRAT_SIZE_FOR_GPU	(3 * PAGE_SIZE)
781 | ||
782 | /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node | |
783 | * | |
784 | * @numa_node_id: CPU NUMA node id | |
785 | * @avail_size: Available size in the memory | |
786 | * @sub_type_hdr: Memory into which compute info will be filled in | |
787 | * | |
788 | * Return 0 if successful else return -ve value | |
789 | */ | |
790 | static int kfd_fill_cu_for_cpu(int numa_node_id, int *avail_size, | |
791 | int proximity_domain, | |
792 | struct crat_subtype_computeunit *sub_type_hdr) | |
793 | { | |
794 | const struct cpumask *cpumask; | |
795 | ||
796 | *avail_size -= sizeof(struct crat_subtype_computeunit); | |
797 | if (*avail_size < 0) | |
798 | return -ENOMEM; | |
799 | ||
800 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); | |
801 | ||
802 | /* Fill in subtype header data */ | |
803 | sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; | |
804 | sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); | |
805 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
806 | ||
807 | cpumask = cpumask_of_node(numa_node_id); | |
808 | ||
809 | /* Fill in CU data */ | |
810 | sub_type_hdr->flags |= CRAT_CU_FLAGS_CPU_PRESENT; | |
811 | sub_type_hdr->proximity_domain = proximity_domain; | |
812 | sub_type_hdr->processor_id_low = kfd_numa_node_to_apic_id(numa_node_id); | |
813 | if (sub_type_hdr->processor_id_low == -1) | |
814 | return -EINVAL; | |
815 | ||
816 | sub_type_hdr->num_cpu_cores = cpumask_weight(cpumask); | |
817 | ||
818 | return 0; | |
819 | } | |
820 | ||
821 | /* kfd_fill_mem_info_for_cpu - Fill in Memory info for the given CPU NUMA node | |
822 | * | |
823 | * @numa_node_id: CPU NUMA node id | |
824 | * @avail_size: Available size in the memory | |
825 | * @sub_type_hdr: Memory into which compute info will be filled in | |
826 | * | |
827 | * Return 0 if successful else return -ve value | |
828 | */ | |
829 | static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, | |
830 | int proximity_domain, | |
831 | struct crat_subtype_memory *sub_type_hdr) | |
832 | { | |
833 | uint64_t mem_in_bytes = 0; | |
834 | pg_data_t *pgdat; | |
835 | int zone_type; | |
836 | ||
837 | *avail_size -= sizeof(struct crat_subtype_memory); | |
838 | if (*avail_size < 0) | |
839 | return -ENOMEM; | |
840 | ||
841 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); | |
842 | ||
843 | /* Fill in subtype header data */ | |
844 | sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; | |
845 | sub_type_hdr->length = sizeof(struct crat_subtype_memory); | |
846 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
847 | ||
848 | /* Fill in Memory Subunit data */ | |
849 | ||
850 | /* Unlike si_meminfo, si_meminfo_node is not exported. So | |
851 | * the following lines are duplicated from si_meminfo_node | |
852 | * function | |
853 | */ | |
854 | pgdat = NODE_DATA(numa_node_id); | |
855 | for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) | |
9705bea5 | 856 | mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]); |
520b8fb7 FK |
857 | mem_in_bytes <<= PAGE_SHIFT; |
858 | ||
859 | sub_type_hdr->length_low = lower_32_bits(mem_in_bytes); | |
860 | sub_type_hdr->length_high = upper_32_bits(mem_in_bytes); | |
861 | sub_type_hdr->proximity_domain = proximity_domain; | |
862 | ||
863 | return 0; | |
864 | } | |
865 | ||
866 | static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, | |
867 | uint32_t *num_entries, | |
868 | struct crat_subtype_iolink *sub_type_hdr) | |
869 | { | |
870 | int nid; | |
871 | struct cpuinfo_x86 *c = &cpu_data(0); | |
872 | uint8_t link_type; | |
873 | ||
874 | if (c->x86_vendor == X86_VENDOR_AMD) | |
875 | link_type = CRAT_IOLINK_TYPE_HYPERTRANSPORT; | |
876 | else | |
877 | link_type = CRAT_IOLINK_TYPE_QPI_1_1; | |
878 | ||
879 | *num_entries = 0; | |
880 | ||
881 | /* Create IO links from this node to other CPU nodes */ | |
882 | for_each_online_node(nid) { | |
883 | if (nid == numa_node_id) /* node itself */ | |
884 | continue; | |
885 | ||
886 | *avail_size -= sizeof(struct crat_subtype_iolink); | |
887 | if (*avail_size < 0) | |
888 | return -ENOMEM; | |
889 | ||
890 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); | |
891 | ||
892 | /* Fill in subtype header data */ | |
893 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; | |
894 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); | |
895 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
896 | ||
897 | /* Fill in IO link data */ | |
898 | sub_type_hdr->proximity_domain_from = numa_node_id; | |
899 | sub_type_hdr->proximity_domain_to = nid; | |
900 | sub_type_hdr->io_interface_type = link_type; | |
901 | ||
902 | (*num_entries)++; | |
903 | sub_type_hdr++; | |
904 | } | |
905 | ||
906 | return 0; | |
907 | } | |
908 | ||
909 | /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU | |
910 | * | |
911 | * @pcrat_image: Fill in VCRAT for CPU | |
912 | * @size: [IN] allocated size of crat_image. | |
913 | * [OUT] actual size of data filled in crat_image | |
914 | */ | |
915 | static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) | |
916 | { | |
917 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; | |
918 | struct acpi_table_header *acpi_table; | |
919 | acpi_status status; | |
920 | struct crat_subtype_generic *sub_type_hdr; | |
921 | int avail_size = *size; | |
922 | int numa_node_id; | |
923 | uint32_t entries = 0; | |
924 | int ret = 0; | |
925 | ||
926 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) | |
927 | return -EINVAL; | |
928 | ||
929 | /* Fill in CRAT Header. | |
930 | * Modify length and total_entries as subunits are added. | |
931 | */ | |
932 | avail_size -= sizeof(struct crat_header); | |
933 | if (avail_size < 0) | |
934 | return -ENOMEM; | |
935 | ||
936 | memset(crat_table, 0, sizeof(struct crat_header)); | |
937 | memcpy(&crat_table->signature, CRAT_SIGNATURE, | |
938 | sizeof(crat_table->signature)); | |
939 | crat_table->length = sizeof(struct crat_header); | |
940 | ||
941 | status = acpi_get_table("DSDT", 0, &acpi_table); | |
48a44387 | 942 | if (status != AE_OK) |
520b8fb7 FK |
943 | pr_warn("DSDT table not found for OEM information\n"); |
944 | else { | |
945 | crat_table->oem_revision = acpi_table->revision; | |
946 | memcpy(crat_table->oem_id, acpi_table->oem_id, | |
947 | CRAT_OEMID_LENGTH); | |
948 | memcpy(crat_table->oem_table_id, acpi_table->oem_table_id, | |
949 | CRAT_OEMTABLEID_LENGTH); | |
950 | } | |
951 | crat_table->total_entries = 0; | |
952 | crat_table->num_domains = 0; | |
953 | ||
954 | sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); | |
955 | ||
956 | for_each_online_node(numa_node_id) { | |
957 | if (kfd_numa_node_to_apic_id(numa_node_id) == -1) | |
958 | continue; | |
959 | ||
960 | /* Fill in Subtype: Compute Unit */ | |
961 | ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size, | |
962 | crat_table->num_domains, | |
963 | (struct crat_subtype_computeunit *)sub_type_hdr); | |
964 | if (ret < 0) | |
965 | return ret; | |
966 | crat_table->length += sub_type_hdr->length; | |
967 | crat_table->total_entries++; | |
968 | ||
969 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
970 | sub_type_hdr->length); | |
971 | ||
972 | /* Fill in Subtype: Memory */ | |
973 | ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size, | |
974 | crat_table->num_domains, | |
975 | (struct crat_subtype_memory *)sub_type_hdr); | |
976 | if (ret < 0) | |
977 | return ret; | |
978 | crat_table->length += sub_type_hdr->length; | |
979 | crat_table->total_entries++; | |
980 | ||
981 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
982 | sub_type_hdr->length); | |
983 | ||
984 | /* Fill in Subtype: IO Link */ | |
985 | ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size, | |
986 | &entries, | |
987 | (struct crat_subtype_iolink *)sub_type_hdr); | |
988 | if (ret < 0) | |
989 | return ret; | |
990 | crat_table->length += (sub_type_hdr->length * entries); | |
991 | crat_table->total_entries += entries; | |
992 | ||
993 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
994 | sub_type_hdr->length * entries); | |
995 | ||
996 | crat_table->num_domains++; | |
997 | } | |
998 | ||
999 | /* TODO: Add cache Subtype for CPU. | |
1000 | * Currently, CPU cache information is available in function | |
1001 | * detect_cache_attributes(cpu) defined in the file | |
1002 | * ./arch/x86/kernel/cpu/intel_cacheinfo.c. This function is not | |
1003 | * exported and to get the same information the code needs to be | |
1004 | * duplicated. | |
1005 | */ | |
1006 | ||
1007 | *size = crat_table->length; | |
1008 | pr_info("Virtual CRAT table created for CPU\n"); | |
1009 | ||
1010 | return 0; | |
1011 | } | |
1012 | ||
3a87177e HK |
1013 | static int kfd_fill_gpu_memory_affinity(int *avail_size, |
1014 | struct kfd_dev *kdev, uint8_t type, uint64_t size, | |
1015 | struct crat_subtype_memory *sub_type_hdr, | |
1016 | uint32_t proximity_domain, | |
1017 | const struct kfd_local_mem_info *local_mem_info) | |
1018 | { | |
1019 | *avail_size -= sizeof(struct crat_subtype_memory); | |
1020 | if (*avail_size < 0) | |
1021 | return -ENOMEM; | |
1022 | ||
1023 | memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_memory)); | |
1024 | sub_type_hdr->type = CRAT_SUBTYPE_MEMORY_AFFINITY; | |
1025 | sub_type_hdr->length = sizeof(struct crat_subtype_memory); | |
1026 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; | |
1027 | ||
1028 | sub_type_hdr->proximity_domain = proximity_domain; | |
1029 | ||
1030 | pr_debug("Fill gpu memory affinity - type 0x%x size 0x%llx\n", | |
1031 | type, size); | |
1032 | ||
1033 | sub_type_hdr->length_low = lower_32_bits(size); | |
1034 | sub_type_hdr->length_high = upper_32_bits(size); | |
1035 | ||
1036 | sub_type_hdr->width = local_mem_info->vram_width; | |
1037 | sub_type_hdr->visibility_type = type; | |
1038 | ||
1039 | return 0; | |
1040 | } | |
1041 | ||
1042 | /* kfd_fill_gpu_direct_io_link - Fill in direct io link from GPU | |
1043 | * to its NUMA node | |
1044 | * @avail_size: Available size in the memory | |
1045 | * @kdev - [IN] GPU device | |
1046 | * @sub_type_hdr: Memory into which io link info will be filled in | |
1047 | * @proximity_domain - proximity domain of the GPU node | |
1048 | * | |
1049 | * Return 0 if successful else return -ve value | |
1050 | */ | |
ae9a25ae | 1051 | static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, |
3a87177e HK |
1052 | struct kfd_dev *kdev, |
1053 | struct crat_subtype_iolink *sub_type_hdr, | |
1054 | uint32_t proximity_domain) | |
1055 | { | |
1056 | *avail_size -= sizeof(struct crat_subtype_iolink); | |
1057 | if (*avail_size < 0) | |
1058 | return -ENOMEM; | |
1059 | ||
1060 | memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); | |
1061 | ||
1062 | /* Fill in subtype header data */ | |
1063 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; | |
1064 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); | |
1065 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED; | |
67f7cf9f | 1066 | if (kfd_dev_is_large_bar(kdev)) |
1067 | sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; | |
3a87177e HK |
1068 | |
1069 | /* Fill in IOLINK subtype. | |
1070 | * TODO: Fill-in other fields of iolink subtype | |
1071 | */ | |
1072 | sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS; | |
1073 | sub_type_hdr->proximity_domain_from = proximity_domain; | |
1074 | #ifdef CONFIG_NUMA | |
1075 | if (kdev->pdev->dev.numa_node == NUMA_NO_NODE) | |
1076 | sub_type_hdr->proximity_domain_to = 0; | |
1077 | else | |
1078 | sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node; | |
1079 | #else | |
1080 | sub_type_hdr->proximity_domain_to = 0; | |
1081 | #endif | |
1082 | return 0; | |
1083 | } | |
1084 | ||
ae9a25ae SL |
1085 | static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, |
1086 | struct kfd_dev *kdev, | |
1087 | struct crat_subtype_iolink *sub_type_hdr, | |
1088 | uint32_t proximity_domain_from, | |
1089 | uint32_t proximity_domain_to) | |
1090 | { | |
1091 | *avail_size -= sizeof(struct crat_subtype_iolink); | |
1092 | if (*avail_size < 0) | |
1093 | return -ENOMEM; | |
1094 | ||
1095 | memset((void *)sub_type_hdr, 0, sizeof(struct crat_subtype_iolink)); | |
1096 | ||
1097 | sub_type_hdr->type = CRAT_SUBTYPE_IOLINK_AFFINITY; | |
1098 | sub_type_hdr->length = sizeof(struct crat_subtype_iolink); | |
67f7cf9f | 1099 | sub_type_hdr->flags |= CRAT_SUBTYPE_FLAGS_ENABLED | |
1100 | CRAT_IOLINK_FLAGS_BI_DIRECTIONAL; | |
ae9a25ae SL |
1101 | |
1102 | sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; | |
1103 | sub_type_hdr->proximity_domain_from = proximity_domain_from; | |
1104 | sub_type_hdr->proximity_domain_to = proximity_domain_to; | |
1105 | return 0; | |
1106 | } | |
1107 | ||
3a87177e HK |
1108 | /* kfd_create_vcrat_image_gpu - Create Virtual CRAT for CPU |
1109 | * | |
1110 | * @pcrat_image: Fill in VCRAT for GPU | |
1111 | * @size: [IN] allocated size of crat_image. | |
1112 | * [OUT] actual size of data filled in crat_image | |
1113 | */ | |
1114 | static int kfd_create_vcrat_image_gpu(void *pcrat_image, | |
1115 | size_t *size, struct kfd_dev *kdev, | |
1116 | uint32_t proximity_domain) | |
1117 | { | |
1118 | struct crat_header *crat_table = (struct crat_header *)pcrat_image; | |
1119 | struct crat_subtype_generic *sub_type_hdr; | |
ae9a25ae SL |
1120 | struct kfd_local_mem_info local_mem_info; |
1121 | struct kfd_topology_device *peer_dev; | |
3a87177e HK |
1122 | struct crat_subtype_computeunit *cu; |
1123 | struct kfd_cu_info cu_info; | |
3a87177e HK |
1124 | int avail_size = *size; |
1125 | uint32_t total_num_of_cu; | |
1126 | int num_of_cache_entries = 0; | |
1127 | int cache_mem_filled = 0; | |
ae9a25ae | 1128 | uint32_t nid = 0; |
3a87177e | 1129 | int ret = 0; |
3a87177e HK |
1130 | |
1131 | if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) | |
1132 | return -EINVAL; | |
1133 | ||
1134 | /* Fill the CRAT Header. | |
1135 | * Modify length and total_entries as subunits are added. | |
1136 | */ | |
1137 | avail_size -= sizeof(struct crat_header); | |
1138 | if (avail_size < 0) | |
1139 | return -ENOMEM; | |
1140 | ||
1141 | memset(crat_table, 0, sizeof(struct crat_header)); | |
1142 | ||
1143 | memcpy(&crat_table->signature, CRAT_SIGNATURE, | |
1144 | sizeof(crat_table->signature)); | |
1145 | /* Change length as we add more subtypes*/ | |
1146 | crat_table->length = sizeof(struct crat_header); | |
1147 | crat_table->num_domains = 1; | |
1148 | crat_table->total_entries = 0; | |
1149 | ||
1150 | /* Fill in Subtype: Compute Unit | |
1151 | * First fill in the sub type header and then sub type data | |
1152 | */ | |
1153 | avail_size -= sizeof(struct crat_subtype_computeunit); | |
1154 | if (avail_size < 0) | |
1155 | return -ENOMEM; | |
1156 | ||
1157 | sub_type_hdr = (struct crat_subtype_generic *)(crat_table + 1); | |
1158 | memset(sub_type_hdr, 0, sizeof(struct crat_subtype_computeunit)); | |
1159 | ||
1160 | sub_type_hdr->type = CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY; | |
1161 | sub_type_hdr->length = sizeof(struct crat_subtype_computeunit); | |
1162 | sub_type_hdr->flags = CRAT_SUBTYPE_FLAGS_ENABLED; | |
1163 | ||
1164 | /* Fill CU subtype data */ | |
1165 | cu = (struct crat_subtype_computeunit *)sub_type_hdr; | |
1166 | cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; | |
1167 | cu->proximity_domain = proximity_domain; | |
1168 | ||
7cd52c91 | 1169 | amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info); |
3a87177e HK |
1170 | cu->num_simd_per_cu = cu_info.simd_per_cu; |
1171 | cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; | |
1172 | cu->max_waves_simd = cu_info.max_waves_per_simd; | |
1173 | ||
1174 | cu->wave_front_size = cu_info.wave_front_size; | |
1175 | cu->array_count = cu_info.num_shader_arrays_per_engine * | |
1176 | cu_info.num_shader_engines; | |
1177 | total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh); | |
1178 | cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu); | |
1179 | cu->num_cu_per_array = cu_info.num_cu_per_sh; | |
1180 | cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu; | |
1181 | cu->num_banks = cu_info.num_shader_engines; | |
1182 | cu->lds_size_in_kb = cu_info.lds_size; | |
1183 | ||
1184 | cu->hsa_capability = 0; | |
1185 | ||
1186 | /* Check if this node supports IOMMU. During parsing this flag will | |
1187 | * translate to HSA_CAP_ATS_PRESENT | |
1188 | */ | |
64d1c3a4 FK |
1189 | if (!kfd_iommu_check_device(kdev)) |
1190 | cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; | |
3a87177e HK |
1191 | |
1192 | crat_table->length += sub_type_hdr->length; | |
1193 | crat_table->total_entries++; | |
1194 | ||
1195 | /* Fill in Subtype: Memory. Only on systems with large BAR (no | |
1196 | * private FB), report memory as public. On other systems | |
1197 | * report the total FB size (public+private) as a single | |
1198 | * private heap. | |
1199 | */ | |
7cd52c91 | 1200 | amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info); |
3a87177e HK |
1201 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + |
1202 | sub_type_hdr->length); | |
1203 | ||
374200b1 FK |
1204 | if (debug_largebar) |
1205 | local_mem_info.local_mem_size_private = 0; | |
1206 | ||
3a87177e HK |
1207 | if (local_mem_info.local_mem_size_private == 0) |
1208 | ret = kfd_fill_gpu_memory_affinity(&avail_size, | |
1209 | kdev, HSA_MEM_HEAP_TYPE_FB_PUBLIC, | |
1210 | local_mem_info.local_mem_size_public, | |
1211 | (struct crat_subtype_memory *)sub_type_hdr, | |
1212 | proximity_domain, | |
1213 | &local_mem_info); | |
1214 | else | |
1215 | ret = kfd_fill_gpu_memory_affinity(&avail_size, | |
1216 | kdev, HSA_MEM_HEAP_TYPE_FB_PRIVATE, | |
1217 | local_mem_info.local_mem_size_public + | |
1218 | local_mem_info.local_mem_size_private, | |
1219 | (struct crat_subtype_memory *)sub_type_hdr, | |
1220 | proximity_domain, | |
1221 | &local_mem_info); | |
1222 | if (ret < 0) | |
1223 | return ret; | |
1224 | ||
1225 | crat_table->length += sizeof(struct crat_subtype_memory); | |
1226 | crat_table->total_entries++; | |
1227 | ||
1228 | /* TODO: Fill in cache information. This information is NOT readily | |
1229 | * available in KGD | |
1230 | */ | |
1231 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
1232 | sub_type_hdr->length); | |
1233 | ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low, | |
1234 | avail_size, | |
1235 | &cu_info, | |
1236 | (struct crat_subtype_cache *)sub_type_hdr, | |
1237 | &cache_mem_filled, | |
1238 | &num_of_cache_entries); | |
1239 | ||
1240 | if (ret < 0) | |
1241 | return ret; | |
1242 | ||
1243 | crat_table->length += cache_mem_filled; | |
1244 | crat_table->total_entries += num_of_cache_entries; | |
1245 | avail_size -= cache_mem_filled; | |
1246 | ||
1247 | /* Fill in Subtype: IO_LINKS | |
1248 | * Only direct links are added here which is Link from GPU to | |
1249 | * to its NUMA node. Indirect links are added by userspace. | |
1250 | */ | |
1251 | sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + | |
1252 | cache_mem_filled); | |
ae9a25ae | 1253 | ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev, |
3a87177e HK |
1254 | (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain); |
1255 | ||
1256 | if (ret < 0) | |
1257 | return ret; | |
1258 | ||
1259 | crat_table->length += sub_type_hdr->length; | |
1260 | crat_table->total_entries++; | |
1261 | ||
ae9a25ae SL |
1262 | |
1263 | /* Fill in Subtype: IO_LINKS | |
1264 | * Direct links from GPU to other GPUs through xGMI. | |
1265 | * We will loop GPUs that already be processed (with lower value | |
1266 | * of proximity_domain), add the link for the GPUs with same | |
1267 | * hive id (from this GPU to other GPU) . The reversed iolink | |
1268 | * (from other GPU to this GPU) will be added | |
1269 | * in kfd_parse_subtype_iolink. | |
1270 | */ | |
1271 | if (kdev->hive_id) { | |
1272 | for (nid = 0; nid < proximity_domain; ++nid) { | |
1273 | peer_dev = kfd_topology_device_by_proximity_domain(nid); | |
1274 | if (!peer_dev->gpu) | |
1275 | continue; | |
1276 | if (peer_dev->gpu->hive_id != kdev->hive_id) | |
1277 | continue; | |
1278 | sub_type_hdr = (typeof(sub_type_hdr))( | |
1279 | (char *)sub_type_hdr + | |
1280 | sizeof(struct crat_subtype_iolink)); | |
1281 | ret = kfd_fill_gpu_xgmi_link_to_gpu( | |
1282 | &avail_size, kdev, | |
1283 | (struct crat_subtype_iolink *)sub_type_hdr, | |
1284 | proximity_domain, nid); | |
1285 | if (ret < 0) | |
1286 | return ret; | |
1287 | crat_table->length += sub_type_hdr->length; | |
1288 | crat_table->total_entries++; | |
1289 | } | |
1290 | } | |
3a87177e HK |
1291 | *size = crat_table->length; |
1292 | pr_info("Virtual CRAT table created for GPU\n"); | |
1293 | ||
1294 | return ret; | |
1295 | } | |
1296 | ||
520b8fb7 FK |
1297 | /* kfd_create_crat_image_virtual - Allocates memory for CRAT image and |
1298 | * creates a Virtual CRAT (VCRAT) image | |
1299 | * | |
1300 | * NOTE: Call kfd_destroy_crat_image to free CRAT image memory | |
1301 | * | |
1302 | * @crat_image: VCRAT image created because ACPI does not have a | |
1303 | * CRAT for this device | |
1304 | * @size: [OUT] size of virtual crat_image | |
1305 | * @flags: COMPUTE_UNIT_CPU - Create VCRAT for CPU device | |
1306 | * COMPUTE_UNIT_GPU - Create VCRAT for GPU | |
1307 | * (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU | |
1308 | * -- this option is not currently implemented. | |
1309 | * The assumption is that all AMD APUs will have CRAT | |
1310 | * @kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU | |
1311 | * | |
1312 | * Return 0 if successful else return -ve value | |
1313 | */ | |
1314 | int kfd_create_crat_image_virtual(void **crat_image, size_t *size, | |
1315 | int flags, struct kfd_dev *kdev, | |
1316 | uint32_t proximity_domain) | |
1317 | { | |
1318 | void *pcrat_image = NULL; | |
1319 | int ret = 0; | |
1320 | ||
1321 | if (!crat_image) | |
1322 | return -EINVAL; | |
1323 | ||
1324 | *crat_image = NULL; | |
1325 | ||
1326 | /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and | |
1327 | * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover | |
1328 | * all the current conditions. A check is put not to overwrite beyond | |
1329 | * allocated size | |
1330 | */ | |
1331 | switch (flags) { | |
1332 | case COMPUTE_UNIT_CPU: | |
1333 | pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL); | |
1334 | if (!pcrat_image) | |
1335 | return -ENOMEM; | |
1336 | *size = VCRAT_SIZE_FOR_CPU; | |
1337 | ret = kfd_create_vcrat_image_cpu(pcrat_image, size); | |
1338 | break; | |
1339 | case COMPUTE_UNIT_GPU: | |
3a87177e HK |
1340 | if (!kdev) |
1341 | return -EINVAL; | |
1342 | pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); | |
1343 | if (!pcrat_image) | |
1344 | return -ENOMEM; | |
1345 | *size = VCRAT_SIZE_FOR_GPU; | |
1346 | ret = kfd_create_vcrat_image_gpu(pcrat_image, size, kdev, | |
1347 | proximity_domain); | |
520b8fb7 FK |
1348 | break; |
1349 | case (COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU): | |
1350 | /* TODO: */ | |
1351 | ret = -EINVAL; | |
1352 | pr_err("VCRAT not implemented for APU\n"); | |
1353 | break; | |
1354 | default: | |
1355 | ret = -EINVAL; | |
1356 | } | |
1357 | ||
1358 | if (!ret) | |
1359 | *crat_image = pcrat_image; | |
1360 | else | |
1361 | kfree(pcrat_image); | |
1362 | ||
1363 | return ret; | |
1364 | } | |
1365 | ||
1366 | ||
/* kfd_destroy_crat_image - Free a CRAT image
 *
 *	@crat_image: [IN] - crat_image from kfd_create_crat_image_xxx(..);
 *		     released with kfree()
 *
 */
void kfd_destroy_crat_image(void *crat_image)
{
	kfree(crat_image);
}