]>
Commit | Line | Data |
---|---|---|
47979480 CW |
1 | /* |
2 | * Copyright © 2017 Intel Corporation | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | * | |
23 | */ | |
24 | ||
25 | #include <linux/slab.h> | |
26 | ||
27 | #include "i915_syncmap.h" | |
28 | ||
29 | #include "i915_gem.h" /* GEM_BUG_ON() */ | |
30 | #include "i915_selftest.h" | |
31 | ||
32 | #define SHIFT ilog2(KSYNCMAP) | |
33 | #define MASK (KSYNCMAP - 1) | |
34 | ||
35 | /* | |
36 | * struct i915_syncmap is a layer of a radixtree that maps a u64 fence | |
37 | * context id to the last u32 fence seqno waited upon from that context. | |
38 | * Unlike lib/radixtree it uses a parent pointer that allows traversal back to | |
39 | * the root. This allows us to access the whole tree via a single pointer | |
40 | * to the most recently used layer. We expect fence contexts to be dense | |
41 | * and most reuse to be on the same i915_gem_context but on neighbouring | |
42 | * engines (i.e. on adjacent contexts) and reuse the same leaf, a very | |
43 | * effective lookup cache. If the new lookup is not on the same leaf, we | |
44 | * expect it to be on the neighbouring branch. | |
45 | * | |
46 | * A leaf holds an array of u32 seqno, and has height 0. The bitmap field | |
47 | * allows us to store whether a particular seqno is valid (i.e. allows us | |
48 | * to distinguish unset from 0). | |
49 | * | |
50 | * A branch holds an array of layer pointers, and has height > 0, and always | |
51 | * has at least 2 layers (either branches or leaves) below it. | |
52 | * | |
53 | * For example, | |
54 | * for x in | |
55 | * 0 1 2 0x10 0x11 0x200 0x201 | |
56 | * 0x500000 0x500001 0x503000 0x503001 | |
57 | * 0xE<<60: | |
58 | * i915_syncmap_set(&sync, x, lower_32_bits(x)); | |
59 | * will build a tree like: | |
60 | * 0xXXXXXXXXXXXXXXXX | |
61 | * 0-> 0x0000000000XXXXXX | |
62 | * | 0-> 0x0000000000000XXX | |
63 | * | | 0-> 0x00000000000000XX | |
64 | * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 | |
65 | * | | | 1-> 0x000000000000001X 0:10, 1:11 | |
66 | * | | 2-> 0x000000000000020X 0:200, 1:201 | |
67 | * | 5-> 0x000000000050XXXX | |
68 | * | 0-> 0x000000000050000X 0:500000, 1:500001 | |
69 | * | 3-> 0x000000000050300X 0:503000, 1:503001 | |
70 | * e-> 0xe00000000000000X e:e | |
71 | */ | |
72 | ||
73 | struct i915_syncmap { | |
74 | u64 prefix; | |
75 | unsigned int height; | |
76 | unsigned int bitmap; | |
77 | struct i915_syncmap *parent; | |
78 | /* | |
79 | * Following this header is an array of either seqno or child pointers: | |
80 | * union { | |
81 | * u32 seqno[KSYNCMAP]; | |
82 | * struct i915_syncmap *child[KSYNCMAP]; | |
83 | * }; | |
84 | */ | |
85 | }; | |
86 | ||
87 | /** | |
88 | * i915_syncmap_init -- initialise the #i915_syncmap | |
89 | * @root - pointer to the #i915_syncmap | |
90 | */ | |
91 | void i915_syncmap_init(struct i915_syncmap **root) | |
92 | { | |
93 | BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); | |
94 | BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); | |
95 | BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap)); | |
96 | *root = NULL; | |
97 | } | |
98 | ||
99 | static inline u32 *__sync_seqno(struct i915_syncmap *p) | |
100 | { | |
101 | GEM_BUG_ON(p->height); | |
102 | return (u32 *)(p + 1); | |
103 | } | |
104 | ||
105 | static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) | |
106 | { | |
107 | GEM_BUG_ON(!p->height); | |
108 | return (struct i915_syncmap **)(p + 1); | |
109 | } | |
110 | ||
111 | static inline unsigned int | |
112 | __sync_branch_idx(const struct i915_syncmap *p, u64 id) | |
113 | { | |
114 | return (id >> p->height) & MASK; | |
115 | } | |
116 | ||
117 | static inline unsigned int | |
118 | __sync_leaf_idx(const struct i915_syncmap *p, u64 id) | |
119 | { | |
120 | GEM_BUG_ON(p->height); | |
121 | return id & MASK; | |
122 | } | |
123 | ||
124 | static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) | |
125 | { | |
126 | return id >> p->height >> SHIFT; | |
127 | } | |
128 | ||
129 | static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) | |
130 | { | |
131 | GEM_BUG_ON(p->height); | |
132 | return id >> SHIFT; | |
133 | } | |
134 | ||
135 | static inline bool seqno_later(u32 a, u32 b) | |
136 | { | |
137 | return (s32)(a - b) >= 0; | |
138 | } | |
139 | ||
140 | /** | |
141 | * i915_syncmap_is_later -- compare against the last know sync point | |
142 | * @root - pointer to the #i915_syncmap | |
143 | * @id - the context id (other timeline) we are synchronising to | |
144 | * @seqno - the sequence number along the other timeline | |
145 | * | |
146 | * If we have already synchronised this @root timeline with another (@id) then | |
147 | * we can omit any repeated or earlier synchronisation requests. If the two | |
148 | * timelines are already coupled, we can also omit the dependency between the | |
149 | * two as that is already known via the timeline. | |
150 | * | |
151 | * Returns true if the two timelines are already synchronised wrt to @seqno, | |
152 | * false if not and the synchronisation must be emitted. | |
153 | */ | |
154 | bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) | |
155 | { | |
156 | struct i915_syncmap *p; | |
157 | unsigned int idx; | |
158 | ||
159 | p = *root; | |
160 | if (!p) | |
161 | return false; | |
162 | ||
163 | if (likely(__sync_leaf_prefix(p, id) == p->prefix)) | |
164 | goto found; | |
165 | ||
166 | /* First climb the tree back to a parent branch */ | |
167 | do { | |
168 | p = p->parent; | |
169 | if (!p) | |
170 | return false; | |
171 | ||
172 | if (__sync_branch_prefix(p, id) == p->prefix) | |
173 | break; | |
174 | } while (1); | |
175 | ||
176 | /* And then descend again until we find our leaf */ | |
177 | do { | |
178 | if (!p->height) | |
179 | break; | |
180 | ||
181 | p = __sync_child(p)[__sync_branch_idx(p, id)]; | |
182 | if (!p) | |
183 | return false; | |
184 | ||
185 | if (__sync_branch_prefix(p, id) != p->prefix) | |
186 | return false; | |
187 | } while (1); | |
188 | ||
189 | *root = p; | |
190 | found: | |
191 | idx = __sync_leaf_idx(p, id); | |
192 | if (!(p->bitmap & BIT(idx))) | |
193 | return false; | |
194 | ||
195 | return seqno_later(__sync_seqno(p)[idx], seqno); | |
196 | } | |
197 | ||
198 | static struct i915_syncmap * | |
199 | __sync_alloc_leaf(struct i915_syncmap *parent, u64 id) | |
200 | { | |
201 | struct i915_syncmap *p; | |
202 | ||
203 | p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); | |
204 | if (unlikely(!p)) | |
205 | return NULL; | |
206 | ||
207 | p->parent = parent; | |
208 | p->height = 0; | |
209 | p->bitmap = 0; | |
210 | p->prefix = __sync_leaf_prefix(p, id); | |
211 | return p; | |
212 | } | |
213 | ||
214 | static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) | |
215 | { | |
216 | unsigned int idx = __sync_leaf_idx(p, id); | |
217 | ||
218 | p->bitmap |= BIT(idx); | |
219 | __sync_seqno(p)[idx] = seqno; | |
220 | } | |
221 | ||
222 | static inline void __sync_set_child(struct i915_syncmap *p, | |
223 | unsigned int idx, | |
224 | struct i915_syncmap *child) | |
225 | { | |
226 | p->bitmap |= BIT(idx); | |
227 | __sync_child(p)[idx] = child; | |
228 | } | |
229 | ||
230 | static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) | |
231 | { | |
232 | struct i915_syncmap *p = *root; | |
233 | unsigned int idx; | |
234 | ||
235 | if (!p) { | |
236 | p = __sync_alloc_leaf(NULL, id); | |
237 | if (unlikely(!p)) | |
238 | return -ENOMEM; | |
239 | ||
240 | goto found; | |
241 | } | |
242 | ||
243 | /* Caller handled the likely cached case */ | |
244 | GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); | |
245 | ||
246 | /* Climb back up the tree until we find a common prefix */ | |
247 | do { | |
248 | if (!p->parent) | |
249 | break; | |
250 | ||
251 | p = p->parent; | |
252 | ||
253 | if (__sync_branch_prefix(p, id) == p->prefix) | |
254 | break; | |
255 | } while (1); | |
256 | ||
257 | /* | |
258 | * No shortcut, we have to descend the tree to find the right layer | |
259 | * containing this fence. | |
260 | * | |
261 | * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences | |
262 | * or lower layers. Leaf nodes (height = 0) contain the fences, all | |
263 | * other nodes (height > 0) are internal layers that point to a lower | |
264 | * node. Each internal layer has at least 2 descendents. | |
265 | * | |
266 | * Starting at the top, we check whether the current prefix matches. If | |
267 | * it doesn't, we have gone past our target and need to insert a join | |
268 | * into the tree, and a new leaf node for the target as a descendent | |
269 | * of the join, as well as the original layer. | |
270 | * | |
271 | * The matching prefix means we are still following the right branch | |
272 | * of the tree. If it has height 0, we have found our leaf and just | |
273 | * need to replace the fence slot with ourselves. If the height is | |
274 | * not zero, our slot contains the next layer in the tree (unless | |
275 | * it is empty, in which case we can add ourselves as a new leaf). | |
276 | * As descend the tree the prefix grows (and height decreases). | |
277 | */ | |
278 | do { | |
279 | struct i915_syncmap *next; | |
280 | ||
281 | if (__sync_branch_prefix(p, id) != p->prefix) { | |
282 | unsigned int above; | |
283 | ||
284 | /* Insert a join above the current layer */ | |
285 | next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), | |
286 | GFP_KERNEL); | |
287 | if (unlikely(!next)) | |
288 | return -ENOMEM; | |
289 | ||
290 | /* Compute the height at which these two diverge */ | |
291 | above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); | |
292 | above = round_up(above, SHIFT); | |
293 | next->height = above + p->height; | |
294 | next->prefix = __sync_branch_prefix(next, id); | |
295 | ||
296 | /* Insert the join into the parent */ | |
297 | if (p->parent) { | |
298 | idx = __sync_branch_idx(p->parent, id); | |
299 | __sync_child(p->parent)[idx] = next; | |
300 | GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); | |
301 | } | |
302 | next->parent = p->parent; | |
303 | ||
304 | /* Compute the idx of the other branch, not our id! */ | |
305 | idx = p->prefix >> (above - SHIFT) & MASK; | |
306 | __sync_set_child(next, idx, p); | |
307 | p->parent = next; | |
308 | ||
309 | /* Ascend to the join */ | |
310 | p = next; | |
311 | } else { | |
312 | if (!p->height) | |
313 | break; | |
314 | } | |
315 | ||
316 | /* Descend into the next layer */ | |
317 | GEM_BUG_ON(!p->height); | |
318 | idx = __sync_branch_idx(p, id); | |
319 | next = __sync_child(p)[idx]; | |
320 | if (!next) { | |
321 | next = __sync_alloc_leaf(p, id); | |
322 | if (unlikely(!next)) | |
323 | return -ENOMEM; | |
324 | ||
325 | __sync_set_child(p, idx, next); | |
326 | p = next; | |
327 | break; | |
328 | } | |
329 | ||
330 | p = next; | |
331 | } while (1); | |
332 | ||
333 | found: | |
334 | GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); | |
335 | __sync_set_seqno(p, id, seqno); | |
336 | *root = p; | |
337 | return 0; | |
338 | } | |
339 | ||
340 | /** | |
341 | * i915_syncmap_set -- mark the most recent syncpoint between contexts | |
342 | * @root - pointer to the #i915_syncmap | |
343 | * @id - the context id (other timeline) we have synchronised to | |
344 | * @seqno - the sequence number along the other timeline | |
345 | * | |
346 | * When we synchronise this @root timeline with another (@id), we also know | |
347 | * that we have synchronized with all previous seqno along that timeline. If | |
348 | * we then have a request to synchronise with the same seqno or older, we can | |
349 | * omit it, see i915_syncmap_is_later() | |
350 | * | |
351 | * Returns 0 on success, or a negative error code. | |
352 | */ | |
353 | int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) | |
354 | { | |
355 | struct i915_syncmap *p = *root; | |
356 | ||
357 | /* | |
358 | * We expect to be called in sequence following is_later(id), which | |
359 | * should have preloaded the root for us. | |
360 | */ | |
361 | if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { | |
362 | __sync_set_seqno(p, id, seqno); | |
363 | return 0; | |
364 | } | |
365 | ||
366 | return __sync_set(root, id, seqno); | |
367 | } | |
368 | ||
369 | static void __sync_free(struct i915_syncmap *p) | |
370 | { | |
371 | if (p->height) { | |
372 | unsigned int i; | |
373 | ||
374 | while ((i = ffs(p->bitmap))) { | |
375 | p->bitmap &= ~0u << i; | |
376 | __sync_free(__sync_child(p)[i - 1]); | |
377 | } | |
378 | } | |
379 | ||
380 | kfree(p); | |
381 | } | |
382 | ||
383 | /** | |
384 | * i915_syncmap_free -- free all memory associated with the syncmap | |
385 | * @root - pointer to the #i915_syncmap | |
386 | * | |
387 | * Either when the timeline is to be freed and we no longer need the sync | |
388 | * point tracking, or when the fences are all known to be signaled and the | |
389 | * sync point tracking is redundant, we can free the #i915_syncmap to recover | |
390 | * its allocations. | |
391 | * | |
392 | * Will reinitialise the @root pointer so that the #i915_syncmap is ready for | |
393 | * reuse. | |
394 | */ | |
395 | void i915_syncmap_free(struct i915_syncmap **root) | |
396 | { | |
397 | struct i915_syncmap *p; | |
398 | ||
399 | p = *root; | |
400 | if (!p) | |
401 | return; | |
402 | ||
403 | while (p->parent) | |
404 | p = p->parent; | |
405 | ||
406 | __sync_free(p); | |
407 | *root = NULL; | |
408 | } | |
409 | ||
410 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) | |
411 | #include "selftests/i915_syncmap.c" | |
412 | #endif |