]>
Commit | Line | Data |
---|---|---|
a89d1f92 CW |
1 | /* |
2 | * SPDX-License-Identifier: MIT | |
3 | * | |
4 | * Copyright © 2016-2018 Intel Corporation | |
5 | */ | |
6 | ||
7 | #include "i915_drv.h" | |
8 | ||
ebece753 | 9 | #include "i915_active.h" |
a89d1f92 | 10 | #include "i915_syncmap.h" |
2871ea85 CW |
11 | #include "intel_gt.h" |
12 | #include "intel_ring.h" | |
13 | #include "intel_timeline.h" | |
ebece753 CW |
14 | |
/*
 * Tag helpers for pointers: OR a flag bit into, or test a flag bit of,
 * the integer value of @ptr. ptr_set_bit() yields a pointer of the same
 * type as @ptr; ptr_test_bit() yields the masked unsigned long (non-zero
 * when the bit is set).
 */
#define ptr_set_bit(ptr, bit) \
	((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
#define ptr_test_bit(ptr, bit) \
	((unsigned long)(ptr) & BIT(bit))
a89d1f92 | 17 | |
/*
 * The low CACHELINE_BITS bits of a packed cacheline vaddr carry the
 * cacheline index within its page; the next bit up (CACHELINE_FREE) is
 * used as a "free once idle" flag.
 */
#define CACHELINE_BITS 6
#define CACHELINE_FREE CACHELINE_BITS
20 | ||
f0c02c1b | 21 | struct intel_timeline_hwsp { |
4c6d51ea | 22 | struct intel_gt *gt; |
c6fe28b0 | 23 | struct intel_gt_timelines *gt_timelines; |
8ba306a6 | 24 | struct list_head free_link; |
ebece753 | 25 | struct i915_vma *vma; |
8ba306a6 CW |
26 | u64 free_bitmap; |
27 | }; | |
28 | ||
4c6d51ea | 29 | static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) |
52954edd | 30 | { |
4c6d51ea | 31 | struct drm_i915_private *i915 = gt->i915; |
52954edd CW |
32 | struct drm_i915_gem_object *obj; |
33 | struct i915_vma *vma; | |
34 | ||
35 | obj = i915_gem_object_create_internal(i915, PAGE_SIZE); | |
36 | if (IS_ERR(obj)) | |
37 | return ERR_CAST(obj); | |
38 | ||
39 | i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); | |
40 | ||
4c6d51ea | 41 | vma = i915_vma_instance(obj, >->ggtt->vm, NULL); |
52954edd CW |
42 | if (IS_ERR(vma)) |
43 | i915_gem_object_put(obj); | |
44 | ||
45 | return vma; | |
46 | } | |
47 | ||
8ba306a6 | 48 | static struct i915_vma * |
f0c02c1b | 49 | hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) |
52954edd | 50 | { |
c6fe28b0 | 51 | struct intel_gt_timelines *gt = &timeline->gt->timelines; |
f0c02c1b | 52 | struct intel_timeline_hwsp *hwsp; |
52954edd | 53 | |
8ba306a6 | 54 | BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); |
52954edd | 55 | |
155ab883 | 56 | spin_lock_irq(>->hwsp_lock); |
52954edd | 57 | |
8ba306a6 CW |
58 | /* hwsp_free_list only contains HWSP that have available cachelines */ |
59 | hwsp = list_first_entry_or_null(>->hwsp_free_list, | |
60 | typeof(*hwsp), free_link); | |
61 | if (!hwsp) { | |
62 | struct i915_vma *vma; | |
63 | ||
155ab883 | 64 | spin_unlock_irq(>->hwsp_lock); |
8ba306a6 CW |
65 | |
66 | hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); | |
67 | if (!hwsp) | |
68 | return ERR_PTR(-ENOMEM); | |
69 | ||
4c6d51ea | 70 | vma = __hwsp_alloc(timeline->gt); |
8ba306a6 CW |
71 | if (IS_ERR(vma)) { |
72 | kfree(hwsp); | |
73 | return vma; | |
74 | } | |
75 | ||
d45171ac CW |
76 | GT_TRACE(timeline->gt, "new HWSP allocated\n"); |
77 | ||
8ba306a6 | 78 | vma->private = hwsp; |
4c6d51ea | 79 | hwsp->gt = timeline->gt; |
8ba306a6 CW |
80 | hwsp->vma = vma; |
81 | hwsp->free_bitmap = ~0ull; | |
4c6d51ea | 82 | hwsp->gt_timelines = gt; |
8ba306a6 | 83 | |
155ab883 | 84 | spin_lock_irq(>->hwsp_lock); |
8ba306a6 CW |
85 | list_add(&hwsp->free_link, >->hwsp_free_list); |
86 | } | |
87 | ||
88 | GEM_BUG_ON(!hwsp->free_bitmap); | |
89 | *cacheline = __ffs64(hwsp->free_bitmap); | |
90 | hwsp->free_bitmap &= ~BIT_ULL(*cacheline); | |
91 | if (!hwsp->free_bitmap) | |
92 | list_del(&hwsp->free_link); | |
93 | ||
155ab883 | 94 | spin_unlock_irq(>->hwsp_lock); |
8ba306a6 CW |
95 | |
96 | GEM_BUG_ON(hwsp->vma->private != hwsp); | |
97 | return hwsp->vma; | |
98 | } | |
99 | ||
f0c02c1b | 100 | static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline) |
8ba306a6 | 101 | { |
c6fe28b0 | 102 | struct intel_gt_timelines *gt = hwsp->gt_timelines; |
155ab883 | 103 | unsigned long flags; |
8ba306a6 | 104 | |
155ab883 | 105 | spin_lock_irqsave(>->hwsp_lock, flags); |
8ba306a6 CW |
106 | |
107 | /* As a cacheline becomes available, publish the HWSP on the freelist */ | |
108 | if (!hwsp->free_bitmap) | |
109 | list_add_tail(&hwsp->free_link, >->hwsp_free_list); | |
110 | ||
ebece753 CW |
111 | GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); |
112 | hwsp->free_bitmap |= BIT_ULL(cacheline); | |
8ba306a6 CW |
113 | |
114 | /* And if no one is left using it, give the page back to the system */ | |
115 | if (hwsp->free_bitmap == ~0ull) { | |
116 | i915_vma_put(hwsp->vma); | |
117 | list_del(&hwsp->free_link); | |
118 | kfree(hwsp); | |
119 | } | |
120 | ||
155ab883 | 121 | spin_unlock_irqrestore(>->hwsp_lock, flags); |
52954edd CW |
122 | } |
123 | ||
8e87e013 CW |
124 | static void __rcu_cacheline_free(struct rcu_head *rcu) |
125 | { | |
126 | struct intel_timeline_cacheline *cl = | |
127 | container_of(rcu, typeof(*cl), rcu); | |
128 | ||
45db630e CW |
129 | /* Must wait until after all *rq->hwsp are complete before removing */ |
130 | i915_gem_object_unpin_map(cl->hwsp->vma->obj); | |
131 | __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); | |
132 | ||
8e87e013 CW |
133 | i915_active_fini(&cl->active); |
134 | kfree(cl); | |
135 | } | |
136 | ||
f0c02c1b | 137 | static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) |
ebece753 CW |
138 | { |
139 | GEM_BUG_ON(!i915_active_is_idle(&cl->active)); | |
8e87e013 | 140 | call_rcu(&cl->rcu, __rcu_cacheline_free); |
ebece753 CW |
141 | } |
142 | ||
274cbf20 | 143 | __i915_active_call |
ebece753 CW |
144 | static void __cacheline_retire(struct i915_active *active) |
145 | { | |
f0c02c1b | 146 | struct intel_timeline_cacheline *cl = |
ebece753 CW |
147 | container_of(active, typeof(*cl), active); |
148 | ||
149 | i915_vma_unpin(cl->hwsp->vma); | |
150 | if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) | |
151 | __idle_cacheline_free(cl); | |
152 | } | |
153 | ||
12c255b5 CW |
154 | static int __cacheline_active(struct i915_active *active) |
155 | { | |
156 | struct intel_timeline_cacheline *cl = | |
157 | container_of(active, typeof(*cl), active); | |
158 | ||
159 | __i915_vma_pin(cl->hwsp->vma); | |
160 | return 0; | |
161 | } | |
162 | ||
f0c02c1b TU |
163 | static struct intel_timeline_cacheline * |
164 | cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) | |
ebece753 | 165 | { |
f0c02c1b | 166 | struct intel_timeline_cacheline *cl; |
ebece753 CW |
167 | void *vaddr; |
168 | ||
169 | GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); | |
170 | ||
171 | cl = kmalloc(sizeof(*cl), GFP_KERNEL); | |
172 | if (!cl) | |
173 | return ERR_PTR(-ENOMEM); | |
174 | ||
175 | vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); | |
176 | if (IS_ERR(vaddr)) { | |
177 | kfree(cl); | |
178 | return ERR_CAST(vaddr); | |
179 | } | |
180 | ||
ebece753 CW |
181 | cl->hwsp = hwsp; |
182 | cl->vaddr = page_pack_bits(vaddr, cacheline); | |
183 | ||
b1e3177b | 184 | i915_active_init(&cl->active, __cacheline_active, __cacheline_retire); |
ebece753 CW |
185 | |
186 | return cl; | |
187 | } | |
188 | ||
8ce70996 CW |
189 | static void cacheline_acquire(struct intel_timeline_cacheline *cl, |
190 | u32 ggtt_offset) | |
ebece753 | 191 | { |
8ce70996 CW |
192 | if (!cl) |
193 | return; | |
194 | ||
195 | cl->ggtt_offset = ggtt_offset; | |
196 | i915_active_acquire(&cl->active); | |
ebece753 CW |
197 | } |
198 | ||
f0c02c1b | 199 | static void cacheline_release(struct intel_timeline_cacheline *cl) |
ebece753 CW |
200 | { |
201 | if (cl) | |
202 | i915_active_release(&cl->active); | |
203 | } | |
204 | ||
f0c02c1b | 205 | static void cacheline_free(struct intel_timeline_cacheline *cl) |
ebece753 | 206 | { |
2d4bd971 CW |
207 | if (!i915_active_acquire_if_busy(&cl->active)) { |
208 | __idle_cacheline_free(cl); | |
209 | return; | |
210 | } | |
211 | ||
ebece753 CW |
212 | GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); |
213 | cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); | |
214 | ||
2d4bd971 | 215 | i915_active_release(&cl->active); |
ebece753 CW |
216 | } |
217 | ||
e31fe02e MK |
218 | static int intel_timeline_init(struct intel_timeline *timeline, |
219 | struct intel_gt *gt, | |
d1bf5dd8 CW |
220 | struct i915_vma *hwsp, |
221 | unsigned int offset) | |
a89d1f92 | 222 | { |
52954edd | 223 | void *vaddr; |
a89d1f92 | 224 | |
f0ca820c | 225 | kref_init(&timeline->kref); |
ccb23d2d | 226 | atomic_set(&timeline->pin_count, 0); |
f0ca820c | 227 | |
4c6d51ea | 228 | timeline->gt = gt; |
f0ca820c | 229 | |
85474441 | 230 | timeline->has_initial_breadcrumb = !hwsp; |
ebece753 | 231 | timeline->hwsp_cacheline = NULL; |
52954edd | 232 | |
8ba306a6 | 233 | if (!hwsp) { |
f0c02c1b | 234 | struct intel_timeline_cacheline *cl; |
8ba306a6 CW |
235 | unsigned int cacheline; |
236 | ||
237 | hwsp = hwsp_alloc(timeline, &cacheline); | |
238 | if (IS_ERR(hwsp)) | |
239 | return PTR_ERR(hwsp); | |
240 | ||
ebece753 CW |
241 | cl = cacheline_alloc(hwsp->private, cacheline); |
242 | if (IS_ERR(cl)) { | |
243 | __idle_hwsp_free(hwsp->private, cacheline); | |
244 | return PTR_ERR(cl); | |
245 | } | |
246 | ||
247 | timeline->hwsp_cacheline = cl; | |
8ba306a6 | 248 | timeline->hwsp_offset = cacheline * CACHELINE_BYTES; |
a89d1f92 | 249 | |
ebece753 CW |
250 | vaddr = page_mask_bits(cl->vaddr); |
251 | } else { | |
d1bf5dd8 | 252 | timeline->hwsp_offset = offset; |
ebece753 CW |
253 | vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); |
254 | if (IS_ERR(vaddr)) | |
255 | return PTR_ERR(vaddr); | |
52954edd | 256 | } |
a89d1f92 | 257 | |
52954edd CW |
258 | timeline->hwsp_seqno = |
259 | memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); | |
a89d1f92 | 260 | |
ebece753 CW |
261 | timeline->hwsp_ggtt = i915_vma_get(hwsp); |
262 | GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); | |
263 | ||
a89d1f92 CW |
264 | timeline->fence_context = dma_fence_context_alloc(1); |
265 | ||
3ef71149 | 266 | mutex_init(&timeline->mutex); |
a89d1f92 | 267 | |
df9f85d8 | 268 | INIT_ACTIVE_FENCE(&timeline->last_request); |
a89d1f92 CW |
269 | INIT_LIST_HEAD(&timeline->requests); |
270 | ||
271 | i915_syncmap_init(&timeline->sync); | |
52954edd | 272 | |
52954edd | 273 | return 0; |
a89d1f92 CW |
274 | } |
275 | ||
4605bb73 | 276 | void intel_gt_init_timelines(struct intel_gt *gt) |
1e345568 | 277 | { |
c6fe28b0 | 278 | struct intel_gt_timelines *timelines = >->timelines; |
1e345568 | 279 | |
338aade9 | 280 | spin_lock_init(&timelines->lock); |
390c8205 | 281 | INIT_LIST_HEAD(&timelines->active_list); |
1e345568 | 282 | |
390c8205 TU |
283 | spin_lock_init(&timelines->hwsp_lock); |
284 | INIT_LIST_HEAD(&timelines->hwsp_free_list); | |
390c8205 TU |
285 | } |
286 | ||
e31fe02e | 287 | static void intel_timeline_fini(struct intel_timeline *timeline) |
a89d1f92 | 288 | { |
ccb23d2d | 289 | GEM_BUG_ON(atomic_read(&timeline->pin_count)); |
a89d1f92 | 290 | GEM_BUG_ON(!list_empty(&timeline->requests)); |
4f88f874 | 291 | GEM_BUG_ON(timeline->retire); |
a89d1f92 | 292 | |
ebece753 CW |
293 | if (timeline->hwsp_cacheline) |
294 | cacheline_free(timeline->hwsp_cacheline); | |
295 | else | |
296 | i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); | |
297 | ||
52954edd | 298 | i915_vma_put(timeline->hwsp_ggtt); |
5ff3d5fc MB |
299 | |
300 | /* | |
301 | * A small race exists between intel_gt_retire_requests_timeout and | |
302 | * intel_timeline_exit which could result in the syncmap not getting | |
303 | * free'd. Rather than work to hard to seal this race, simply cleanup | |
304 | * the syncmap on fini. | |
305 | */ | |
306 | i915_syncmap_free(&timeline->sync); | |
a89d1f92 CW |
307 | } |
308 | ||
f0c02c1b | 309 | struct intel_timeline * |
d1bf5dd8 CW |
310 | __intel_timeline_create(struct intel_gt *gt, |
311 | struct i915_vma *global_hwsp, | |
312 | unsigned int offset) | |
a89d1f92 | 313 | { |
f0c02c1b | 314 | struct intel_timeline *timeline; |
52954edd | 315 | int err; |
a89d1f92 CW |
316 | |
317 | timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); | |
318 | if (!timeline) | |
319 | return ERR_PTR(-ENOMEM); | |
320 | ||
d1bf5dd8 | 321 | err = intel_timeline_init(timeline, gt, global_hwsp, offset); |
52954edd CW |
322 | if (err) { |
323 | kfree(timeline); | |
324 | return ERR_PTR(err); | |
325 | } | |
326 | ||
a89d1f92 CW |
327 | return timeline; |
328 | } | |
329 | ||
47b08693 ML |
330 | void __intel_timeline_pin(struct intel_timeline *tl) |
331 | { | |
332 | GEM_BUG_ON(!atomic_read(&tl->pin_count)); | |
333 | atomic_inc(&tl->pin_count); | |
334 | } | |
335 | ||
336 | int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww) | |
52954edd CW |
337 | { |
338 | int err; | |
339 | ||
ccb23d2d | 340 | if (atomic_add_unless(&tl->pin_count, 1, 0)) |
52954edd | 341 | return 0; |
52954edd | 342 | |
47b08693 | 343 | err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH); |
52954edd | 344 | if (err) |
ccb23d2d | 345 | return err; |
52954edd | 346 | |
5013eb8c CW |
347 | tl->hwsp_offset = |
348 | i915_ggtt_offset(tl->hwsp_ggtt) + | |
349 | offset_in_page(tl->hwsp_offset); | |
d45171ac CW |
350 | GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", |
351 | tl->fence_context, tl->hwsp_offset); | |
5013eb8c | 352 | |
8ce70996 | 353 | cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset); |
ccb23d2d CW |
354 | if (atomic_fetch_inc(&tl->pin_count)) { |
355 | cacheline_release(tl->hwsp_cacheline); | |
356 | __i915_vma_unpin(tl->hwsp_ggtt); | |
357 | } | |
9407d3bd | 358 | |
52954edd | 359 | return 0; |
52954edd CW |
360 | } |
361 | ||
bd3ec9e7 CW |
362 | void intel_timeline_reset_seqno(const struct intel_timeline *tl) |
363 | { | |
364 | /* Must be pinned to be writable, and no requests in flight. */ | |
365 | GEM_BUG_ON(!atomic_read(&tl->pin_count)); | |
366 | WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); | |
367 | } | |
368 | ||
531958f6 CW |
369 | void intel_timeline_enter(struct intel_timeline *tl) |
370 | { | |
371 | struct intel_gt_timelines *timelines = &tl->gt->timelines; | |
372 | ||
a6edbca7 CW |
373 | /* |
374 | * Pretend we are serialised by the timeline->mutex. | |
375 | * | |
376 | * While generally true, there are a few exceptions to the rule | |
377 | * for the engine->kernel_context being used to manage power | |
378 | * transitions. As the engine_park may be called from under any | |
379 | * timeline, it uses the power mutex as a global serialisation | |
380 | * lock to prevent any other request entering its timeline. | |
381 | * | |
382 | * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. | |
383 | * | |
384 | * However, intel_gt_retire_request() does not know which engine | |
385 | * it is retiring along and so cannot partake in the engine-pm | |
386 | * barrier, and there we use the tl->active_count as a means to | |
387 | * pin the timeline in the active_list while the locks are dropped. | |
388 | * Ergo, as that is outside of the engine-pm barrier, we need to | |
389 | * use atomic to manipulate tl->active_count. | |
390 | */ | |
6c69a454 | 391 | lockdep_assert_held(&tl->mutex); |
a6edbca7 CW |
392 | |
393 | if (atomic_add_unless(&tl->active_count, 1, 0)) | |
531958f6 | 394 | return; |
531958f6 | 395 | |
88cec497 | 396 | spin_lock(&timelines->lock); |
bd3ec9e7 CW |
397 | if (!atomic_fetch_inc(&tl->active_count)) { |
398 | /* | |
399 | * The HWSP is volatile, and may have been lost while inactive, | |
400 | * e.g. across suspend/resume. Be paranoid, and ensure that | |
401 | * the HWSP value matches our seqno so we don't proclaim | |
402 | * the next request as already complete. | |
403 | */ | |
404 | intel_timeline_reset_seqno(tl); | |
a6edbca7 | 405 | list_add_tail(&tl->link, &timelines->active_list); |
bd3ec9e7 | 406 | } |
88cec497 | 407 | spin_unlock(&timelines->lock); |
531958f6 CW |
408 | } |
409 | ||
410 | void intel_timeline_exit(struct intel_timeline *tl) | |
411 | { | |
412 | struct intel_gt_timelines *timelines = &tl->gt->timelines; | |
413 | ||
a6edbca7 | 414 | /* See intel_timeline_enter() */ |
6c69a454 CW |
415 | lockdep_assert_held(&tl->mutex); |
416 | ||
a6edbca7 CW |
417 | GEM_BUG_ON(!atomic_read(&tl->active_count)); |
418 | if (atomic_add_unless(&tl->active_count, -1, 1)) | |
531958f6 CW |
419 | return; |
420 | ||
88cec497 | 421 | spin_lock(&timelines->lock); |
a6edbca7 CW |
422 | if (atomic_dec_and_test(&tl->active_count)) |
423 | list_del(&tl->link); | |
88cec497 | 424 | spin_unlock(&timelines->lock); |
531958f6 CW |
425 | |
426 | /* | |
427 | * Since this timeline is idle, all bariers upon which we were waiting | |
428 | * must also be complete and so we can discard the last used barriers | |
429 | * without loss of information. | |
430 | */ | |
431 | i915_syncmap_free(&tl->sync); | |
432 | } | |
433 | ||
f0c02c1b | 434 | static u32 timeline_advance(struct intel_timeline *tl) |
ebece753 | 435 | { |
ccb23d2d | 436 | GEM_BUG_ON(!atomic_read(&tl->pin_count)); |
ebece753 CW |
437 | GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); |
438 | ||
439 | return tl->seqno += 1 + tl->has_initial_breadcrumb; | |
440 | } | |
441 | ||
f0c02c1b | 442 | static void timeline_rollback(struct intel_timeline *tl) |
ebece753 CW |
443 | { |
444 | tl->seqno -= 1 + tl->has_initial_breadcrumb; | |
445 | } | |
446 | ||
447 | static noinline int | |
f0c02c1b TU |
448 | __intel_timeline_get_seqno(struct intel_timeline *tl, |
449 | struct i915_request *rq, | |
450 | u32 *seqno) | |
ebece753 | 451 | { |
f0c02c1b | 452 | struct intel_timeline_cacheline *cl; |
ebece753 CW |
453 | unsigned int cacheline; |
454 | struct i915_vma *vma; | |
455 | void *vaddr; | |
456 | int err; | |
457 | ||
8faa7251 | 458 | might_lock(&tl->gt->ggtt->vm.mutex); |
d45171ac | 459 | GT_TRACE(tl->gt, "timeline:%llx wrapped\n", tl->fence_context); |
8faa7251 | 460 | |
ebece753 CW |
461 | /* |
462 | * If there is an outstanding GPU reference to this cacheline, | |
463 | * such as it being sampled by a HW semaphore on another timeline, | |
464 | * we cannot wraparound our seqno value (the HW semaphore does | |
465 | * a strict greater-than-or-equals compare, not i915_seqno_passed). | |
466 | * So if the cacheline is still busy, we must detach ourselves | |
467 | * from it and leave it inflight alongside its users. | |
468 | * | |
469 | * However, if nobody is watching and we can guarantee that nobody | |
470 | * will, we could simply reuse the same cacheline. | |
471 | * | |
472 | * if (i915_active_request_is_signaled(&tl->last_request) && | |
473 | * i915_active_is_signaled(&tl->hwsp_cacheline->active)) | |
474 | * return 0; | |
475 | * | |
476 | * That seems unlikely for a busy timeline that needed to wrap in | |
477 | * the first place, so just replace the cacheline. | |
478 | */ | |
479 | ||
480 | vma = hwsp_alloc(tl, &cacheline); | |
481 | if (IS_ERR(vma)) { | |
482 | err = PTR_ERR(vma); | |
483 | goto err_rollback; | |
484 | } | |
485 | ||
47b08693 | 486 | err = i915_ggtt_pin(vma, NULL, 0, PIN_HIGH); |
ebece753 CW |
487 | if (err) { |
488 | __idle_hwsp_free(vma->private, cacheline); | |
489 | goto err_rollback; | |
490 | } | |
491 | ||
492 | cl = cacheline_alloc(vma->private, cacheline); | |
493 | if (IS_ERR(cl)) { | |
494 | err = PTR_ERR(cl); | |
495 | __idle_hwsp_free(vma->private, cacheline); | |
496 | goto err_unpin; | |
497 | } | |
498 | GEM_BUG_ON(cl->hwsp->vma != vma); | |
499 | ||
500 | /* | |
501 | * Attach the old cacheline to the current request, so that we only | |
502 | * free it after the current request is retired, which ensures that | |
503 | * all writes into the cacheline from previous requests are complete. | |
504 | */ | |
5d934137 CW |
505 | err = i915_active_ref(&tl->hwsp_cacheline->active, |
506 | tl->fence_context, | |
507 | &rq->fence); | |
ebece753 CW |
508 | if (err) |
509 | goto err_cacheline; | |
510 | ||
511 | cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ | |
512 | cacheline_free(tl->hwsp_cacheline); | |
513 | ||
514 | i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ | |
515 | i915_vma_put(tl->hwsp_ggtt); | |
516 | ||
517 | tl->hwsp_ggtt = i915_vma_get(vma); | |
518 | ||
519 | vaddr = page_mask_bits(cl->vaddr); | |
520 | tl->hwsp_offset = cacheline * CACHELINE_BYTES; | |
521 | tl->hwsp_seqno = | |
522 | memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); | |
523 | ||
524 | tl->hwsp_offset += i915_ggtt_offset(vma); | |
d45171ac CW |
525 | GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n", |
526 | tl->fence_context, tl->hwsp_offset); | |
ebece753 | 527 | |
8ce70996 | 528 | cacheline_acquire(cl, tl->hwsp_offset); |
ebece753 CW |
529 | tl->hwsp_cacheline = cl; |
530 | ||
531 | *seqno = timeline_advance(tl); | |
532 | GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); | |
533 | return 0; | |
534 | ||
535 | err_cacheline: | |
536 | cacheline_free(cl); | |
537 | err_unpin: | |
538 | i915_vma_unpin(vma); | |
539 | err_rollback: | |
540 | timeline_rollback(tl); | |
541 | return err; | |
542 | } | |
543 | ||
f0c02c1b TU |
544 | int intel_timeline_get_seqno(struct intel_timeline *tl, |
545 | struct i915_request *rq, | |
546 | u32 *seqno) | |
ebece753 CW |
547 | { |
548 | *seqno = timeline_advance(tl); | |
549 | ||
550 | /* Replace the HWSP on wraparound for HW semaphores */ | |
551 | if (unlikely(!*seqno && tl->hwsp_cacheline)) | |
f0c02c1b | 552 | return __intel_timeline_get_seqno(tl, rq, seqno); |
ebece753 CW |
553 | |
554 | return 0; | |
555 | } | |
556 | ||
f0c02c1b | 557 | static int cacheline_ref(struct intel_timeline_cacheline *cl, |
ebece753 CW |
558 | struct i915_request *rq) |
559 | { | |
d19d71fc | 560 | return i915_active_add_request(&cl->active, rq); |
ebece753 CW |
561 | } |
562 | ||
f0c02c1b TU |
563 | int intel_timeline_read_hwsp(struct i915_request *from, |
564 | struct i915_request *to, | |
565 | u32 *hwsp) | |
ebece753 | 566 | { |
85bedbf1 | 567 | struct intel_timeline_cacheline *cl; |
ebece753 CW |
568 | int err; |
569 | ||
85bedbf1 CW |
570 | GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline)); |
571 | ||
9eee0dd7 | 572 | rcu_read_lock(); |
85bedbf1 | 573 | cl = rcu_dereference(from->hwsp_cacheline); |
2759e395 CW |
574 | if (i915_request_completed(from)) /* confirm cacheline is valid */ |
575 | goto unlock; | |
85bedbf1 CW |
576 | if (unlikely(!i915_active_acquire_if_busy(&cl->active))) |
577 | goto unlock; /* seqno wrapped and completed! */ | |
578 | if (unlikely(i915_request_completed(from))) | |
579 | goto release; | |
9eee0dd7 | 580 | rcu_read_unlock(); |
9eee0dd7 | 581 | |
85bedbf1 CW |
582 | err = cacheline_ref(cl, to); |
583 | if (err) | |
584 | goto out; | |
9eee0dd7 | 585 | |
8ce70996 | 586 | *hwsp = cl->ggtt_offset; |
85bedbf1 CW |
587 | out: |
588 | i915_active_release(&cl->active); | |
589 | return err; | |
9eee0dd7 | 590 | |
85bedbf1 CW |
591 | release: |
592 | i915_active_release(&cl->active); | |
9eee0dd7 | 593 | unlock: |
85bedbf1 CW |
594 | rcu_read_unlock(); |
595 | return 1; | |
ebece753 CW |
596 | } |
597 | ||
f0c02c1b | 598 | void intel_timeline_unpin(struct intel_timeline *tl) |
52954edd | 599 | { |
ccb23d2d CW |
600 | GEM_BUG_ON(!atomic_read(&tl->pin_count)); |
601 | if (!atomic_dec_and_test(&tl->pin_count)) | |
52954edd CW |
602 | return; |
603 | ||
ebece753 | 604 | cacheline_release(tl->hwsp_cacheline); |
9407d3bd | 605 | |
52954edd CW |
606 | __i915_vma_unpin(tl->hwsp_ggtt); |
607 | } | |
608 | ||
f0c02c1b | 609 | void __intel_timeline_free(struct kref *kref) |
a89d1f92 | 610 | { |
f0c02c1b | 611 | struct intel_timeline *timeline = |
a89d1f92 CW |
612 | container_of(kref, typeof(*timeline), kref); |
613 | ||
f0c02c1b | 614 | intel_timeline_fini(timeline); |
d19d71fc | 615 | kfree_rcu(timeline, rcu); |
a89d1f92 CW |
616 | } |
617 | ||
4605bb73 | 618 | void intel_gt_fini_timelines(struct intel_gt *gt) |
1e345568 | 619 | { |
c6fe28b0 | 620 | struct intel_gt_timelines *timelines = >->timelines; |
1e345568 | 621 | |
390c8205 TU |
622 | GEM_BUG_ON(!list_empty(&timelines->active_list)); |
623 | GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); | |
390c8205 TU |
624 | } |
625 | ||
a89d1f92 | 626 | #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) |
f0c02c1b TU |
627 | #include "gt/selftests/mock_timeline.c" |
628 | #include "gt/selftest_timeline.c" | |
a89d1f92 | 629 | #endif |