/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
        (gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
                ((a)->lrca == (b)->lrca))

static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask);

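/*
 * Per-ring context switch interrupt events, indexed by ring id. These are
 * the virtual events injected into the guest when a context switch is
 * emulated on the corresponding engine.
 */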
static int context_switch_events[] = {
        [RCS] = RCS_AS_CONTEXT_SWITCH,
        [BCS] = BCS_AS_CONTEXT_SWITCH,
        [VCS] = VCS_AS_CONTEXT_SWITCH,
        [VCS2] = VCS2_AS_CONTEXT_SWITCH,
        [VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
        if (WARN_ON(ring_id < RCS ||
                    ring_id >= ARRAY_SIZE(context_switch_events)))
                return -EINVAL;

        return context_switch_events[ring_id];
}

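/*
 * Promote the pending execlist slot to the running slot, mirroring what
 * the hardware does when it switches to a freshly submitted execlist.
 * The running context is reset to element 0 of the new running slot.
 */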
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
        gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
                        execlist->running_slot ?
                        execlist->running_slot->index : -1,
                        execlist->running_context ?
                        execlist->running_context->context_id : 0,
                        execlist->pending_slot ?
                        execlist->pending_slot->index : -1);

        execlist->running_slot = execlist->pending_slot;
        execlist->pending_slot = NULL;
        execlist->running_context = execlist->running_context ?
                &execlist->running_slot->ctx[0] : NULL;

        gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
                        execlist->running_slot ?
                        execlist->running_slot->index : -1,
                        execlist->running_context ?
                        execlist->running_context->context_id : 0,
                        execlist->pending_slot ?
                        execlist->pending_slot->index : -1);
}

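/*
 * Refresh the virtual EXECLIST_STATUS register so the guest reads a value
 * consistent with the emulated running/pending slot state.
 */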
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
        struct execlist_ctx_descriptor_format *desc = execlist->running_context;
        struct intel_vgpu *vgpu = execlist->vgpu;
        struct execlist_status_format status;
        int ring_id = execlist->ring_id;
        u32 status_reg = execlist_ring_mmio(vgpu->gvt,
                        ring_id, _EL_OFFSET_STATUS);

        status.ldw = vgpu_vreg(vgpu, status_reg);
        status.udw = vgpu_vreg(vgpu, status_reg + 4);

        if (running) {
                status.current_execlist_pointer = !!running->index;
                status.execlist_write_pointer = !!!running->index;
                status.execlist_0_active = status.execlist_0_valid =
                        !!!(running->index);
                status.execlist_1_active = status.execlist_1_valid =
                        !!(running->index);
        } else {
                status.context_id = 0;
                status.execlist_0_active = status.execlist_0_valid = 0;
                status.execlist_1_active = status.execlist_1_valid = 0;
        }

        status.context_id = desc ? desc->context_id : 0;
        status.execlist_queue_full = !!(pending);

        vgpu_vreg(vgpu, status_reg) = status.ldw;
        vgpu_vreg(vgpu, status_reg + 4) = status.udw;

        gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
                vgpu->id, status_reg, status.ldw, status.udw);
}

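/*
 * Append one entry to the virtual context status buffer (CSB) and advance
 * its write pointer, which wraps around the six-entry buffer
 * (0 -> 1 -> ... -> 5 -> 0, with 0x7 as the reset value). Unless the
 * caller asks to defer it, a context switch interrupt is then injected
 * into the guest.
 */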
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
                struct execlist_context_status_format *status,
                bool trigger_interrupt_later)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        int ring_id = execlist->ring_id;
        struct execlist_context_status_pointer_format ctx_status_ptr;
        u32 write_pointer;
        u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;

        ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_PTR);
        ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_BUF);

        ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

        write_pointer = ctx_status_ptr.write_ptr;

        if (write_pointer == 0x7)
                write_pointer = 0;
        else {
                ++write_pointer;
                write_pointer %= 0x6;
        }

        offset = ctx_status_buf_reg + write_pointer * 8;

        vgpu_vreg(vgpu, offset) = status->ldw;
        vgpu_vreg(vgpu, offset + 4) = status->udw;

        ctx_status_ptr.write_ptr = write_pointer;
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

        gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
                vgpu->id, write_pointer, offset, status->ldw, status->udw);

        if (trigger_interrupt_later)
                return;

        intel_vgpu_trigger_virtual_event(vgpu,
                        ring_id_to_context_switch_event(execlist->ring_id));
}

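/*
 * Emulate the hardware schedule-out of a context: update the virtual
 * execlist state and report completion to the guest via the status
 * register and CSB. Depending on which element finished and whether a
 * pending execlist exists, this produces an element-switch,
 * active-to-idle or context-complete (+ idle-to-active) sequence.
 */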
static int emulate_execlist_ctx_schedule_out(
                struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format *ctx)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
        struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
        struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
        struct execlist_context_status_format status;

        memset(&status, 0, sizeof(status));

        gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

        if (WARN_ON(!same_context(ctx, execlist->running_context))) {
                gvt_vgpu_err("schedule out context is not running context, "
                                "ctx id %x running ctx id %x\n",
                                ctx->context_id,
                                execlist->running_context->context_id);
                return -EINVAL;
        }

        /* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
        if (valid_context(ctx1) && same_context(ctx0, ctx)) {
                gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

                execlist->running_context = ctx1;

                emulate_execlist_status(execlist);

                status.context_complete = status.element_switch = 1;
                status.context_id = ctx->context_id;

                emulate_csb_update(execlist, &status, false);
        /*
         * ctx1 is not valid, ctx == ctx0
         * ctx1 is valid, ctx1 == ctx
         *      --> last element is finished
         * emulate:
         *      active-to-idle if there is *no* pending execlist
         *      context-complete if there *is* pending execlist
         */
        } else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
                        || (valid_context(ctx1) && same_context(ctx1, ctx))) {
                gvt_dbg_el("need to switch virtual execlist slot\n");

                switch_virtual_execlist_slot(execlist);

                emulate_execlist_status(execlist);

                status.context_complete = status.active_to_idle = 1;
                status.context_id = ctx->context_id;

                if (!pending) {
                        emulate_csb_update(execlist, &status, false);
                } else {
                        emulate_csb_update(execlist, &status, true);

                        memset(&status, 0, sizeof(status));

                        status.idle_to_active = 1;
                        status.context_id = 0;

                        emulate_csb_update(execlist, &status, false);
                }
        } else {
                WARN_ON(1);
                return -EINVAL;
        }

        return 0;
}

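/*
 * Pick the slot the guest's next ELSP submission should land in, based
 * on the execlist write pointer in the virtual status register. Returns
 * NULL if both virtual execlist slots are in use.
 */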
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
                struct intel_vgpu_execlist *execlist)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        int ring_id = execlist->ring_id;
        u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS);
        struct execlist_status_format status;

        status.ldw = vgpu_vreg(vgpu, status_reg);
        status.udw = vgpu_vreg(vgpu, status_reg + 4);

        if (status.execlist_queue_full) {
                gvt_vgpu_err("virtual execlist slots are full\n");
                return NULL;
        }

        return &execlist->slot[status.execlist_write_pointer];
}

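/*
 * Emulate the hardware schedule-in of a newly written ELSP pair. Three
 * cases are handled: no execlist running (idle-to-active), lite-restore
 * with preemption of the running context, and simply parking the
 * submission in the pending slot.
 */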
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format ctx[2])
{
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *slot =
                get_next_execlist_slot(execlist);

        struct execlist_ctx_descriptor_format *ctx0, *ctx1;
        struct execlist_context_status_format status;
        struct intel_vgpu *vgpu = execlist->vgpu;

        gvt_dbg_el("emulate schedule-in\n");

        if (!slot) {
                gvt_vgpu_err("no available execlist slot\n");
                return -EINVAL;
        }

        memset(&status, 0, sizeof(status));
        memset(slot->ctx, 0, sizeof(slot->ctx));

        slot->ctx[0] = ctx[0];
        slot->ctx[1] = ctx[1];

        gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
                        slot->index, ctx[0].context_id,
                        ctx[1].context_id);

        /*
         * no running execlist, make this write bundle the running execlist
         * -> idle-to-active
         */
        if (!running) {
                gvt_dbg_el("no current running execlist\n");

                execlist->running_slot = slot;
                execlist->pending_slot = NULL;
                execlist->running_context = &slot->ctx[0];

                gvt_dbg_el("running slot index %d running context %x\n",
                                execlist->running_slot->index,
                                execlist->running_context->context_id);

                emulate_execlist_status(execlist);

                status.idle_to_active = 1;
                status.context_id = 0;

                emulate_csb_update(execlist, &status, false);
                return 0;
        }

        ctx0 = &running->ctx[0];
        ctx1 = &running->ctx[1];

        gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
                running->index, ctx0->context_id, ctx1->context_id);

        /*
         * already has a running execlist
         *      a. running ctx1 is valid,
         *         ctx0 is finished, and running ctx1 == new execlist ctx[0]
         *      b. running ctx1 is not valid,
         *         ctx0 == new execlist ctx[0]
         * ----> lite-restore + preempted
         */
        if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
                /* condition a */
                (!same_context(ctx0, execlist->running_context))) ||
                        (!valid_context(ctx1) &&
                         same_context(ctx0, &slot->ctx[0]))) { /* condition b */
                gvt_dbg_el("need to switch virtual execlist slot\n");

                execlist->pending_slot = slot;
                switch_virtual_execlist_slot(execlist);

                emulate_execlist_status(execlist);

                status.lite_restore = status.preempted = 1;
                status.context_id = ctx[0].context_id;

                emulate_csb_update(execlist, &status, false);
        } else {
                gvt_dbg_el("emulate as pending slot\n");
                /*
                 * no lite-restore/preemption condition matched
                 * --> emulate a pending execlist without preemption
                 */
                execlist->pending_slot = slot;
                emulate_execlist_status(execlist);
        }
        return 0;
}

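/* Unpin and drop the shadow mm reference, then free the workload. */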
static void free_workload(struct intel_vgpu_workload *workload)
{
        intel_vgpu_unpin_mm(workload->shadow_mm);
        intel_gvt_mm_unreference(workload->shadow_mm);
        kmem_cache_free(workload->vgpu->workloads, workload);
}

#define get_desc_from_elsp_dwords(ed, i) \
        ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

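/*
 * Pin each shadow batch buffer object into the global GTT and patch the
 * MI_BATCH_BUFFER_START command to point at the shadow copy instead of
 * the guest's buffer.
 */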
static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
        const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
        struct intel_shadow_bb_entry *entry_obj;

        /* pin the gem object to ggtt */
        list_for_each_entry(entry_obj, &workload->shadow_bb, list) {
                struct i915_vma *vma;

                vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
                if (IS_ERR(vma))
                        return;

                /* FIXME: we are not tracking our pinned VMA leaving it
                 * up to the core to fix up the stray pin_count upon
                 * free.
                 */

                /* update the relocated gma with the shadow batch buffer */
                entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma);
                if (gmadr_bytes == 8)
                        entry_obj->bb_start_cmd_va[2] = 0;
        }
}

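/*
 * Patch the per-context and RCS indirect (workaround) context pointers
 * in the shadow ring context so they reference the shadow WA buffers.
 */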
static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
        struct intel_vgpu_workload *workload = container_of(wa_ctx,
                        struct intel_vgpu_workload,
                        wa_ctx);
        int ring_id = workload->ring_id;
        struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
        struct drm_i915_gem_object *ctx_obj =
                shadow_ctx->engine[ring_id].state->obj;
        struct execlist_ring_context *shadow_ring_context;
        struct page *page;

        page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
        shadow_ring_context = kmap_atomic(page);

        shadow_ring_context->bb_per_ctx_ptr.val =
                (shadow_ring_context->bb_per_ctx_ptr.val &
                (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
        shadow_ring_context->rcs_indirect_ctx.val =
                (shadow_ring_context->rcs_indirect_ctx.val &
                (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;

        kunmap_atomic(shadow_ring_context);
        return 0;
}

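/*
 * Pin the shadow indirect (workaround) context into the global GTT,
 * record the resulting offsets and propagate them into the shadow ring
 * context.
 */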
static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
        struct i915_vma *vma;
        unsigned char *per_ctx_va =
                (unsigned char *)wa_ctx->indirect_ctx.shadow_va +
                wa_ctx->indirect_ctx.size;

        if (wa_ctx->indirect_ctx.size == 0)
                return;

        vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
                                       0, CACHELINE_BYTES, 0);
        if (IS_ERR(vma))
                return;

        /* FIXME: we are not tracking our pinned VMA leaving it
         * up to the core to fix up the stray pin_count upon
         * free.
         */

        wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);

        wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
        memset(per_ctx_va, 0, CACHELINE_BYTES);

        update_wa_ctx_2_shadow_ctx(wa_ctx);
}

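/*
 * Workload prepare callback: pin the shadow ppgtt, flush the shadowed
 * page tables, set up the shadow batch buffer and WA context, and
 * emulate the guest-visible schedule-in when the workload carries one.
 */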
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
        struct intel_vgpu *vgpu = workload->vgpu;
        struct execlist_ctx_descriptor_format ctx[2];
        int ring_id = workload->ring_id;

        intel_vgpu_pin_mm(workload->shadow_mm);
        intel_vgpu_sync_oos_pages(workload->vgpu);
        intel_vgpu_flush_post_shadow(workload->vgpu);
        prepare_shadow_batch_buffer(workload);
        prepare_shadow_wa_ctx(&workload->wa_ctx);
        if (!workload->emulate_schedule_in)
                return 0;

        ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
        ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

        return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
}

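/* Unpin and free all shadow batch buffers attached to a workload. */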
static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
        /* release all the shadow batch buffers */
        if (!list_empty(&workload->shadow_bb)) {
                struct intel_shadow_bb_entry *entry_obj =
                        list_first_entry(&workload->shadow_bb,
                                        struct intel_shadow_bb_entry,
                                        list);
                struct intel_shadow_bb_entry *temp;

                list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
                                list) {
                        i915_gem_object_unpin_map(entry_obj->obj);
                        i915_gem_object_put(entry_obj->obj);
                        list_del(&entry_obj->list);
                        kfree(entry_obj);
                }
        }
}

static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
        if (!wa_ctx->indirect_ctx.obj)
                return;

        i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
        i915_gem_object_put(wa_ctx->indirect_ctx.obj);
}

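/*
 * Workload complete callback: emulate the guest-visible side effects of
 * a finished workload. A failed workload (or one completing while its
 * engine is being reset) has its queue cleaned up instead, and a
 * lite-restore to the same context skips the schedule-out emulation.
 */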
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
        struct intel_vgpu *vgpu = workload->vgpu;
        int ring_id = workload->ring_id;
        struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
        struct intel_vgpu_workload *next_workload;
        struct list_head *next = workload_q_head(vgpu, ring_id)->next;
        bool lite_restore = false;
        int ret;

        gvt_dbg_el("complete workload %p status %d\n", workload,
                        workload->status);

        release_shadow_batch_buffer(workload);
        release_shadow_wa_ctx(&workload->wa_ctx);

        if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
                /* A non-zero workload->status means the HW GPU hit a hang,
                 * or something went wrong between i915 and GVT, and GVT
                 * won't inject a context switch interrupt into the guest.
                 * To the guest this error is effectively a vGPU hang, so
                 * emulate one: if there are pending workloads already
                 * submitted by the guest, clean them up the way the HW GPU
                 * would.
                 *
                 * If we are in the middle of an engine reset, the pending
                 * workloads won't be submitted to the HW GPU and will be
                 * cleaned up later during the reset, so doing the cleanup
                 * here as well has no impact.
                 */
                clean_workloads(vgpu, ENGINE_MASK(ring_id));
                goto out;
        }

        if (!list_empty(workload_q_head(vgpu, ring_id))) {
                struct execlist_ctx_descriptor_format *this_desc, *next_desc;

                next_workload = container_of(next,
                                struct intel_vgpu_workload, list);
                this_desc = &workload->ctx_desc;
                next_desc = &next_workload->ctx_desc;

                lite_restore = same_context(this_desc, next_desc);
        }

        if (lite_restore) {
                gvt_dbg_el("next context == current - no schedule-out\n");
                free_workload(workload);
                return 0;
        }

        ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
        if (ret)
                goto err;
out:
        free_workload(workload);
        return 0;
err:
        free_workload(workload);
        return ret;
}

#define RING_CTX_OFF(x) \
        offsetof(struct execlist_ring_context, x)

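/*
 * Read the eight PDP dwords (four page-directory pointers, upper dword
 * first) from the guest ring context. They are stored highest-first in
 * the context image, hence the reversed indexing into pdp[].
 */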
static void read_guest_pdps(struct intel_vgpu *vgpu,
                u64 ring_context_gpa, u32 pdp[8])
{
        u64 gpa;
        int i;

        gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);

        for (i = 0; i < 8; i++)
                intel_gvt_hypervisor_read_gpa(vgpu,
                                gpa + i * 8, &pdp[7 - i], 4);
}

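/*
 * Resolve the guest ppgtt for a workload from its context descriptor's
 * addressing mode and attach a (found or newly created) shadow mm.
 */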
static int prepare_mm(struct intel_vgpu_workload *workload)
{
        struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
        struct intel_vgpu_mm *mm;
        struct intel_vgpu *vgpu = workload->vgpu;
        int page_table_level;
        u32 pdp[8];

        if (desc->addressing_mode == 1) { /* legacy 32-bit */
                page_table_level = 3;
        } else if (desc->addressing_mode == 3) { /* legacy 64 bit */
                page_table_level = 4;
        } else {
                gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
                return -EINVAL;
        }

        read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);

        mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
        if (mm) {
                intel_gvt_mm_reference(mm);
        } else {
                mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
                                pdp, page_table_level, 0);
                if (IS_ERR(mm)) {
                        gvt_vgpu_err("fail to create mm object.\n");
                        return PTR_ERR(mm);
                }
        }
        workload->shadow_mm = mm;
        return 0;
}

#define get_last_workload(q) \
        (list_empty(q) ? NULL : container_of(q->prev, \
        struct intel_vgpu_workload, list))

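/*
 * Build a vGPU workload from one ELSP context descriptor: locate the
 * guest ring context, snapshot the ring buffer registers, fill in the
 * workload and queue it. For back-to-back submissions of the same
 * context, the head pointer is taken from the previous workload's tail,
 * as the guest context image may not have been updated yet.
 */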
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
                struct execlist_ctx_descriptor_format *desc,
                bool emulate_schedule_in)
{
        struct list_head *q = workload_q_head(vgpu, ring_id);
        struct intel_vgpu_workload *last_workload = get_last_workload(q);
        struct intel_vgpu_workload *workload = NULL;
        u64 ring_context_gpa;
        u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
        int ret;

        ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
                        (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
        if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
                gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
                return -EINVAL;
        }

        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ring_header.val), &head, 4);

        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ring_tail.val), &tail, 4);

        head &= RB_HEAD_OFF_MASK;
        tail &= RB_TAIL_OFF_MASK;

        if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
                gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
                gvt_dbg_el("ctx head %x real head %lx\n", head,
                                last_workload->rb_tail);
                /*
                 * cannot use the guest context head pointer here,
                 * as it might not have been updated at this point
                 */
                head = last_workload->rb_tail;
        }

        gvt_dbg_el("ring id %d begin a new workload\n", ring_id);

        workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL);
        if (!workload)
                return -ENOMEM;

        /* record some ring buffer register values for scan and shadow */
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(rb_start.val), &start, 4);
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);

        INIT_LIST_HEAD(&workload->list);
        INIT_LIST_HEAD(&workload->shadow_bb);

        init_waitqueue_head(&workload->shadow_ctx_status_wq);
        atomic_set(&workload->shadow_ctx_active, 0);

        workload->vgpu = vgpu;
        workload->ring_id = ring_id;
        workload->ctx_desc = *desc;
        workload->ring_context_gpa = ring_context_gpa;
        workload->rb_head = head;
        workload->rb_tail = tail;
        workload->rb_start = start;
        workload->rb_ctl = ctl;
        workload->prepare = prepare_execlist_workload;
        workload->complete = complete_execlist_workload;
        workload->status = -EINPROGRESS;
        workload->emulate_schedule_in = emulate_schedule_in;

        if (ring_id == RCS) {
                intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
                intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);

                workload->wa_ctx.indirect_ctx.guest_gma =
                        indirect_ctx & INDIRECT_CTX_ADDR_MASK;
                workload->wa_ctx.indirect_ctx.size =
                        (indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
                        CACHELINE_BYTES;
                workload->wa_ctx.per_ctx.guest_gma =
                        per_ctx & PER_CTX_ADDR_MASK;

                WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
        }

        if (emulate_schedule_in)
                workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords;

        gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
                        workload, ring_id, head, tail, start, ctl);

        gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
                        emulate_schedule_in);

        ret = prepare_mm(workload);
        if (ret) {
                kmem_cache_free(vgpu->workloads, workload);
                return ret;
        }

        queue_workload(workload);
        return 0;
}

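/*
 * Entry point for a guest ELSP write: validate both context descriptors
 * and submit a workload for each valid one. Only descriptor 0 drives the
 * schedule-in emulation.
 */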
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
        struct execlist_ctx_descriptor_format desc[2];
        int i, ret;

        desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
        desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);

        if (!desc[0].valid) {
                gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
                goto inv_desc;
        }

        for (i = 0; i < ARRAY_SIZE(desc); i++) {
                if (!desc[i].valid)
                        continue;
                if (!desc[i].privilege_access) {
                        gvt_vgpu_err("unexpected GGTT elsp submission\n");
                        goto inv_desc;
                }
        }

        /* submit workload */
        for (i = 0; i < ARRAY_SIZE(desc); i++) {
                if (!desc[i].valid)
                        continue;
                ret = submit_context(vgpu, ring_id, &desc[i], i == 0);
                if (ret) {
                        gvt_vgpu_err("failed to submit desc %d\n", i);
                        return ret;
                }
        }

        return 0;

inv_desc:
        gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
                     desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw);
        return -EINVAL;
}

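/*
 * Reset a ring's virtual execlist state and program the CSB write
 * pointer to its hardware reset value (0x7).
 */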
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
        struct execlist_context_status_pointer_format ctx_status_ptr;
        u32 ctx_status_ptr_reg;

        memset(execlist, 0, sizeof(*execlist));

        execlist->vgpu = vgpu;
        execlist->ring_id = ring_id;
        execlist->slot[0].index = 0;
        execlist->slot[1].index = 1;

        ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_PTR);

        ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
        ctx_status_ptr.read_ptr = 0;
        ctx_status_ptr.write_ptr = 0x7;
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine;
        struct intel_vgpu_workload *pos, *n;
        unsigned int tmp;

        /* free the unsubmitted workloads in the queues. */
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
                list_for_each_entry_safe(pos, n,
                        &vgpu->workload_q_head[engine->id], list) {
                        list_del_init(&pos->list);
                        free_workload(pos);
                }
        }
}

void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{
        clean_workloads(vgpu, ALL_ENGINES);
        kmem_cache_destroy(vgpu->workloads);
}

int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{
        enum intel_engine_id i;
        struct intel_engine_cs *engine;

        /* each ring has a virtual execlist engine */
        for_each_engine(engine, vgpu->gvt->dev_priv, i) {
                init_vgpu_execlist(vgpu, i);
                INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
        }

        vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload",
                        sizeof(struct intel_vgpu_workload), 0,
                        SLAB_HWCACHE_ALIGN,
                        NULL);

        if (!vgpu->workloads)
                return -ENOMEM;

        return 0;
}

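/*
 * Reset the virtual execlists on the engines selected by engine_mask:
 * drop their queued workloads and reinitialize the per-ring state.
 */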
void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
                unsigned long engine_mask)
{
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine;
        unsigned int tmp;

        clean_workloads(vgpu, engine_mask);
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
                init_vgpu_execlist(vgpu, engine->id);
}