/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
        (gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
                ((a)->lrca == (b)->lrca))

static int context_switch_events[] = {
        [RCS] = RCS_AS_CONTEXT_SWITCH,
        [BCS] = BCS_AS_CONTEXT_SWITCH,
        [VCS] = VCS_AS_CONTEXT_SWITCH,
        [VCS2] = VCS2_AS_CONTEXT_SWITCH,
        [VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
        if (WARN_ON(ring_id < RCS ||
                    ring_id >= ARRAY_SIZE(context_switch_events)))
                return -EINVAL;

        return context_switch_events[ring_id];
}

static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
        gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
                        execlist->running_slot ?
                        execlist->running_slot->index : -1,
                        execlist->running_context ?
                        execlist->running_context->context_id : 0,
                        execlist->pending_slot ?
                        execlist->pending_slot->index : -1);

        execlist->running_slot = execlist->pending_slot;
        execlist->pending_slot = NULL;
        execlist->running_context = execlist->running_context ?
                &execlist->running_slot->ctx[0] : NULL;

        gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
                        execlist->running_slot ?
                        execlist->running_slot->index : -1,
                        execlist->running_context ?
                        execlist->running_context->context_id : 0,
                        execlist->pending_slot ?
                        execlist->pending_slot->index : -1);
}

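/*
 * Rebuild the ring's virtual EXECLIST_STATUS register from the emulation
 * state: the active/valid bits follow the running slot, context_id follows
 * the running context, and queue_full is set while a pending slot exists.
 */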
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
        struct execlist_ctx_descriptor_format *desc = execlist->running_context;
        struct intel_vgpu *vgpu = execlist->vgpu;
        struct execlist_status_format status;
        int ring_id = execlist->ring_id;
        u32 status_reg = execlist_ring_mmio(vgpu->gvt,
                        ring_id, _EL_OFFSET_STATUS);

        status.ldw = vgpu_vreg(vgpu, status_reg);
        status.udw = vgpu_vreg(vgpu, status_reg + 4);

        if (running) {
                status.current_execlist_pointer = !!running->index;
                status.execlist_write_pointer = !!!running->index;
                status.execlist_0_active = status.execlist_0_valid =
                        !!!(running->index);
                status.execlist_1_active = status.execlist_1_valid =
                        !!(running->index);
        } else {
                status.context_id = 0;
                status.execlist_0_active = status.execlist_0_valid = 0;
                status.execlist_1_active = status.execlist_1_valid = 0;
        }

        status.context_id = desc ? desc->context_id : 0;
        status.execlist_queue_full = !!(pending);

        vgpu_vreg(vgpu, status_reg) = status.ldw;
        vgpu_vreg(vgpu, status_reg + 4) = status.udw;

        gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
                vgpu->id, status_reg, status.ldw, status.udw);
}

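/*
 * Append one context status event to the virtual CSB (context status
 * buffer), advance the virtual write pointer, and fire the ring's
 * context-switch event unless the caller asked to defer the interrupt.
 */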
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
                struct execlist_context_status_format *status,
                bool trigger_interrupt_later)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        int ring_id = execlist->ring_id;
        struct execlist_context_status_pointer_format ctx_status_ptr;
        u32 write_pointer;
        u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;

        ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_PTR);
        ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_BUF);

        ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

        write_pointer = ctx_status_ptr.write_ptr;

        if (write_pointer == 0x7)
                write_pointer = 0;
        else {
                ++write_pointer;
                write_pointer %= 0x6;
        }

        offset = ctx_status_buf_reg + write_pointer * 8;

        vgpu_vreg(vgpu, offset) = status->ldw;
        vgpu_vreg(vgpu, offset + 4) = status->udw;

        ctx_status_ptr.write_ptr = write_pointer;
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

        gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
                vgpu->id, write_pointer, offset, status->ldw, status->udw);

        if (trigger_interrupt_later)
                return;

        intel_vgpu_trigger_virtual_event(vgpu,
                        ring_id_to_context_switch_event(execlist->ring_id));
}

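/*
 * Emulate a context schedule-out: depending on whether the second element
 * of the running slot is still valid, report either an element switch or
 * an active-to-idle / context-complete event (plus an idle-to-active event
 * when a pending slot takes over).
 */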
static int emulate_execlist_ctx_schedule_out(
                struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format *ctx)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
        struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
        struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
        struct execlist_context_status_format status;

        memset(&status, 0, sizeof(status));

        gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

        if (WARN_ON(!same_context(ctx, execlist->running_context))) {
                gvt_vgpu_err("schedule out context is not running context,"
                                "ctx id %x running ctx id %x\n",
                                ctx->context_id,
                                execlist->running_context->context_id);
                return -EINVAL;
        }

        /* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
        if (valid_context(ctx1) && same_context(ctx0, ctx)) {
                gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

                execlist->running_context = ctx1;

                emulate_execlist_status(execlist);

                status.context_complete = status.element_switch = 1;
                status.context_id = ctx->context_id;

                emulate_csb_update(execlist, &status, false);
                /*
                 * ctx1 is not valid, ctx == ctx0
                 * ctx1 is valid, ctx1 == ctx
                 *      --> last element is finished
                 * emulate:
                 *      active-to-idle if there is *no* pending execlist
                 *      context-complete if there *is* pending execlist
                 */
        } else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
                        || (valid_context(ctx1) && same_context(ctx1, ctx))) {
                gvt_dbg_el("need to switch virtual execlist slot\n");

                switch_virtual_execlist_slot(execlist);

                emulate_execlist_status(execlist);

                status.context_complete = status.active_to_idle = 1;
                status.context_id = ctx->context_id;

                if (!pending) {
                        emulate_csb_update(execlist, &status, false);
                } else {
                        emulate_csb_update(execlist, &status, true);

                        memset(&status, 0, sizeof(status));

                        status.idle_to_active = 1;
                        status.context_id = 0;

                        emulate_csb_update(execlist, &status, false);
                }
        } else {
                WARN_ON(1);
                return -EINVAL;
        }

        return 0;
}

static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
                struct intel_vgpu_execlist *execlist)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        int ring_id = execlist->ring_id;
        u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS);
        struct execlist_status_format status;

        status.ldw = vgpu_vreg(vgpu, status_reg);
        status.udw = vgpu_vreg(vgpu, status_reg + 4);

        if (status.execlist_queue_full) {
                gvt_vgpu_err("virtual execlist slots are full\n");
                return NULL;
        }

        return &execlist->slot[status.execlist_write_pointer];
}

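/*
 * Emulate an ELSP schedule-in of two context descriptors: either start a
 * new running slot (idle-to-active), lite-restore/preempt into the current
 * one, or park the submission as the pending slot.
 */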
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format ctx[2])
{
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *slot =
                get_next_execlist_slot(execlist);

        struct execlist_ctx_descriptor_format *ctx0, *ctx1;
        struct execlist_context_status_format status;
        struct intel_vgpu *vgpu = execlist->vgpu;

        gvt_dbg_el("emulate schedule-in\n");

        if (!slot) {
                gvt_vgpu_err("no available execlist slot\n");
                return -EINVAL;
        }

        memset(&status, 0, sizeof(status));
        memset(slot->ctx, 0, sizeof(slot->ctx));

        slot->ctx[0] = ctx[0];
        slot->ctx[1] = ctx[1];

        gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
                        slot->index, ctx[0].context_id,
                        ctx[1].context_id);

        /*
         * no running execlist, make this write bundle as running execlist
         * -> idle-to-active
         */
        if (!running) {
                gvt_dbg_el("no current running execlist\n");

                execlist->running_slot = slot;
                execlist->pending_slot = NULL;
                execlist->running_context = &slot->ctx[0];

                gvt_dbg_el("running slot index %d running context %x\n",
                                execlist->running_slot->index,
                                execlist->running_context->context_id);

                emulate_execlist_status(execlist);

                status.idle_to_active = 1;
                status.context_id = 0;

                emulate_csb_update(execlist, &status, false);
                return 0;
        }

        ctx0 = &running->ctx[0];
        ctx1 = &running->ctx[1];

        gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
                running->index, ctx0->context_id, ctx1->context_id);

        /*
         * already has a running execlist
         *      a. running ctx1 is valid,
         *         ctx0 is finished, and running ctx1 == new execlist ctx[0]
         *      b. running ctx1 is not valid,
         *         ctx0 == new execlist ctx[0]
         * ----> lite-restore + preempted
         */
        if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
                /* condition a */
                (!same_context(ctx0, execlist->running_context))) ||
                        (!valid_context(ctx1) &&
                         same_context(ctx0, &slot->ctx[0]))) { /* condition b */
                gvt_dbg_el("need to switch virtual execlist slot\n");

                execlist->pending_slot = slot;
                switch_virtual_execlist_slot(execlist);

                emulate_execlist_status(execlist);

                status.lite_restore = status.preempted = 1;
                status.context_id = ctx[0].context_id;

                emulate_csb_update(execlist, &status, false);
        } else {
                gvt_dbg_el("emulate as pending slot\n");
                /*
                 * no condition is matched
                 * --> emulate pending execlist exist + but no preemption case
                 */
                execlist->pending_slot = slot;
                emulate_execlist_status(execlist);
        }
        return 0;
}

static void free_workload(struct intel_vgpu_workload *workload)
{
        intel_vgpu_unpin_mm(workload->shadow_mm);
        intel_gvt_mm_unreference(workload->shadow_mm);
        kmem_cache_free(workload->vgpu->workloads, workload);
}

#define get_desc_from_elsp_dwords(ed, i) \
        ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

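/*
 * Pin each shadow batch buffer object into the GGTT and relocate the
 * guest batch-buffer-start command so it points at the shadow copy.
 */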
static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
        const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
        struct intel_shadow_bb_entry *entry_obj;

        /* pin the gem object to ggtt */
        list_for_each_entry(entry_obj, &workload->shadow_bb, list) {
                struct i915_vma *vma;

                vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
                if (IS_ERR(vma))
                        return;

                /* FIXME: we are not tracking our pinned VMA leaving it
                 * up to the core to fix up the stray pin_count upon
                 * free.
                 */

                /* update the relocate gma with shadow batch buffer*/
                entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma);
                if (gmadr_bytes == 8)
                        entry_obj->bb_start_cmd_va[2] = 0;
        }
}

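/*
 * Write the shadow per-context and indirect-context GMAs back into the
 * shadow ring context image kept in the shadow LRC state page.
 */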
static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
        struct intel_vgpu_workload *workload = container_of(wa_ctx,
                        struct intel_vgpu_workload,
                        wa_ctx);
        int ring_id = workload->ring_id;
        struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
        struct drm_i915_gem_object *ctx_obj =
                shadow_ctx->engine[ring_id].state->obj;
        struct execlist_ring_context *shadow_ring_context;
        struct page *page;

        page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
        shadow_ring_context = kmap_atomic(page);

        shadow_ring_context->bb_per_ctx_ptr.val =
                (shadow_ring_context->bb_per_ctx_ptr.val &
                (~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
        shadow_ring_context->rcs_indirect_ctx.val =
                (shadow_ring_context->rcs_indirect_ctx.val &
                (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;

        kunmap_atomic(shadow_ring_context);
        return 0;
}

static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
        struct i915_vma *vma;
        unsigned char *per_ctx_va =
                (unsigned char *)wa_ctx->indirect_ctx.shadow_va +
                wa_ctx->indirect_ctx.size;

        if (wa_ctx->indirect_ctx.size == 0)
                return;

        vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
                                       0, CACHELINE_BYTES, 0);
        if (IS_ERR(vma))
                return;

        /* FIXME: we are not tracking our pinned VMA leaving it
         * up to the core to fix up the stray pin_count upon
         * free.
         */

        wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);

        wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
        memset(per_ctx_va, 0, CACHELINE_BYTES);

        update_wa_ctx_2_shadow_ctx(wa_ctx);
}

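/*
 * Per-workload prepare callback: pin the shadow mm, sync and flush
 * shadowed pages, set up the shadow batch buffer and wa_ctx, and emulate
 * the ELSP schedule-in when the workload asks for it.
 */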
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
        struct intel_vgpu *vgpu = workload->vgpu;
        struct execlist_ctx_descriptor_format ctx[2];
        int ring_id = workload->ring_id;

        intel_vgpu_pin_mm(workload->shadow_mm);
        intel_vgpu_sync_oos_pages(workload->vgpu);
        intel_vgpu_flush_post_shadow(workload->vgpu);
        prepare_shadow_batch_buffer(workload);
        prepare_shadow_wa_ctx(&workload->wa_ctx);
        if (!workload->emulate_schedule_in)
                return 0;

        ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
        ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

        return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
}

static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
        /* release all the shadow batch buffer */
        if (!list_empty(&workload->shadow_bb)) {
                struct intel_shadow_bb_entry *entry_obj =
                        list_first_entry(&workload->shadow_bb,
                                        struct intel_shadow_bb_entry,
                                        list);
                struct intel_shadow_bb_entry *temp;

                list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
                                list) {
                        i915_gem_object_unpin_map(entry_obj->obj);
                        i915_gem_object_put(entry_obj->obj);
                        list_del(&entry_obj->list);
                        kfree(entry_obj);
                }
        }
}

static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
        if (!wa_ctx->indirect_ctx.obj)
                return;

        i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
        i915_gem_object_put(wa_ctx->indirect_ctx.obj);
}

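/*
 * Per-workload complete callback: release shadow resources and, unless the
 * next queued workload is a lite-restore of the same context, emulate the
 * schedule-out of this context.
 */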
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
        struct intel_vgpu *vgpu = workload->vgpu;
        struct intel_vgpu_execlist *execlist =
                &vgpu->execlist[workload->ring_id];
        struct intel_vgpu_workload *next_workload;
        struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
        bool lite_restore = false;
        int ret;

        gvt_dbg_el("complete workload %p status %d\n", workload,
                        workload->status);

        release_shadow_batch_buffer(workload);
        release_shadow_wa_ctx(&workload->wa_ctx);

        if (workload->status || vgpu->resetting)
                goto out;

        if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
                struct execlist_ctx_descriptor_format *this_desc, *next_desc;

                next_workload = container_of(next,
                                struct intel_vgpu_workload, list);
                this_desc = &workload->ctx_desc;
                next_desc = &next_workload->ctx_desc;

                lite_restore = same_context(this_desc, next_desc);
        }

        if (lite_restore) {
                gvt_dbg_el("next context == current - no schedule-out\n");
                free_workload(workload);
                return 0;
        }

        ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
        if (ret)
                goto err;
out:
        free_workload(workload);
        return 0;
err:
        free_workload(workload);
        return ret;
}

#define RING_CTX_OFF(x) \
        offsetof(struct execlist_ring_context, x)

static void read_guest_pdps(struct intel_vgpu *vgpu,
                u64 ring_context_gpa, u32 pdp[8])
{
        u64 gpa;
        int i;

        gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);

        for (i = 0; i < 8; i++)
                intel_gvt_hypervisor_read_gpa(vgpu,
                                gpa + i * 8, &pdp[7 - i], 4);
}

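/*
 * Look up (or create) the shadow PPGTT mm that matches the guest page
 * directory pointers found in the ring context.
 */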
static int prepare_mm(struct intel_vgpu_workload *workload)
{
        struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
        struct intel_vgpu_mm *mm;
        struct intel_vgpu *vgpu = workload->vgpu;
        int page_table_level;
        u32 pdp[8];

        if (desc->addressing_mode == 1) { /* legacy 32-bit */
                page_table_level = 3;
        } else if (desc->addressing_mode == 3) { /* legacy 64 bit */
                page_table_level = 4;
        } else {
                gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
                return -EINVAL;
        }

        read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);

        mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
        if (mm) {
                intel_gvt_mm_reference(mm);
        } else {
                mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
                                pdp, page_table_level, 0);
                if (IS_ERR(mm)) {
                        gvt_vgpu_err("fail to create mm object.\n");
                        return PTR_ERR(mm);
                }
        }
        workload->shadow_mm = mm;
        return 0;
}

#define get_last_workload(q) \
        (list_empty(q) ? NULL : container_of(q->prev, \
        struct intel_vgpu_workload, list))

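/*
 * Build a workload from a guest context descriptor: read the ring buffer
 * and wa_ctx registers out of the guest ring context, shadow the PPGTT,
 * and queue the workload to the scheduler.
 */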
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
                struct execlist_ctx_descriptor_format *desc,
                bool emulate_schedule_in)
{
        struct list_head *q = workload_q_head(vgpu, ring_id);
        struct intel_vgpu_workload *last_workload = get_last_workload(q);
        struct intel_vgpu_workload *workload = NULL;
        u64 ring_context_gpa;
        u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
        int ret;

        ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
                        (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
        if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
                gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
                return -EINVAL;
        }

        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ring_header.val), &head, 4);

        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ring_tail.val), &tail, 4);

        head &= RB_HEAD_OFF_MASK;
        tail &= RB_TAIL_OFF_MASK;

        if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
                gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
                gvt_dbg_el("ctx head %x real head %lx\n", head,
                                last_workload->rb_tail);
                /*
                 * cannot use guest context head pointer here,
                 * as it might not be updated at this time
                 */
                head = last_workload->rb_tail;
        }

        gvt_dbg_el("ring id %d begin a new workload\n", ring_id);

        workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL);
        if (!workload)
                return -ENOMEM;

        /* record some ring buffer register values for scan and shadow */
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(rb_start.val), &start, 4);
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
        intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);

        INIT_LIST_HEAD(&workload->list);
        INIT_LIST_HEAD(&workload->shadow_bb);

        init_waitqueue_head(&workload->shadow_ctx_status_wq);
        atomic_set(&workload->shadow_ctx_active, 0);

        workload->vgpu = vgpu;
        workload->ring_id = ring_id;
        workload->ctx_desc = *desc;
        workload->ring_context_gpa = ring_context_gpa;
        workload->rb_head = head;
        workload->rb_tail = tail;
        workload->rb_start = start;
        workload->rb_ctl = ctl;
        workload->prepare = prepare_execlist_workload;
        workload->complete = complete_execlist_workload;
        workload->status = -EINPROGRESS;
        workload->emulate_schedule_in = emulate_schedule_in;

        if (ring_id == RCS) {
                intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
                intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
                        RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);

                workload->wa_ctx.indirect_ctx.guest_gma =
                        indirect_ctx & INDIRECT_CTX_ADDR_MASK;
                workload->wa_ctx.indirect_ctx.size =
                        (indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
                        CACHELINE_BYTES;
                workload->wa_ctx.per_ctx.guest_gma =
                        per_ctx & PER_CTX_ADDR_MASK;

                WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
        }

        if (emulate_schedule_in)
                workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords;

        gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
                        workload, ring_id, head, tail, start, ctl);

        gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
                        emulate_schedule_in);

        ret = prepare_mm(workload);
        if (ret) {
                kmem_cache_free(vgpu->workloads, workload);
                return ret;
        }

        queue_workload(workload);
        return 0;
}

int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
        struct execlist_ctx_descriptor_format desc[2];
        int i, ret;

        desc[0] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
        desc[1] = *get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);

        if (!desc[0].valid) {
                gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
                goto inv_desc;
        }

        for (i = 0; i < ARRAY_SIZE(desc); i++) {
                if (!desc[i].valid)
                        continue;
                if (!desc[i].privilege_access) {
                        gvt_vgpu_err("unexpected GGTT elsp submission\n");
                        goto inv_desc;
                }
        }

        /* submit workload */
        for (i = 0; i < ARRAY_SIZE(desc); i++) {
                if (!desc[i].valid)
                        continue;
                ret = submit_context(vgpu, ring_id, &desc[i], i == 0);
                if (ret) {
                        gvt_vgpu_err("failed to submit desc %d\n", i);
                        return ret;
                }
        }

        return 0;

inv_desc:
        gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
                     desc[0].udw, desc[0].ldw, desc[1].udw, desc[1].ldw);
        return -EINVAL;
}

static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
        struct execlist_context_status_pointer_format ctx_status_ptr;
        u32 ctx_status_ptr_reg;

        memset(execlist, 0, sizeof(*execlist));

        execlist->vgpu = vgpu;
        execlist->ring_id = ring_id;
        execlist->slot[0].index = 0;
        execlist->slot[1].index = 1;

        ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_PTR);

        ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
        ctx_status_ptr.read_ptr = 0;
        ctx_status_ptr.write_ptr = 0x7;
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

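/* Free the not-yet-dispatched workloads queued on the given engines. */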
static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine;
        struct intel_vgpu_workload *pos, *n;
        unsigned int tmp;

        /* free the unsubmitted workloads in the queues. */
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
                list_for_each_entry_safe(pos, n,
                        &vgpu->workload_q_head[engine->id], list) {
                        list_del_init(&pos->list);
                        free_workload(pos);
                }
        }
}

void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{
        clean_workloads(vgpu, ALL_ENGINES);
        kmem_cache_destroy(vgpu->workloads);
}

int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{
        enum intel_engine_id i;
        struct intel_engine_cs *engine;

        /* each ring has a virtual execlist engine */
        for_each_engine(engine, vgpu->gvt->dev_priv, i) {
                init_vgpu_execlist(vgpu, i);
                INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
        }

        vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload",
                        sizeof(struct intel_vgpu_workload), 0,
                        SLAB_HWCACHE_ALIGN,
                        NULL);

        if (!vgpu->workloads)
                return -ENOMEM;

        return 0;
}

void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
                unsigned long engine_mask)
{
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine;
        unsigned int tmp;

        clean_workloads(vgpu, engine_mask);
        for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
                init_vgpu_execlist(vgpu, engine->id);
}