/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
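/*
 * Per-engine ExecList MMIO offsets, relative to the engine's mmio_base:
 * the EXECLIST_STATUS register, the context status buffer (CSB) and the
 * CSB pointer register. execlist_ring_mmio() turns them into absolute
 * register offsets for a given ring.
 */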
#define _EL_OFFSET_STATUS	0x234
#define _EL_OFFSET_STATUS_BUF	0x370
#define _EL_OFFSET_STATUS_PTR	0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))
static int context_switch_events[] = {
	[RCS] = RCS_AS_CONTEXT_SWITCH,
	[BCS] = BCS_AS_CONTEXT_SWITCH,
	[VCS] = VCS_AS_CONTEXT_SWITCH,
	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
	[VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
	if (WARN_ON(ring_id < RCS ||
		    ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}
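/*
 * Promote the pending virtual ExecList slot to the running slot, the way
 * the hardware does on a context-switch event: the pending slot becomes
 * the running one and its first element becomes the running context,
 * unless nothing was running before, in which case it stays NULL.
 */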
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}
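/*
 * Recompute the virtual EXECLIST_STATUS register from the current
 * running/pending slot state and write it back into the vGPU's vreg
 * space, so the guest sees a status that matches the emulated
 * submission state.
 */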
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}
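/*
 * Append one context status event to the virtual context status buffer
 * (six entries; the pointer register resets to 0x7), advance the virtual
 * CSB write pointer, and, unless the caller asks to defer it, raise the
 * ring's context-switch interrupt towards the guest.
 */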
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
		struct execlist_context_status_format *status,
		bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}
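/*
 * Emulate a context schedule-out: depending on whether the second element
 * of the running slot is still valid, report either an element switch or
 * an active-to-idle / context-complete event (plus idle-to-active when a
 * pending slot takes over) through the virtual CSB.
 */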
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_err("schedule out context is not running context, "
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
	/*
	 * ctx1 is not valid, ctx == ctx0
	 * ctx1 is valid, ctx1 == ctx
	 *	--> last element is finished
	 * emulate:
	 *	active-to-idle if there is *no* pending execlist
	 *	context-complete if there *is* pending execlist
	 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}
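/*
 * Emulate a guest ELSP submission: place the two context descriptors into
 * the next free virtual slot, then report idle-to-active (nothing was
 * running), lite-restore + preempted (the new head context matches what
 * the hardware would lite-restore), or simply park the slot as pending.
 */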
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this write bundle the running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 *	--> emulate the pending-execlist-exists, no-preemption case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}
static void free_workload(struct intel_vgpu_workload *workload)
{
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_gvt_mm_unreference(workload->shadow_mm);
	kmem_cache_free(workload->vgpu->workloads, workload);
}
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))


#define BATCH_BUFFER_ADDR_MASK ((1UL << 32) - (1U << 2))
#define BATCH_BUFFER_ADDR_HIGH_MASK ((1UL << 16) - (1U))
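/*
 * Patch the graphics memory address in a scanned MI_BATCH_BUFFER_START
 * command so that it points at the shadow batch buffer: dword 1 carries
 * the low address bits, and on platforms with 8-byte addresses in
 * commands dword 2 carries the high bits.
 */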
static int set_gma_to_bb_cmd(struct intel_shadow_bb_entry *entry_obj,
			     unsigned long add, int gmadr_bytes)
{
	if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8))
		return -1;

	*((u32 *)(entry_obj->bb_start_cmd_va + (1 << 2))) = add &
		BATCH_BUFFER_ADDR_MASK;
	if (gmadr_bytes == 8) {
		*((u32 *)(entry_obj->bb_start_cmd_va + (2 << 2))) =
			add & BATCH_BUFFER_ADDR_HIGH_MASK;
	}

	return 0;
}
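/*
 * Pin every shadow batch buffer object into the GGTT and rewrite the
 * corresponding MI_BATCH_BUFFER_START commands so they point at the
 * pinned shadow copies instead of the guest buffers.
 */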
static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
	struct i915_vma *vma;
	unsigned long gma;

	/* pin the gem object to ggtt */
	if (!list_empty(&workload->shadow_bb)) {
		struct intel_shadow_bb_entry *entry_obj =
			list_first_entry(&workload->shadow_bb,
					 struct intel_shadow_bb_entry,
					 list);
		struct intel_shadow_bb_entry *temp;

		list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
				list) {
			vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0,
					4, 0);
			if (IS_ERR(vma)) {
				gvt_err("Cannot pin\n");
				return;
			}
			i915_gem_object_unpin_pages(entry_obj->obj);

			/* update the relocated gma with the shadow batch buffer address */
			gma = i915_gem_object_ggtt_offset(entry_obj->obj, NULL);
			WARN_ON(!IS_ALIGNED(gma, 4));
			set_gma_to_bb_cmd(entry_obj, gma, gmadr_bytes);
		}
	}
}
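/*
 * Rewrite the per-context and RCS indirect-context (workaround batch)
 * pointers in the shadow LRC state page so they point at the shadowed
 * wa_ctx buffers rather than the guest's.
 */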
static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	int ring_id = wa_ctx->workload->ring_id;
	struct i915_gem_context *shadow_ctx =
		wa_ctx->workload->vgpu->shadow_ctx;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap_atomic(page);

	shadow_ring_context->bb_per_ctx_ptr.val =
		(shadow_ring_context->bb_per_ctx_ptr.val &
		(~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
	shadow_ring_context->rcs_indirect_ctx.val =
		(shadow_ring_context->rcs_indirect_ctx.val &
		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;

	kunmap_atomic(shadow_ring_context);
	return 0;
}
static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	struct i915_vma *vma;
	unsigned long gma;
	unsigned char *per_ctx_va =
		(unsigned char *)wa_ctx->indirect_ctx.shadow_va +
		wa_ctx->indirect_ctx.size;

	if (wa_ctx->indirect_ctx.size == 0)
		return;

	vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		gvt_err("Cannot pin indirect ctx obj\n");
		return;
	}
	i915_gem_object_unpin_pages(wa_ctx->indirect_ctx.obj);

	gma = i915_gem_object_ggtt_offset(wa_ctx->indirect_ctx.obj, NULL);
	WARN_ON(!IS_ALIGNED(gma, CACHELINE_BYTES));
	wa_ctx->indirect_ctx.shadow_gma = gma;

	wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
	memset(per_ctx_va, 0, CACHELINE_BYTES);

	update_wa_ctx_2_shadow_ctx(wa_ctx);
}
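/*
 * Workload prepare callback: pin the shadow PPGTT, flush pending guest
 * page-table updates, shadow the batch buffer and wa_ctx, and, when
 * requested, emulate the guest-visible schedule-in on the virtual
 * execlist.
 */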
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;

	intel_vgpu_pin_mm(workload->shadow_mm);
	intel_vgpu_sync_oos_pages(workload->vgpu);
	intel_vgpu_flush_post_shadow(workload->vgpu);
	prepare_shadow_batch_buffer(workload);
	prepare_shadow_wa_ctx(&workload->wa_ctx);
	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);

	return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
}
static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
{
	/* release all the shadow batch buffers */
	if (!list_empty(&workload->shadow_bb)) {
		struct intel_shadow_bb_entry *entry_obj =
			list_first_entry(&workload->shadow_bb,
					 struct intel_shadow_bb_entry,
					 list);
		struct intel_shadow_bb_entry *temp;

		list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
				list) {
			drm_gem_object_unreference(&(entry_obj->obj->base));
			kvfree(entry_obj->va);
			list_del(&entry_obj->list);
			kfree(entry_obj);
		}
	}
}
static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	if (wa_ctx->indirect_ctx.size == 0)
		return;

	drm_gem_object_unreference(&(wa_ctx->indirect_ctx.obj->base));
	kvfree(wa_ctx->indirect_ctx.shadow_va);
}
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_execlist *execlist =
		&vgpu->execlist[workload->ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
	bool lite_restore = false;
	int ret;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	release_shadow_batch_buffer(workload);
	release_shadow_wa_ctx(&workload->wa_ctx);

	if (workload->status || vgpu->resetting)
		goto out;

	if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		free_workload(workload);
		return 0;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
	if (ret)
		goto err;
out:
	free_workload(workload);
	return 0;
err:
	free_workload(workload);
	return ret;
}
#define RING_CTX_OFF(x) \
	offsetof(struct execlist_ring_context, x)
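/*
 * Read the four PDP entries (eight dwords) out of the guest's ring
 * context. The guest stores them from pdp3 downwards, hence the
 * reversed pdp[7 - i] indexing.
 */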
static void read_guest_pdps(struct intel_vgpu *vgpu,
		u64 ring_context_gpa, u32 pdp[8])
{
	u64 gpa;
	int i;

	gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);

	for (i = 0; i < 8; i++)
		intel_gvt_hypervisor_read_gpa(vgpu,
				gpa + i * 8, &pdp[7 - i], 4);
}
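/*
 * Build or look up the shadow PPGTT mm for this workload: the context
 * descriptor's addressing mode selects 3-level (legacy 32-bit) or
 * 4-level (legacy 64-bit) page tables, and the guest PDPs are the
 * lookup key.
 */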
static int prepare_mm(struct intel_vgpu_workload *workload)
{
	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
	struct intel_vgpu_mm *mm;
	int page_table_level;
	u32 pdp[8];

	if (desc->addressing_mode == 1) { /* legacy 32-bit */
		page_table_level = 3;
	} else if (desc->addressing_mode == 3) { /* legacy 64 bit */
		page_table_level = 4;
	} else {
		gvt_err("Advanced Context mode(SVM) is not supported!\n");
		return -EINVAL;
	}

	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);

	mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
	if (mm) {
		intel_gvt_mm_reference(mm);
	} else {
		mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
				pdp, page_table_level, 0);
		if (IS_ERR(mm)) {
			gvt_err("fail to create mm object.\n");
			return PTR_ERR(mm);
		}
	}
	workload->shadow_mm = mm;
	return 0;
}
#define get_last_workload(q) \
	(list_empty(q) ? NULL : container_of(q->prev, \
	struct intel_vgpu_workload, list))
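/*
 * Build an intel_vgpu_workload from one ELSP context descriptor: locate
 * the guest ring context by LRCA, snapshot the ring registers it needs,
 * set up the shadow mm, and queue the workload for the scheduler. When
 * the new submission targets the same context as the last queued
 * workload, the ring head is taken from that workload's tail, since the
 * guest-visible head may not be updated yet.
 */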
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct list_head *q = workload_q_head(vgpu, ring_id);
	struct intel_vgpu_workload *last_workload = get_last_workload(q);
	struct intel_vgpu_workload *workload = NULL;
	u64 ring_context_gpa;
	u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
	int ret;

	ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
			(u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
	if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
		gvt_err("invalid guest context LRCA: %x\n", desc->lrca);
		return -EINVAL;
	}

	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ring_header.val), &head, 4);

	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ring_tail.val), &tail, 4);

	head &= RB_HEAD_OFF_MASK;
	tail &= RB_TAIL_OFF_MASK;

	if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
		gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
		gvt_dbg_el("ctx head %x real head %lx\n", head,
				last_workload->rb_tail);
		/*
		 * cannot use guest context head pointer here,
		 * as it might not be updated at this time
		 */
		head = last_workload->rb_tail;
	}

	gvt_dbg_el("ring id %d begin a new workload\n", ring_id);

	workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL);
	if (!workload)
		return -ENOMEM;

	/* record some ring buffer register values for scan and shadow */
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rb_start.val), &start, 4);
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
	intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);

	INIT_LIST_HEAD(&workload->list);
	INIT_LIST_HEAD(&workload->shadow_bb);

	init_waitqueue_head(&workload->shadow_ctx_status_wq);
	atomic_set(&workload->shadow_ctx_active, 0);

	workload->vgpu = vgpu;
	workload->ring_id = ring_id;
	workload->ctx_desc = *desc;
	workload->ring_context_gpa = ring_context_gpa;
	workload->rb_head = head;
	workload->rb_tail = tail;
	workload->rb_start = start;
	workload->rb_ctl = ctl;
	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->status = -EINPROGRESS;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (ring_id == RCS) {
		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
		intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
			RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);

		workload->wa_ctx.indirect_ctx.guest_gma =
			indirect_ctx & INDIRECT_CTX_ADDR_MASK;
		workload->wa_ctx.indirect_ctx.size =
			(indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
			CACHELINE_BYTES;
		workload->wa_ctx.per_ctx.guest_gma =
			per_ctx & PER_CTX_ADDR_MASK;
		workload->wa_ctx.workload = workload;

		WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
	}

	if (emulate_schedule_in)
		memcpy(&workload->elsp_dwords,
				&vgpu->execlist[ring_id].elsp_dwords,
				sizeof(workload->elsp_dwords));

	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
			workload, ring_id, head, tail, start, ctl);

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	ret = prepare_mm(workload);
	if (ret) {
		kmem_cache_free(vgpu->workloads, workload);
		return ret;
	}

	queue_workload(workload);
	return 0;
}
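/*
 * Called on a guest ELSP write: validate both descriptors (only
 * privileged, PPGTT-based submissions are accepted) and turn each valid
 * one into a workload. Only the workload built from the first valid
 * descriptor carries the schedule-in emulation flag.
 */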
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
	unsigned long valid_desc_bitmap = 0;
	bool emulate_schedule_in = true;
	int ret;
	int i;

	memset(valid_desc, 0, sizeof(valid_desc));

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);

	for (i = 0; i < 2; i++) {
		if (!desc[i]->valid)
			continue;

		if (!desc[i]->privilege_access) {
			gvt_err("vgpu%d: unexpected GGTT elsp submission\n",
					vgpu->id);
			return -EINVAL;
		}

		/* TODO: add more guest context checks here. */
		set_bit(i, &valid_desc_bitmap);
		valid_desc[i] = *desc[i];
	}

	if (!valid_desc_bitmap) {
		gvt_err("vgpu%d: no valid desc in an elsp submission\n",
				vgpu->id);
		return -EINVAL;
	}

	if (!test_bit(0, (void *)&valid_desc_bitmap) &&
			test_bit(1, (void *)&valid_desc_bitmap)) {
		gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n",
				vgpu->id);
		return -EINVAL;
	}

	/* submit workload */
	for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
		ret = submit_context(vgpu, ring_id, &valid_desc[i],
				emulate_schedule_in);
		if (ret) {
			gvt_err("vgpu%d: fail to schedule workload\n",
					vgpu->id);
			return ret;
		}
		emulate_schedule_in = false;
	}
	return 0;
}
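/*
 * Reset one ring's virtual execlist state and program the virtual CSB
 * pointer register back to its power-on value (read and write pointers
 * both 0x7).
 */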
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}
void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
{
	kmem_cache_destroy(vgpu->workloads);
}
int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
{
	int i;

	/* each ring has a virtual execlist engine */
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		init_vgpu_execlist(vgpu, i);
		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
	}

	vgpu->workloads = kmem_cache_create("gvt-g vgpu workload",
			sizeof(struct intel_vgpu_workload), 0,
			SLAB_HWCACHE_ALIGN,
			NULL);

	if (!vgpu->workloads)
		return -ENOMEM;

	return 0;
}
void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
		unsigned long ring_bitmap)
{
	int bit;
	struct list_head *pos, *n;
	struct intel_vgpu_workload *workload = NULL;

	for_each_set_bit(bit, &ring_bitmap, sizeof(ring_bitmap) * 8) {
		if (bit >= I915_NUM_ENGINES)
			break;
		/* free the unsubmitted workloads in the queue */
		list_for_each_safe(pos, n, &vgpu->workload_q_head[bit]) {
			workload = container_of(pos,
					struct intel_vgpu_workload, list);
			list_del_init(&workload->list);
			free_workload(workload);
		}

		init_vgpu_execlist(vgpu, bit);
	}
}