#include "regs/xe_gpu_commands.h"
#include "xe_device.h"
#include "xe_engine_types.h"
+#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"
+/*
+ * Size, in bytes, of the prefetch allowance for batch buffers on @gt.
+ *
+ * Returns SZ_1K for a non-media GT whose graphics IP version (x100) is at
+ * least 1250, and SZ_512 for every other GT.
+ */
+static int bb_prefetch(struct xe_gt *gt)
+{
+	struct xe_device *dev = gt->xe;
+
+	/* Older graphics IP and media GTs get the smaller allowance. */
+	if (GRAPHICS_VERx100(dev) < 1250 || xe_gt_is_media_type(gt))
+		return SZ_512;
+
+	/*
+	 * RCS and CCS require 1K, although other engines would be
+	 * okay with 512.
+	 */
+	return SZ_1K;
+}
+
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
if (!bb)
return ERR_PTR(-ENOMEM);
- bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool :
- >->usm.bb_pool, 4 * dwords + 4);
+ /*
+ * We need to allocate space for the requested number of dwords,
+ * one additional MI_BATCH_BUFFER_END dword, and additional buffer
+ * space to accommodate the platform-specific hardware prefetch
+ * requirements.
+ */
+ bb->bo = xe_sa_bo_new(!usm ? >->kernel_bb_pool : >->usm.bb_pool,
+ 4 * (dwords + 1) + bb_prefetch(gt));
if (IS_ERR(bb->bo)) {
err = PTR_ERR(bb->bo);
goto err;