ret = gmu_poll_timeout(gmu, REG_A6XX_RSCC_SEQ_BUSY_DRV0, val,
!val, 100, 10000);
- if (!ret) {
- gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
-
- /* Re-enable the power counter */
- gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
- return 0;
+ if (ret) {
+ DRM_DEV_ERROR(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n");
+ return ret;
}
- DRM_DEV_ERROR(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n");
- return ret;
+ gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
+
+ /* Set up CX GMU counter 0 to count busy ticks */
+ gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);
+ gmu_rmw(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0, 0xff, 0x20);
+
+ /* Enable the power counter */
+ gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);
+ return 0;
}
static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
}
+static void get_stats_counter(struct msm_ringbuffer *ring, u32 counter,
+ u64 iova)
+{
+ OUT_PKT7(ring, CP_REG_TO_MEM, 3);
+ OUT_RING(ring, counter | (1 << 30) | (2 << 18));
+ OUT_RING(ring, lower_32_bits(iova));
+ OUT_RING(ring, upper_32_bits(iova));
+}
+
static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
{
+ unsigned int index = submit->seqno % MSM_GPU_SUBMIT_STATS_COUNT;
struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = submit->ring;
unsigned int i;
+ get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+ rbmemptr_stats(ring, index, cpcycles_start));
+
+ /*
+ * For PM4 the GMU register offsets are calculated from the base of the
+ * GPU registers so we need to add 0x1a800 to the register value on A630
+ * to get the right value from PM4.
+ */
+ get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+ rbmemptr_stats(ring, index, alwayson_start));
+
/* Invalidate CCU depth and color */
OUT_PKT7(ring, CP_EVENT_WRITE, 1);
OUT_RING(ring, PC_CCU_INVALIDATE_DEPTH);
}
}
+ get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
+ rbmemptr_stats(ring, index, cpcycles_end));
+ get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+ rbmemptr_stats(ring, index, alwayson_end));
+
/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A6XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->seqno);
/* Select CP0 to always count cycles */
gpu_write(gpu, REG_A6XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
- /* FIXME: not sure if this should live here or in a6xx_gmu.c */
- gmu_write(&a6xx_gpu->gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK,
- 0xff000000);
- gmu_rmw(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_SELECT_0,
- 0xff, 0x20);
- gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE,
- 0x01);
-
gpu_write(gpu, REG_A6XX_RB_NC_MODE_CNTL, 2 << 1);
gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, 2 << 1);
gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL, 2 << 1);
#define rbmemptr(ring, member) \
((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member))
+#define rbmemptr_stats(ring, index, member) \
+ (rbmemptr((ring), stats) + \
+ ((index) * sizeof(struct msm_gpu_submit_stats)) + \
+ offsetof(struct msm_gpu_submit_stats, member))
+
+struct msm_gpu_submit_stats {
+ u64 cpcycles_start;
+ u64 cpcycles_end;
+ u64 alwayson_start;
+ u64 alwayson_end;
+};
+
+#define MSM_GPU_SUBMIT_STATS_COUNT 64
+
struct msm_rbmemptrs {
volatile uint32_t rptr;
volatile uint32_t fence;
+
+ volatile struct msm_gpu_submit_stats stats[MSM_GPU_SUBMIT_STATS_COUNT];
};
struct msm_ringbuffer {