]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
drm/amdgpu: request reg_val_offs each kiq read reg
authorYintian Tao <yttao@amd.com>
Wed, 22 Apr 2020 11:58:22 +0000 (19:58 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Thu, 23 Apr 2020 19:06:41 +0000 (15:06 -0400)
According to the current kiq read register method,
there will be race condition when using KIQ to read
register if multiple clients want to read at same time
just like the expample below:
1. client-A start to read REG-0 throguh KIQ
2. client-A poll the seqno-0
3. client-B start to read REG-1 through KIQ
4. client-B poll the seqno-1
5. the kiq complete these two read operation
6. client-A to read the register at the wb buffer and
   get REG-1 value

Therefore, use amdgpu_device_wb_get() to request reg_val_offs
for each kiq read register.

v2: fix the error remove
v3: fix the print typo
v4: remove unused variables

Signed-off-by: Yintian Tao <yttao@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index b90449cf23fa68122597bb145e42735882e8ba65..56da8920195d83278102ef29157bb8d14cd93953 100644 (file)
@@ -533,7 +533,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
 /*
  * Writeback
  */
-#define AMDGPU_MAX_WB 128      /* Reserve at most 128 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 256      /* Reserve at most 256 WB slots for amdgpu-owned rings. */
 
 struct amdgpu_wb {
        struct amdgpu_bo        *wb_obj;
index ea576b4260a461363a624aa9622ecebe8b2090a1..a721b0e0ff6952e43b6987754c77736e732f3be8 100644 (file)
@@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
        spin_lock_init(&kiq->ring_lock);
 
-       r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs);
-       if (r)
-               return r;
-
        ring->adev = NULL;
        ring->ring_obj = NULL;
        ring->use_doorbell = true;
@@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
-       amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs);
        amdgpu_ring_fini(ring);
 }
 
@@ -672,15 +667,20 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
        signed long r, cnt = 0;
        unsigned long flags;
-       uint32_t seq;
+       uint32_t seq, reg_val_offs = 0, value = 0;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *ring = &kiq->ring;
 
        BUG_ON(!ring->funcs->emit_rreg);
 
        spin_lock_irqsave(&kiq->ring_lock, flags);
+       if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+               spin_unlock_irqrestore(&kiq->ring_lock, flags);
+               pr_err("critical bug! too many kiq readers\n");
+               goto failed_kiq_read;
+       }
        amdgpu_ring_alloc(ring, 32);
-       amdgpu_ring_emit_rreg(ring, reg);
+       amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
        amdgpu_fence_emit_polling(ring, &seq);
        amdgpu_ring_commit(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -707,7 +707,10 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
        if (cnt > MAX_KIQ_REG_TRY)
                goto failed_kiq_read;
 
-       return adev->wb.wb[kiq->reg_val_offs];
+       mb();
+       value = adev->wb.wb[reg_val_offs];
+       amdgpu_device_wb_free(adev, reg_val_offs);
+       return value;
 
 failed_kiq_read:
        pr_err("failed to read reg:%x\n", reg);
index 634746829024955bf9b1f27342885fd436ca273d..ee698f0246d84bfaed29c0aa24ccfad7ee125e17 100644 (file)
@@ -103,7 +103,6 @@ struct amdgpu_kiq {
        struct amdgpu_ring      ring;
        struct amdgpu_irq_src   irq;
        const struct kiq_pm4_funcs *pmf;
-       uint32_t                        reg_val_offs;
 };
 
 /*
index 5254396e84dc54eb7fcb22af76a9c20919ec0ccc..e1d894f0d65432c405943456cac3feeab98693d0 100644 (file)
@@ -168,7 +168,8 @@ struct amdgpu_ring_funcs {
        void (*end_use)(struct amdgpu_ring *ring);
        void (*emit_switch_buffer) (struct amdgpu_ring *ring);
        void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
-       void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
+       void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
+                         uint32_t reg_val_offs);
        void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
        void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
                              uint32_t val, uint32_t mask);
@@ -250,7 +251,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
-#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
+#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
index 212492244e33ee5f1dfc58fd94dc0631066f1c86..e1687e408e1d2baeea20bcb6c406d72e8dbcd817 100644 (file)
@@ -7599,10 +7599,10 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
        amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
 }
 
-static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+                                    uint32_t reg_val_offs)
 {
        struct amdgpu_device *adev = ring->adev;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
@@ -7611,9 +7611,9 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
 }
 
 static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
index 053a599dd877ec5adcac98041772a12f88d849bb..2fcf6865abbade9bb68d6fa548ce4b6a1faefc3d 100644 (file)
@@ -6389,10 +6389,10 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
                ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
 }
 
-static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+                                   uint32_t reg_val_offs)
 {
        struct amdgpu_device *adev = ring->adev;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
@@ -6401,9 +6401,9 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
 }
 
 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
index f8273fd8191ac03d439e9d9bb5589fd7f545aada..496205a8ee0c037b13269b8fb703189f71406359 100644 (file)
@@ -4045,13 +4045,19 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
 {
        signed long r, cnt = 0;
        unsigned long flags;
-       uint32_t seq;
+       uint32_t seq, reg_val_offs = 0;
+       uint64_t value = 0;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;
        struct amdgpu_ring *ring = &kiq->ring;
 
        BUG_ON(!ring->funcs->emit_rreg);
 
        spin_lock_irqsave(&kiq->ring_lock, flags);
+       if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+               spin_unlock_irqrestore(&kiq->ring_lock, flags);
+               pr_err("critical bug! too many kiq readers\n");
+               goto failed_kiq_read;
+       }
        amdgpu_ring_alloc(ring, 32);
        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 9 |     /* src: register*/
@@ -4061,9 +4067,9 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
        amdgpu_fence_emit_polling(ring, &seq);
        amdgpu_ring_commit(ring);
        spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -4090,8 +4096,11 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
        if (cnt > MAX_KIQ_REG_TRY)
                goto failed_kiq_read;
 
-       return (uint64_t)adev->wb.wb[kiq->reg_val_offs] |
-               (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL;
+       mb();
+       value = (uint64_t)adev->wb.wb[reg_val_offs] |
+               (uint64_t)adev->wb.wb[reg_val_offs + 1 ] << 32ULL;
+       amdgpu_device_wb_free(adev, reg_val_offs);
+       return value;
 
 failed_kiq_read:
        pr_err("failed to read gpu clock\n");
@@ -5490,10 +5499,10 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne
                ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
 }
 
-static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
+static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
+                                   uint32_t reg_val_offs)
 {
        struct amdgpu_device *adev = ring->adev;
-       struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
        amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
        amdgpu_ring_write(ring, 0 |     /* src: register*/
@@ -5502,9 +5511,9 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
        amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
-                               kiq->reg_val_offs * 4));
+                               reg_val_offs * 4));
 }
 
 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,