git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
habanalabs: handle barriers in DMA QMAN streams
author: Oded Gabbay <oded.gabbay@gmail.com>
Tue, 31 Mar 2020 08:57:11 +0000 (11:57 +0300)
committer: Oded Gabbay <oded.gabbay@gmail.com>
Sun, 17 May 2020 09:06:22 +0000 (12:06 +0300)
When we have DMA QMAN with multiple streams, we need to know whether the
command buffer contains at least one DMA packet in order to configure the
barriers correctly when adding the 2xMSG_PROT at the end of the JOB. If
there is no DMA packet, then there is no need to put an engine barrier. This
is relevant only for GAUDI, as GOYA doesn't have streams, so the engine can't
be kept busy by another stream.

Reviewed-by: Tomer Tayar <ttayar@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/command_submission.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/goya/goyaP.h
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/hw_queue.c

index 409276b6374d7caa3032d5fbf1f72ded0603ebbf..6680e183d88131d9256a8210d754f2963d671a07 100644 (file)
@@ -113,6 +113,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
                if (!rc) {
                        job->patched_cb = parser.patched_cb;
                        job->job_cb_size = parser.patched_cb_size;
+                       job->contains_dma_pkt = parser.contains_dma_pkt;
 
                        spin_lock(&job->patched_cb->lock);
                        job->patched_cb->cs_cnt++;
index 85f29cb7d67b273cf0abf7646a341e44c2ca8af4..19c3bdf4c358138164359e0569cf5342bc4d24d9 100644 (file)
@@ -3903,7 +3903,8 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
 }
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
-                               u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
+                               u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
+                               bool eb)
 {
        struct packet_msg_prot *cq_pkt;
        u32 tmp;
index a05250e53175f274e93f34846231d42d31090e10..86857cdd36b16cfd3d4548ff4c218f0495f7817a 100644 (file)
@@ -216,7 +216,8 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
-                               u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
+                               u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
+                               bool eb);
 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser);
 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
                                dma_addr_t *dma_handle, u16 *queue_len);
index 29b9767387af347a865279369dfad2bb984e76df..8db955485609e4e350a7fa9846190e6d39913e68 100644 (file)
@@ -598,7 +598,8 @@ struct hl_asic_funcs {
                                        struct sg_table *sgt);
        void (*add_end_of_cb_packets)(struct hl_device *hdev,
                                        u64 kernel_address, u32 len,
-                                       u64 cq_addr, u32 cq_val, u32 msix_num);
+                                       u64 cq_addr, u32 cq_val, u32 msix_num,
+                                       bool eb);
        void (*update_eq_ci)(struct hl_device *hdev, u32 val);
        int (*context_switch)(struct hl_device *hdev, u32 asid);
        void (*restore_phase_topology)(struct hl_device *hdev);
@@ -824,6 +825,12 @@ struct hl_cs {
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ *                    info is needed later, when adding the 2xMSG_PROT at the
+ *                    end of the JOB, to know which barriers to put in the
+ *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ *                    have streams so the engine can't be busy by another
+ *                    stream.
  */
 struct hl_cs_job {
        struct list_head        cs_node;
@@ -839,6 +846,7 @@ struct hl_cs_job {
        u32                     user_cb_size;
        u32                     job_cb_size;
        u8                      is_kernel_allocated_cb;
+       u8                      contains_dma_pkt;
 };
 
 /**
@@ -858,6 +866,12 @@ struct hl_cs_job {
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
+ * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ *                    info is needed later, when adding the 2xMSG_PROT at the
+ *                    end of the JOB, to know which barriers to put in the
+ *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ *                    have streams so the engine can't be busy by another
+ *                    stream.
  */
 struct hl_cs_parser {
        struct hl_cb            *user_cb;
@@ -871,6 +885,7 @@ struct hl_cs_parser {
        u32                     patched_cb_size;
        u8                      job_id;
        u8                      is_kernel_allocated_cb;
+       u8                      contains_dma_pkt;
 };
 
 
index 8248adcc7ef8a364fb75c2c6b8ca952993256ba6..a5abc224399da5c37ac8b5e531369f4d1c596417 100644 (file)
@@ -314,7 +314,8 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
        hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
                                                cq_addr,
                                                le32_to_cpu(cq_pkt.data),
-                                               q->msi_vec);
+                                               q->msi_vec,
+                                               job->contains_dma_pkt);
 
        q->shadow_queue[hl_pi_2_offset(q->pi)] = job;