*
* @hdev: pointer to habanalabs device structure
* @cs: CS structure
- *
- * The function signals waiting entity that its waiting stream has common
- * stream with the completed CS.
+ * The function signals a waiting entity whose stream masters overlap with
+ * those of the completed CS.
* For example:
- * - a completed CS worked on streams 0 and 1, multi CS completion
- * is actively waiting on stream 3. don't send signal as no common stream
- * - a completed CS worked on streams 0 and 1, multi CS completion
- * is actively waiting on streams 1 and 3. send signal as stream 1 is common
+ * - a completed CS worked on stream master QID 4, multi CS completion
+ * is actively waiting on stream master QIDs 3, 5. don't send signal as no
+ * common stream master QID
+ * - a completed CS worked on stream master QID 4, multi CS completion
+ * is actively waiting on stream master QIDs 3, 4. send signal as stream
+ * master QID 4 is common
*/
static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
{
* complete if:
* 1. still waiting for completion
* 2. the completed CS has at least one overlapping stream
- * with the streams in the completion
+ * master with the stream masters in the completion
*/
if (mcs_compl->used &&
- (fence->stream_map & mcs_compl->stream_map)) {
+ (fence->stream_master_qid_map &
+ mcs_compl->stream_master_qid_map)) {
/* extract the timestamp only of first completed CS */
if (!mcs_compl->timestamp)
mcs_compl->timestamp =
return 0;
}
+/**
+ * get_stream_master_qid_mask - translate a queue ID to its stream master bit
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @qid: queue ID to look up in the device's stream master QID array
+ *
+ * @return BIT(n), where n is the index of @qid in hdev->stream_master_qid_arr,
+ *         or 0 if @qid does not appear in the array
+ */
+static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
+{
+	const u32 *masters = hdev->stream_master_qid_arr;
+	int idx;
+
+	/* the array is only dereferenced while idx is below the array size */
+	for (idx = 0; idx < hdev->stream_master_qid_arr_size; idx++) {
+		if (masters[idx] == qid)
+			return BIT(idx);
+	}
+
+	/* not a stream master queue: contributes no bit to the map */
+	return 0;
+}
+
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
u32 num_chunks, u64 *cs_seq, u32 flags,
u32 encaps_signals_handle, u32 timeout)
struct hl_cs *cs;
struct hl_cb *cb;
u64 user_sequence;
- u8 stream_map = 0;
+ u8 stream_master_qid_map = 0;
int rc, i;
cntr = &hdev->aggregated_cs_counters;
* queues of this CS
*/
if (hdev->supports_wait_for_multi_cs)
- stream_map |= BIT((chunk->queue_index % 4));
+ stream_master_qid_map |=
+ get_stream_master_qid_mask(hdev,
+ chunk->queue_index);
}
job = hl_cs_allocate_job(hdev, queue_type,
* fence object for multi-CS completion
*/
if (hdev->supports_wait_for_multi_cs)
- cs->fence->stream_map = stream_map;
+ cs->fence->stream_master_qid_map = stream_master_qid_map;
rc = hl_hw_queue_schedule_cs(cs);
if (rc) {
break;
}
- mcs_data->stream_map |= fence->stream_map;
+ mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
if (status == CS_WAIT_STATUS_BUSY)
continue;
* hl_wait_multi_cs_completion_init - init completion structure
*
* @hdev: pointer to habanalabs device structure
- * @stream_map: stream map, set bit indicates stream to wait on
+ * @stream_master_bitmap: stream master QIDs map, set bit indicates stream
+ * master QID to wait on
*
* @return valid completion struct pointer on success, otherwise error pointer
*
*/
static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
struct hl_device *hdev,
- u8 stream_map)
+ u8 stream_master_bitmap)
{
struct multi_cs_completion *mcs_compl;
int i;
if (!mcs_compl->used) {
mcs_compl->used = 1;
mcs_compl->timestamp = 0;
- mcs_compl->stream_map = stream_map;
+ mcs_compl->stream_master_qid_map = stream_master_bitmap;
reinit_completion(&mcs_compl->completion);
spin_unlock(&mcs_compl->lock);
break;
long completion_rc;
mcs_compl = hl_wait_multi_cs_completion_init(hdev,
- mcs_data->stream_map);
+ mcs_data->stream_master_qid_map);
if (IS_ERR(mcs_compl))
return PTR_ERR(mcs_compl);
* @completion: fence is implemented using completion
* @refcount: refcount for this fence
* @cs_sequence: sequence of the corresponding command submission
+ * @stream_master_qid_map: bitmap of the stream master QIDs used by the
+ * corresponding command submission; compared with
+ * the stream masters a multi-CS wait is waiting on
* @error: mark this fence with error
* @timestamp: timestamp upon completion
- * @stream_map: streams bitmap to represent all streams that multi cs is
- * waiting on
*/
struct hl_fence {
struct completion completion;
struct kref refcount;
u64 cs_sequence;
+ u32 stream_master_qid_map;
int error;
ktime_t timestamp;
- u8 stream_map;
};
/**
* @state_dump_init: initialize constants required for state dump
* @get_sob_addr: get SOB base address offset.
* @set_pci_memory_regions: setting properties of PCI memory regions
+ * @get_stream_master_qid_arr: get pointer to stream masters QID array; an
+ * ASIC implementation may return NULL
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
void (*state_dump_init)(struct hl_device *hdev);
u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
void (*set_pci_memory_regions)(struct hl_device *hdev);
+ u32 *(*get_stream_master_qid_arr)(void);
};
* @completion: completion of any of the CS in the list
* @lock: spinlock for the completion structure
* @timestamp: timestamp for the multi-CS completion
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ * multi-CS is waiting (bit n stands for entry n of
+ * the device's stream master QID array)
* @used: 1 if in use, otherwise 0
- * @stream_map: bitmap of all HW/external queues streams on which the multi-CS
- * is waiting
*/
struct multi_cs_completion {
struct completion completion;
spinlock_t lock;
s64 timestamp;
+ u32 stream_master_qid_map;
u8 used;
- u8 stream_map;
};
/**
* @timestamp: timestamp of first completed CS
* @wait_status: wait for CS status
* @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ * multi-CS is waiting
* @arr_len: fence_arr and seq_arr array length
- * @stream_map: bitmap of all HW/external queues streams on which the multi-CS
- * is waiting
* @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
* @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
*/
s64 timestamp;
long wait_status;
u32 completion_bitmap;
+ u32 stream_master_qid_map;
u8 arr_len;
- u8 stream_map;
u8 gone_cs;
u8 update_ts;
};
struct multi_cs_completion multi_cs_completion[
MULTI_CS_MAX_USER_CTX];
+ u32 *stream_master_qid_arr;
atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
u8 skip_reset_on_timeout;
u8 device_cpu_is_halted;
u8 supports_wait_for_multi_cs;
+ u8 stream_master_qid_arr_size;
/* Parameters for bring-up */
u64 nic_ports_mask;
/* update stream map of the first CS */
if (hdev->supports_wait_for_multi_cs)
- staged_cs->fence->stream_map |= cs->fence->stream_map;
+ staged_cs->fence->stream_master_qid_map |=
+ cs->fence->stream_master_qid_map;
}
list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
#define MONITOR_SOB_STRING_SIZE 256
+/*
+ * Gaudi stream master queue IDs. The common code derives a queue's bit in the
+ * stream master QID bitmaps from its index in this array, so the order of the
+ * entries is significant.
+ */
+static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
+	GAUDI_QUEUE_ID_DMA_0_0, GAUDI_QUEUE_ID_DMA_0_1,
+	GAUDI_QUEUE_ID_DMA_0_2, GAUDI_QUEUE_ID_DMA_0_3,
+	GAUDI_QUEUE_ID_DMA_1_0, GAUDI_QUEUE_ID_DMA_1_1,
+	GAUDI_QUEUE_ID_DMA_1_2, GAUDI_QUEUE_ID_DMA_1_3,
+};
+
static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
hdev->supports_wait_for_multi_cs = true;
hdev->asic_funcs->set_pci_memory_regions(hdev);
+ hdev->stream_master_qid_arr =
+ hdev->asic_funcs->get_stream_master_qid_arr();
+ hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
return 0;
sds->funcs = gaudi_state_dump_funcs;
}
+/**
+ * gaudi_get_stream_master_qid_arr - get the Gaudi stream master QID array
+ *
+ * @return pointer to the first entry of gaudi_stream_master
+ */
+static u32 *gaudi_get_stream_master_qid_arr(void)
+{
+	return &gaudi_stream_master[0];
+}
+
static const struct hl_asic_funcs gaudi_funcs = {
.early_init = gaudi_early_init,
.early_fini = gaudi_early_fini,
.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
.state_dump_init = gaudi_state_dump_init,
.get_sob_addr = gaudi_get_sob_addr,
- .set_pci_memory_regions = gaudi_set_pci_memory_regions
+ .set_pci_memory_regions = gaudi_set_pci_memory_regions,
+ .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
};
/**
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + \
NUMBER_OF_CPU_HW_QUEUES)
+/* Number of entries in the Gaudi stream master queue ID array */
+#define GAUDI_STREAM_MASTER_ARR_SIZE 8
+
#if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
#error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
#endif
return 0;
}
+/**
+ * goya_get_stream_master_qid_arr - stream master QID array getter for Goya
+ *
+ * Goya does not provide a stream master QID array.
+ *
+ * @return always NULL
+ */
+static u32 *goya_get_stream_master_qid_arr(void)
+{
+	return NULL;
+}
+
static const struct hl_asic_funcs goya_funcs = {
.early_init = goya_early_init,
.early_fini = goya_early_fini,
.state_dump_init = goya_state_dump_init,
.get_sob_addr = &goya_get_sob_addr,
.set_pci_memory_regions = goya_set_pci_memory_regions,
+ .get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
};
/*