Merge 5.8-rc7 into char-misc-next

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
diff --cc MAINTAINERS
Simple merge
diff --cc drivers/fpga/dfl-afu-main.c
Simple merge
diff --cc drivers/fpga/dfl-pci.c
Simple merge
diff --cc drivers/interconnect/core.c
Simple merge
diff --cc drivers/misc/habanalabs/common/debugfs.c

index fc4372c18ce20798317e4022bd119001201f5a55,0000000000000000000000000000000000000000..0bc036e01ee8df1bfbfa8fccd8f41ba1890a4b3d

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/debugfs.c
--- /dev/null
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@@ -1,1411 -1,0 +1,1404 @@@
-                                       HL_DEVICE_TIMEOUT_USEC, (long *) val);
+ +// SPDX-License-Identifier: GPL-2.0
+ +
+ +/*
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + */
+ +
+ +#include "habanalabs.h"
+ +#include "include/hw_ip/mmu/mmu_general.h"
+ +
+ +#include <linux/pci.h>
+ +#include <linux/debugfs.h>
+ +#include <linux/uaccess.h>
+ +
+ +#define MMU_ADDR_BUF_SIZE     40
+ +#define MMU_ASID_BUF_SIZE     10
+ +#define MMU_KBUF_SIZE         (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
+ +
+ +static struct dentry *hl_debug_root;
+ +
+ +/*
+ + * hl_debugfs_i2c_read() - read one I2C register via send_cpu_message().
+ + * @hdev: habanalabs device.
+ + * @i2c_bus: bus number copied into the request packet.
+ + * @i2c_addr: device address copied into the request packet.
+ + * @i2c_reg: register offset copied into the request packet.
+ + * @val: receives the returned value; written only on success.
+ + *
+ + * Builds an ARMCP_PACKET_I2C_RD packet and passes it to
+ + * asic_funcs->send_cpu_message().
+ + *
+ + * Return: 0 on success, -EBUSY when the device is disabled or in reset,
+ + * otherwise the error code returned by send_cpu_message().
+ + */
+ +static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
+ +                              u8 i2c_reg, u32 *val)
+ +{
+ +      struct armcp_packet pkt;
+ +      long result = 0;
+ +      int rc;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return -EBUSY;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.i2c_bus = i2c_bus;
+ +      pkt.i2c_addr = i2c_addr;
+ +      pkt.i2c_reg = i2c_reg;
+ +
+ +      /*
+ +       * The result pointer is a long *, so a store through it is
+ +       * sizeof(long) bytes wide. Passing the caller's u32 directly would
+ +       * let that store run past the 4-byte object on 64-bit builds, so
+ +       * receive the result in a real long and narrow it afterwards.
+ +       */
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       HL_DEVICE_TIMEOUT_USEC, NULL);
++                                              0, &result);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
+ +              return rc;
+ +      }
+ +
+ +      *val = (u32) result;
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * hl_debugfs_i2c_write() - write one I2C register via send_cpu_message().
+ + * @hdev: habanalabs device.
+ + * @i2c_bus: bus number copied into the request packet.
+ + * @i2c_addr: device address copied into the request packet.
+ + * @i2c_reg: register offset copied into the request packet.
+ + * @val: value to write, stored little-endian in the packet.
+ + *
+ + * Return: 0 on success, -EBUSY when the device is disabled or in reset,
+ + * otherwise the error code returned by send_cpu_message().
+ + */
+ +static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
+ +                              u8 i2c_reg, u32 val)
+ +{
+ +      struct armcp_packet packet;
+ +      int err;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return -EBUSY;
+ +
+ +      /* Start from an all-zero packet, then fill in the request fields. */
+ +      memset(&packet, 0, sizeof(packet));
+ +
+ +      packet.i2c_bus = i2c_bus;
+ +      packet.i2c_addr = i2c_addr;
+ +      packet.i2c_reg = i2c_reg;
+ +      packet.value = cpu_to_le64(val);
+ +      packet.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +
+ +      err = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &packet,
+ +                                              sizeof(packet),
-                                               HL_DEVICE_TIMEOUT_USEC, NULL);
++                                              0, NULL);
+ +
+ +      if (!err)
+ +              return 0;
+ +
+ +      dev_err(hdev->dev, "Failed to write to I2C, error %d\n", err);
+ +
+ +      return err;
+ +}
+ +
+ +/*
+ + * hl_debugfs_led_set() - send an ARMCP_PACKET_LED_SET request.
+ + * @hdev: habanalabs device.
+ + * @led: LED index placed in the packet.
+ + * @state: value placed in the packet's value field.
+ + *
+ + * Does nothing when the device is disabled or in reset. A failure of
+ + * send_cpu_message() is logged but not reported to the caller.
+ + */
+ +static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
+ +{
+ +      struct armcp_packet packet;
+ +      int err;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return;
+ +
+ +      memset(&packet, 0, sizeof(packet));
+ +
+ +      packet.led_index = cpu_to_le32(led);
+ +      packet.value = cpu_to_le64(state);
+ +      packet.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +
+ +      err = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &packet,
+ +                                              sizeof(packet),
-       sprintf(tmp_buf, "%d\n", hdev->clock_gating);
++                                              0, NULL);
+ +
+ +      if (!err)
+ +              return;
+ +
+ +      dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, err);
+ +}
+ +
+ +/*
+ + * command_buffers_show() - seq_file show callback listing command buffers.
+ + * @s: seq_file whose ->private is the hl_debugfs_entry of this node.
+ + * @data: unused.
+ + *
+ + * Walks dev_entry->cb_list under cb_spinlock and prints one row per entry.
+ + * The table header is emitted lazily so an empty list produces no output.
+ + *
+ + * Return: always 0.
+ + */
+ +static int command_buffers_show(struct seq_file *s, void *data)
+ +{
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      bool header_done = false;
+ +      struct hl_cb *cb;
+ +
+ +      spin_lock(&dev_entry->cb_spinlock);
+ +
+ +      list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
+ +              if (!header_done) {
+ +                      seq_puts(s, "\n");
+ +                      seq_puts(s, " CB ID   CTX ID   CB size    CB RefCnt    mmap?   CS counter\n");
+ +                      seq_puts(s, "---------------------------------------------------------------\n");
+ +                      header_done = true;
+ +              }
+ +
+ +              seq_printf(s,
+ +                      "   %03d        %d    0x%08x      %d          %d          %d\n",
+ +                      cb->id, cb->ctx_id, cb->size,
+ +                      kref_read(&cb->refcount),
+ +                      cb->mmap, cb->cs_cnt);
+ +      }
+ +
+ +      spin_unlock(&dev_entry->cb_spinlock);
+ +
+ +      /* Close the table with a blank line only if something was printed. */
+ +      if (header_done)
+ +              seq_puts(s, "\n");
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * command_submission_show() - seq_file show callback listing command
+ + * submissions.
+ + * @s: seq_file whose ->private is the hl_debugfs_entry of this node.
+ + * @data: unused.
+ + *
+ + * Walks dev_entry->cs_list under cs_spinlock and prints one row per entry.
+ + * Nothing is printed when the list is empty.
+ + *
+ + * Return: always 0.
+ + */
+ +static int command_submission_show(struct seq_file *s, void *data)
+ +{
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      bool header_done = false;
+ +      struct hl_cs *cs;
+ +
+ +      spin_lock(&dev_entry->cs_spinlock);
+ +
+ +      list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
+ +              if (!header_done) {
+ +                      seq_puts(s, "\n");
+ +                      seq_puts(s, " CS ID   CTX ASID   CS RefCnt   Submitted    Completed\n");
+ +                      seq_puts(s, "------------------------------------------------------\n");
+ +                      header_done = true;
+ +              }
+ +
+ +              seq_printf(s,
+ +                      "   %llu       %d          %d           %d            %d\n",
+ +                      cs->sequence, cs->ctx->asid,
+ +                      kref_read(&cs->refcount),
+ +                      cs->submitted, cs->completed);
+ +      }
+ +
+ +      spin_unlock(&dev_entry->cs_spinlock);
+ +
+ +      if (header_done)
+ +              seq_puts(s, "\n");
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * command_submission_jobs_show() - seq_file show callback listing CS jobs.
+ + * @s: seq_file whose ->private is the hl_debugfs_entry of this node.
+ + * @data: unused.
+ + *
+ + * Walks dev_entry->cs_job_list under cs_job_spinlock. A job with no
+ + * attached CS is printed with CS ID 0 and HL_KERNEL_ASID_ID as its ASID.
+ + *
+ + * Return: always 0.
+ + */
+ +static int command_submission_jobs_show(struct seq_file *s, void *data)
+ +{
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      bool header_done = false;
+ +      struct hl_cs_job *job;
+ +
+ +      spin_lock(&dev_entry->cs_job_spinlock);
+ +
+ +      list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
+ +              if (!header_done) {
+ +                      seq_puts(s, "\n");
+ +                      seq_puts(s, " JOB ID   CS ID    CTX ASID   H/W Queue\n");
+ +                      seq_puts(s, "---------------------------------------\n");
+ +                      header_done = true;
+ +              }
+ +
+ +              if (!job->cs) {
+ +                      seq_printf(s,
+ +                              "    %02d       0         %d         %d\n",
+ +                              job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
+ +                      continue;
+ +              }
+ +
+ +              seq_printf(s,
+ +                      "    %02d       %llu         %d         %d\n",
+ +                      job->id, job->cs->sequence, job->cs->ctx->asid,
+ +                      job->hw_queue_id);
+ +      }
+ +
+ +      spin_unlock(&dev_entry->cs_job_spinlock);
+ +
+ +      if (header_done)
+ +              seq_puts(s, "\n");
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * userptr_show() - seq_file show callback listing user pointers.
+ + * @s: seq_file whose ->private is the hl_debugfs_entry of this node.
+ + * @data: unused.
+ + *
+ + * Walks dev_entry->userptr_list under userptr_spinlock and prints address,
+ + * size and DMA direction of each entry.
+ + *
+ + * Return: always 0.
+ + */
+ +static int userptr_show(struct seq_file *s, void *data)
+ +{
+ +      /* Indexed by userptr->dir; NOTE(review): index is not range-checked. */
+ +      static const char * const dir_name[4] = {
+ +              "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+ +              "DMA_FROM_DEVICE", "DMA_NONE"
+ +      };
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      bool header_done = false;
+ +      struct hl_userptr *userptr;
+ +
+ +      spin_lock(&dev_entry->userptr_spinlock);
+ +
+ +      list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
+ +              if (!header_done) {
+ +                      seq_puts(s, "\n");
+ +                      seq_puts(s, " user virtual address     size             dma dir\n");
+ +                      seq_puts(s, "----------------------------------------------------------\n");
+ +                      header_done = true;
+ +              }
+ +
+ +              seq_printf(s,
+ +                      "    0x%-14llx      %-10u    %-30s\n",
+ +                      userptr->addr, userptr->size, dir_name[userptr->dir]);
+ +      }
+ +
+ +      spin_unlock(&dev_entry->userptr_spinlock);
+ +
+ +      if (header_done)
+ +              seq_puts(s, "\n");
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * vm_show() - seq_file show callback dumping per-context VM state.
+ + * @s: seq_file whose ->private is the hl_debugfs_entry of this node.
+ + * @data: unused.
+ + *
+ + * For every context on dev_entry->ctx_mem_hash_list, prints the entries of
+ + * the context's mem_hash table followed by the physical page packs in
+ + * vm->phys_pg_pack_handles whose asid matches the context. Prints nothing
+ + * when the MMU is disabled.
+ + *
+ + * NOTE(review): ctx->mem_hash_lock is a mutex and is taken while
+ + * ctx_mem_hash_spinlock is held - confirm this cannot sleep in atomic
+ + * context.
+ + *
+ + * Return: always 0.
+ + */
+ +static int vm_show(struct seq_file *s, void *data)
+ +{
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+ +      struct hl_vm_hash_node *hnode;
+ +      struct hl_userptr *userptr;
+ +      struct hl_ctx *ctx;
+ +      struct hl_vm *vm;
+ +      enum vm_type_t *vm_type;
+ +      bool printed = false;
+ +      int bucket, handle_id;
+ +      u64 page;
+ +
+ +      if (!dev_entry->hdev->mmu_enable)
+ +              return 0;
+ +
+ +      spin_lock(&dev_entry->ctx_mem_hash_spinlock);
+ +
+ +      list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
+ +              printed = true;
+ +
+ +              seq_puts(s, "\n\n----------------------------------------------------");
+ +              seq_puts(s, "\n----------------------------------------------------\n\n");
+ +              seq_printf(s, "ctx asid: %u\n", ctx->asid);
+ +
+ +              seq_puts(s, "\nmappings:\n\n");
+ +              seq_puts(s, "    virtual address        size          handle\n");
+ +              seq_puts(s, "----------------------------------------------------\n");
+ +
+ +              mutex_lock(&ctx->mem_hash_lock);
+ +              hash_for_each(ctx->mem_hash, bucket, hnode, node) {
+ +                      /* hnode->ptr is first read as a vm_type_t tag. */
+ +                      vm_type = hnode->ptr;
+ +
+ +                      if (*vm_type == VM_TYPE_USERPTR) {
+ +                              userptr = hnode->ptr;
+ +                              seq_printf(s,
+ +                                      "    0x%-14llx      %-10u\n",
+ +                                      hnode->vaddr, userptr->size);
+ +                              continue;
+ +                      }
+ +
+ +                      phys_pg_pack = hnode->ptr;
+ +                      seq_printf(s,
+ +                              "    0x%-14llx      %-10llu       %-4u\n",
+ +                              hnode->vaddr, phys_pg_pack->total_size,
+ +                              phys_pg_pack->handle);
+ +              }
+ +              mutex_unlock(&ctx->mem_hash_lock);
+ +
+ +              vm = &ctx->hdev->vm;
+ +              spin_lock(&vm->idr_lock);
+ +
+ +              if (!idr_is_empty(&vm->phys_pg_pack_handles))
+ +                      seq_puts(s, "\n\nallocations:\n");
+ +
+ +              idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack,
+ +                                      handle_id) {
+ +                      /* Only report packs that belong to this context. */
+ +                      if (phys_pg_pack->asid != ctx->asid)
+ +                              continue;
+ +
+ +                      seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
+ +                      seq_printf(s, "page size: %u\n\n",
+ +                                              phys_pg_pack->page_size);
+ +                      seq_puts(s, "   physical address\n");
+ +                      seq_puts(s, "---------------------\n");
+ +
+ +                      for (page = 0 ; page < phys_pg_pack->npages ; page++)
+ +                              seq_printf(s, "    0x%-14llx\n",
+ +                                              phys_pg_pack->pages[page]);
+ +              }
+ +
+ +              spin_unlock(&vm->idr_lock);
+ +      }
+ +
+ +      spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
+ +
+ +      if (printed)
+ +              seq_puts(s, "\n");
+ +
+ +      return 0;
+ +}
+ +
+ +/* these inline functions are copied from mmu.c */
+ +/*
+ + * Address of the context's hop-0 table: mmu_pgt_addr plus the context's
+ + * asid multiplied by mmu_hop_table_size.
+ + */
+ +static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+ +{
+ +      struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+ +
+ +      return prop->mmu_pgt_addr + (ctx->asid * prop->mmu_hop_table_size);
+ +}
+ +
+ +/*
+ + * Address of the PTE for @virt_addr inside the hop table at @hop_addr.
+ + * The entry index is (virt_addr & mask) >> shift and each entry is
+ + * asic_prop.mmu_pte_size bytes apart.
+ + */
+ +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+ +                                      u64 virt_addr, u64 mask, u64 shift)
+ +{
+ +      u64 pte_index = (virt_addr & mask) >> shift;
+ +
+ +      return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * pte_index;
+ +}
+ +
+ +/* PTE address for @vaddr in the hop-0 table, using the hop0 mask/shift. */
+ +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+ +                                      struct hl_mmu_properties *mmu_specs,
+ +                                      u64 hop_addr, u64 vaddr)
+ +{
+ +      u64 mask = mmu_specs->hop0_mask;
+ +      u64 shift = mmu_specs->hop0_shift;
+ +
+ +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mask, shift);
+ +}
+ +
+ +/* PTE address for @vaddr in a hop-1 table, using the hop1 mask/shift. */
+ +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+ +                                      struct hl_mmu_properties *mmu_specs,
+ +                                      u64 hop_addr, u64 vaddr)
+ +{
+ +      u64 mask = mmu_specs->hop1_mask;
+ +      u64 shift = mmu_specs->hop1_shift;
+ +
+ +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mask, shift);
+ +}
+ +
+ +/* PTE address for @vaddr in a hop-2 table, using the hop2 mask/shift. */
+ +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+ +                                      struct hl_mmu_properties *mmu_specs,
+ +                                      u64 hop_addr, u64 vaddr)
+ +{
+ +      u64 mask = mmu_specs->hop2_mask;
+ +      u64 shift = mmu_specs->hop2_shift;
+ +
+ +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mask, shift);
+ +}
+ +
+ +/* PTE address for @vaddr in a hop-3 table, using the hop3 mask/shift. */
+ +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+ +                                      struct hl_mmu_properties *mmu_specs,
+ +                                      u64 hop_addr, u64 vaddr)
+ +{
+ +      u64 mask = mmu_specs->hop3_mask;
+ +      u64 shift = mmu_specs->hop3_shift;
+ +
+ +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mask, shift);
+ +}
+ +
+ +/* PTE address for @vaddr in a hop-4 table, using the hop4 mask/shift. */
+ +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+ +                                      struct hl_mmu_properties *mmu_specs,
+ +                                      u64 hop_addr, u64 vaddr)
+ +{
+ +      u64 mask = mmu_specs->hop4_mask;
+ +      u64 shift = mmu_specs->hop4_shift;
+ +
+ +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mask, shift);
+ +}
+ +
+ +/*
+ + * Extract the next hop table address from @curr_pte.
+ + * Returns ULLONG_MAX when the PAGE_PRESENT_MASK bit is not set.
+ + */
+ +static inline u64 get_next_hop_addr(u64 curr_pte)
+ +{
+ +      if (!(curr_pte & PAGE_PRESENT_MASK))
+ +              return ULLONG_MAX;
+ +
+ +      return curr_pte & HOP_PHYS_ADDR_MASK;
+ +}
+ +
+ +/*
+ + * mmu_show() - seq_file show callback dumping the page-table walk for the
+ + * asid/address pair stored in the debugfs device entry.
+ + * @s: seq_file whose ->private is the hl_debugfs_entry of this node.
+ + * @data: unused.
+ + *
+ + * Selects the kernel context when mmu_asid equals HL_KERNEL_ASID_ID and the
+ + * compute context otherwise, then walks hops 0-3 (and hop 4 when the hop-3
+ + * PTE does not carry LAST_MASK) under ctx->mmu_lock. Every hop address, PTE
+ + * address and PTE value is printed if the walk reaches a present page;
+ + * otherwise an error is logged and nothing is printed.
+ + *
+ + * Return: always 0.
+ + */
+ +static int mmu_show(struct seq_file *s, void *data)
+ +{
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      struct hl_device *hdev = dev_entry->hdev;
+ +      struct asic_fixed_properties *prop = &hdev->asic_prop;
+ +      struct hl_mmu_properties *mmu_prop;
+ +      struct hl_ctx *ctx;
+ +      bool is_dram_addr, has_hop4;
+ +      u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0;
+ +      u64 hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0;
+ +      u64 hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0;
+ +      u64 hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0;
+ +      u64 hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0;
+ +      u64 virt_addr = dev_entry->mmu_addr;
+ +
+ +      if (!hdev->mmu_enable)
+ +              return 0;
+ +
+ +      ctx = (dev_entry->mmu_asid == HL_KERNEL_ASID_ID) ?
+ +                      hdev->kernel_ctx : hdev->compute_ctx;
+ +
+ +      if (!ctx) {
+ +              dev_err(hdev->dev, "no ctx available\n");
+ +              return 0;
+ +      }
+ +
+ +      is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ +                                              prop->dmmu.start_addr,
+ +                                              prop->dmmu.end_addr);
+ +
+ +      /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+ +      if (is_dram_addr)
+ +              mmu_prop = &prop->dmmu;
+ +      else
+ +              mmu_prop = &prop->pmmu;
+ +
+ +      mutex_lock(&ctx->mmu_lock);
+ +
+ +      /* the following lookup is copied from unmap() in mmu.c */
+ +
+ +      /* hop 0 */
+ +      hop0_addr = get_hop0_addr(ctx);
+ +      hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
+ +      hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+ +
+ +      /* hop 1 */
+ +      hop1_addr = get_next_hop_addr(hop0_pte);
+ +      if (hop1_addr == ULLONG_MAX)
+ +              goto unmapped;
+ +
+ +      hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
+ +      hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+ +
+ +      /* hop 2 */
+ +      hop2_addr = get_next_hop_addr(hop1_pte);
+ +      if (hop2_addr == ULLONG_MAX)
+ +              goto unmapped;
+ +
+ +      hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
+ +      hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+ +
+ +      /* hop 3 */
+ +      hop3_addr = get_next_hop_addr(hop2_pte);
+ +      if (hop3_addr == ULLONG_MAX)
+ +              goto unmapped;
+ +
+ +      hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
+ +      hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+ +
+ +      /* A hop-3 PTE without LAST_MASK points at a hop-4 table. */
+ +      has_hop4 = !(hop3_pte & LAST_MASK);
+ +
+ +      if (has_hop4) {
+ +              hop4_addr = get_next_hop_addr(hop3_pte);
+ +              if (hop4_addr == ULLONG_MAX)
+ +                      goto unmapped;
+ +
+ +              hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+ +                                                      virt_addr);
+ +              hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+ +              if (!(hop4_pte & PAGE_PRESENT_MASK))
+ +                      goto unmapped;
+ +      } else if (!(hop3_pte & PAGE_PRESENT_MASK)) {
+ +              goto unmapped;
+ +      }
+ +
+ +      seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
+ +                      dev_entry->mmu_asid, dev_entry->mmu_addr);
+ +
+ +      seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
+ +      seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
+ +      seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
+ +
+ +      seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
+ +      seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
+ +      seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
+ +
+ +      seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
+ +      seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
+ +      seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
+ +
+ +      seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
+ +      seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
+ +      seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
+ +
+ +      if (has_hop4) {
+ +              seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
+ +              seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
+ +              seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
+ +      }
+ +
+ +      goto unlock;
+ +
+ +unmapped:
+ +      dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ +                      virt_addr);
+ +unlock:
+ +      mutex_unlock(&ctx->mmu_lock);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * mmu_asid_va_write() - parse "<asid> <0xaddr>" written to the mmu node.
+ + * @file: debugfs file; ->private_data is the seq_file of the node.
+ + * @buf: user buffer holding the text.
+ + * @count: number of bytes in @buf.
+ + * @f_pos: unused.
+ + *
+ + * The asid is parsed as decimal and the address as hexadecimal with a
+ + * mandatory "0x" prefix. Both values are parsed into locals first and
+ + * stored in the device entry only when the whole input is valid, so a
+ + * malformed write never leaves a new asid paired with a stale address.
+ + * When the MMU is disabled the input is accepted and ignored.
+ + *
+ + * Return: @count on success (or when the MMU is disabled), -EINVAL on any
+ + * parse or copy failure.
+ + */
+ +static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
+ +              size_t count, loff_t *f_pos)
+ +{
+ +      struct seq_file *s = file->private_data;
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ +      struct hl_device *hdev = dev_entry->hdev;
+ +      char kbuf[MMU_KBUF_SIZE];
+ +      unsigned long long addr;
+ +      unsigned int asid;
+ +      char *c;
+ +      ssize_t rc;
+ +
+ +      if (!hdev->mmu_enable)
+ +              return count;
+ +
+ +      /* Leave room for the terminating NUL added below. */
+ +      if (count > sizeof(kbuf) - 1)
+ +              goto err;
+ +      if (copy_from_user(kbuf, buf, count))
+ +              goto err;
+ +      kbuf[count] = 0;
+ +
+ +      /* Split at the first space: kbuf holds the asid, c + 1 the address. */
+ +      c = strchr(kbuf, ' ');
+ +      if (!c)
+ +              goto err;
+ +      *c = '\0';
+ +
+ +      rc = kstrtouint(kbuf, 10, &asid);
+ +      if (rc)
+ +              goto err;
+ +
+ +      if (strncmp(c+1, "0x", 2))
+ +              goto err;
+ +      rc = kstrtoull(c+3, 16, &addr);
+ +      if (rc)
+ +              goto err;
+ +
+ +      /* Everything parsed; commit both values together. */
+ +      dev_entry->mmu_asid = asid;
+ +      dev_entry->mmu_addr = addr;
+ +
+ +      return count;
+ +
+ +err:
+ +      dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
+ +
+ +      return -EINVAL;
+ +}
+ +
+ +/*
+ + * engines_show() - seq_file show callback that lets the ASIC code report
+ + * engine state through asic_funcs->is_device_idle().
+ + * @s: seq_file passed on to is_device_idle().
+ + * @data: unused.
+ + *
+ + * Skipped, with a rate-limited warning, while the device is in reset.
+ + *
+ + * Return: always 0.
+ + */
+ +static int engines_show(struct seq_file *s, void *data)
+ +{
+ +      struct hl_debugfs_entry *entry = s->private;
+ +      struct hl_device *hdev = entry->dev_entry->hdev;
+ +
+ +      if (!atomic_read(&hdev->in_reset)) {
+ +              hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+ +              return 0;
+ +      }
+ +
+ +      dev_warn_ratelimited(hdev->dev,
+ +                      "Can't check device idle during reset\n");
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * hl_is_device_va() - tell whether @addr lies in one of the device MMU
+ + * virtual ranges.
+ + * @hdev: habanalabs device.
+ + * @addr: address to classify.
+ + *
+ + * The DMMU range only counts when dram_supports_virtual_memory is set; the
+ + * PMMU and huge-page PMMU ranges always count. All ranges are half-open:
+ + * start_addr is inclusive and end_addr is exclusive.
+ + *
+ + * Return: true if @addr is inside such a range, false otherwise or when the
+ + * MMU is disabled.
+ + */
+ +static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
+ +{
+ +      struct asic_fixed_properties *prop = &hdev->asic_prop;
+ +
+ +      if (!hdev->mmu_enable)
+ +              return false;
+ +
+ +      if (hdev->dram_supports_virtual_memory &&
+ +              addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)
+ +              return true;
+ +
+ +      if (addr >= prop->pmmu.start_addr && addr < prop->pmmu.end_addr)
+ +              return true;
+ +
+ +      return addr >= prop->pmmu_huge.start_addr &&
+ +              addr < prop->pmmu_huge.end_addr;
+ +}
+ +
+ +/*
+ + * device_va_to_pa() - translate a device virtual address by walking the
+ + * compute context's page tables.
+ + * @hdev: habanalabs device.
+ + * @virt_addr: device virtual address to translate.
+ + * @phys_addr: receives the translated address on success.
+ + *
+ + * Walks hops 0-3, plus hop 4 when the hop-3 PTE does not carry LAST_MASK,
+ + * under ctx->mmu_lock. The low bits of @virt_addr selected by the offset
+ + * mask are combined with the remaining bits of the final PTE.
+ + *
+ + * Return: 0 on success, -EINVAL when there is no compute context or the
+ + * address is not mapped.
+ + */
+ +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
+ +                              u64 *phys_addr)
+ +{
+ +      struct asic_fixed_properties *prop = &hdev->asic_prop;
+ +      struct hl_ctx *ctx = hdev->compute_ctx;
+ +      struct hl_mmu_properties *mmu_prop;
+ +      /* Offset mask for a mapping that ends at hop 3. */
+ +      u64 offset_mask = HOP4_MASK | FLAGS_MASK;
+ +      u64 table, pte_addr, pte;
+ +      bool is_dram_addr;
+ +      int rc = 0;
+ +
+ +      if (!ctx) {
+ +              dev_err(hdev->dev, "no ctx available\n");
+ +              return -EINVAL;
+ +      }
+ +
+ +      is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+ +                                              prop->dmmu.start_addr,
+ +                                              prop->dmmu.end_addr);
+ +
+ +      /* shifts and masks are the same in PMMU and HPMMU, use one of them */
+ +      if (is_dram_addr)
+ +              mmu_prop = &prop->dmmu;
+ +      else
+ +              mmu_prop = &prop->pmmu;
+ +
+ +      mutex_lock(&ctx->mmu_lock);
+ +
+ +      /* hop 0 */
+ +      table = get_hop0_addr(ctx);
+ +      pte_addr = get_hop0_pte_addr(ctx, mmu_prop, table, virt_addr);
+ +      pte = hdev->asic_funcs->read_pte(hdev, pte_addr);
+ +
+ +      /* hop 1 */
+ +      table = get_next_hop_addr(pte);
+ +      if (table == ULLONG_MAX)
+ +              goto unmapped;
+ +      pte_addr = get_hop1_pte_addr(ctx, mmu_prop, table, virt_addr);
+ +      pte = hdev->asic_funcs->read_pte(hdev, pte_addr);
+ +
+ +      /* hop 2 */
+ +      table = get_next_hop_addr(pte);
+ +      if (table == ULLONG_MAX)
+ +              goto unmapped;
+ +      pte_addr = get_hop2_pte_addr(ctx, mmu_prop, table, virt_addr);
+ +      pte = hdev->asic_funcs->read_pte(hdev, pte_addr);
+ +
+ +      /* hop 3 */
+ +      table = get_next_hop_addr(pte);
+ +      if (table == ULLONG_MAX)
+ +              goto unmapped;
+ +      pte_addr = get_hop3_pte_addr(ctx, mmu_prop, table, virt_addr);
+ +      pte = hdev->asic_funcs->read_pte(hdev, pte_addr);
+ +
+ +      if (!(pte & LAST_MASK)) {
+ +              /* hop 4 */
+ +              table = get_next_hop_addr(pte);
+ +              if (table == ULLONG_MAX)
+ +                      goto unmapped;
+ +              pte_addr = get_hop4_pte_addr(ctx, mmu_prop, table,
+ +                                                      virt_addr);
+ +              pte = hdev->asic_funcs->read_pte(hdev, pte_addr);
+ +
+ +              /* With a hop-4 PTE only the flag bits are masked off. */
+ +              offset_mask = FLAGS_MASK;
+ +      }
+ +
+ +      if (!(pte & PAGE_PRESENT_MASK))
+ +              goto unmapped;
+ +
+ +      *phys_addr = (pte & ~offset_mask) | (virt_addr & offset_mask);
+ +
+ +      goto unlock;
+ +
+ +unmapped:
+ +      dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ +                      virt_addr);
+ +      rc = -EINVAL;
+ +unlock:
+ +      mutex_unlock(&ctx->mmu_lock);
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_data_read32() - debugfs read handler returning the 32-bit value at
+ + * entry->addr, formatted as "0x%08x\n".
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer to fill.
+ + * @count: size of @buf.
+ + * @ppos: file position; a non-zero position yields end-of-file.
+ + *
+ + * A device virtual address is translated with device_va_to_pa() before the
+ + * access. Nothing is read while the device is in reset.
+ + *
+ + * Return: number of bytes copied, 0 at end-of-file or during reset, or a
+ + * negative error code.
+ + */
+ +static ssize_t hl_data_read32(struct file *f, char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      u64 target = entry->addr;
+ +      char text[32];
+ +      ssize_t rc;
+ +      u32 word;
+ +      int len;
+ +
+ +      if (atomic_read(&hdev->in_reset)) {
+ +              dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+ +              return 0;
+ +      }
+ +
+ +      if (*ppos)
+ +              return 0;
+ +
+ +      if (hl_is_device_va(hdev, target)) {
+ +              rc = device_va_to_pa(hdev, target, &target);
+ +              if (rc)
+ +                      return rc;
+ +      }
+ +
+ +      rc = hdev->asic_funcs->debugfs_read32(hdev, target, &word);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to read from 0x%010llx\n", target);
+ +              return rc;
+ +      }
+ +
+ +      len = sprintf(text, "0x%08x\n", word);
+ +
+ +      return simple_read_from_buffer(buf, count, ppos, text, len);
+ +}
+ +
+ +/*
+ + * hl_data_write32() - debugfs write handler storing a 32-bit value at
+ + * entry->addr.
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer holding the value as hexadecimal text.
+ + * @count: number of bytes in @buf.
+ + * @ppos: unused.
+ + *
+ + * A device virtual address is translated with device_va_to_pa() before the
+ + * access. Nothing is written while the device is in reset.
+ + *
+ + * Return: @count on success, 0 during reset, or a negative error code.
+ + */
+ +static ssize_t hl_data_write32(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      u64 target = entry->addr;
+ +      ssize_t rc;
+ +      u32 word;
+ +
+ +      if (atomic_read(&hdev->in_reset)) {
+ +              dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+ +              return 0;
+ +      }
+ +
+ +      rc = kstrtouint_from_user(buf, count, 16, &word);
+ +      if (rc)
+ +              return rc;
+ +
+ +      if (hl_is_device_va(hdev, target)) {
+ +              rc = device_va_to_pa(hdev, target, &target);
+ +              if (rc)
+ +                      return rc;
+ +      }
+ +
+ +      rc = hdev->asic_funcs->debugfs_write32(hdev, target, word);
+ +      if (!rc)
+ +              return count;
+ +
+ +      dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
+ +              word, target);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_data_read64() - debugfs read handler returning the 64-bit value at
+ + * entry->addr, formatted as "0x%016llx\n".
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer to fill.
+ + * @count: size of @buf.
+ + * @ppos: file position; a non-zero position yields end-of-file.
+ + *
+ + * A device virtual address is translated with device_va_to_pa() before the
+ + * access. As in hl_data_read32(), nothing is read while the device is in
+ + * reset.
+ + *
+ + * Return: number of bytes copied, 0 at end-of-file or during reset, or a
+ + * negative error code.
+ + */
+ +static ssize_t hl_data_read64(struct file *f, char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      char tmp_buf[32];
+ +      u64 addr = entry->addr;
+ +      u64 val;
+ +      ssize_t rc;
+ +
+ +      /* Same guard as the 32-bit variant: do not touch a resetting device. */
+ +      if (atomic_read(&hdev->in_reset)) {
+ +              dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+ +              return 0;
+ +      }
+ +
+ +      if (*ppos)
+ +              return 0;
+ +
+ +      if (hl_is_device_va(hdev, addr)) {
+ +              rc = device_va_to_pa(hdev, addr, &addr);
+ +              if (rc)
+ +                      return rc;
+ +      }
+ +
+ +      rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
+ +              return rc;
+ +      }
+ +
+ +      sprintf(tmp_buf, "0x%016llx\n", val);
+ +      return simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ +                      strlen(tmp_buf));
+ +}
+ +
+ +/*
+ + * hl_data_write64() - debugfs write handler storing a 64-bit value at
+ + * entry->addr.
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer holding the value as hexadecimal text.
+ + * @count: number of bytes in @buf.
+ + * @ppos: unused.
+ + *
+ + * A device virtual address is translated with device_va_to_pa() before the
+ + * access. As in hl_data_write32(), nothing is written while the device is
+ + * in reset.
+ + *
+ + * Return: @count on success, 0 during reset, or a negative error code.
+ + */
+ +static ssize_t hl_data_write64(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      u64 addr = entry->addr;
+ +      u64 value;
+ +      ssize_t rc;
+ +
+ +      /* Same guard as the 32-bit variant: do not touch a resetting device. */
+ +      if (atomic_read(&hdev->in_reset)) {
+ +              dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+ +              return 0;
+ +      }
+ +
+ +      rc = kstrtoull_from_user(buf, count, 16, &value);
+ +      if (rc)
+ +              return rc;
+ +
+ +      if (hl_is_device_va(hdev, addr)) {
+ +              rc = device_va_to_pa(hdev, addr, &addr);
+ +              if (rc)
+ +                      return rc;
+ +      }
+ +
+ +      rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
+ +                      value, addr);
+ +              return rc;
+ +      }
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_get_power_state() - debugfs read handler reporting the PCI power state.
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer to fill.
+ + * @count: size of @buf.
+ + * @ppos: file position; a non-zero position yields end-of-file.
+ + *
+ + * Maps pdev->current_state to 1 (PCI_D0), 2 (PCI_D3hot) or 3 (anything
+ + * else) and prints it together with a legend.
+ + *
+ + * Return: number of bytes copied, or 0 at end-of-file.
+ + */
+ +static ssize_t hl_get_power_state(struct file *f, char __user *buf,
+ +              size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      char text[200];
+ +      int code, len;
+ +
+ +      if (*ppos)
+ +              return 0;
+ +
+ +      switch (hdev->pdev->current_state) {
+ +      case PCI_D0:
+ +              code = 1;
+ +              break;
+ +      case PCI_D3hot:
+ +              code = 2;
+ +              break;
+ +      default:
+ +              code = 3;
+ +              break;
+ +      }
+ +
+ +      len = sprintf(text,
+ +              "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", code);
+ +
+ +      return simple_read_from_buffer(buf, count, ppos, text, len);
+ +}
+ +
+ +/*
+ + * hl_set_power_state() - debugfs write handler switching the PCI power
+ + * state.
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer holding a decimal value: 1 for D0, 2 for D3hot.
+ + * @count: number of bytes in @buf.
+ + * @ppos: unused.
+ + *
+ + * For D0 the device is powered up, its saved state restored and the device
+ + * re-enabled; for D3hot the state is saved and the device disabled before
+ + * powering down.
+ + *
+ + * Return: @count on success, -EINVAL for any other value, the parse error
+ + * from kstrtouint_from_user(), or the error from pci_enable_device().
+ + */
+ +static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      u32 value;
+ +      ssize_t rc;
+ +
+ +      rc = kstrtouint_from_user(buf, count, 10, &value);
+ +      if (rc)
+ +              return rc;
+ +
+ +      if (value == 1) {
+ +              pci_set_power_state(hdev->pdev, PCI_D0);
+ +              pci_restore_state(hdev->pdev);
+ +              rc = pci_enable_device(hdev->pdev);
+ +              /* Do not report success if the device could not be enabled. */
+ +              if (rc < 0)
+ +                      return rc;
+ +      } else if (value == 2) {
+ +              pci_save_state(hdev->pdev);
+ +              pci_disable_device(hdev->pdev);
+ +              pci_set_power_state(hdev->pdev, PCI_D3hot);
+ +      } else {
+ +              dev_dbg(hdev->dev, "invalid power state value %u\n", value);
+ +              return -EINVAL;
+ +      }
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_i2c_data_read() - debugfs read handler returning the I2C register
+ + * selected by entry->i2c_bus, entry->i2c_addr and entry->i2c_reg.
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer to fill.
+ + * @count: size of @buf.
+ + * @ppos: file position; a non-zero position yields end-of-file.
+ + *
+ + * The value is formatted as "0x%02x\n".
+ + *
+ + * Return: number of bytes copied, 0 at end-of-file, or the error returned
+ + * by hl_debugfs_i2c_read().
+ + */
+ +static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      char text[32];
+ +      ssize_t rc;
+ +      u32 reg_val;
+ +      int len;
+ +
+ +      if (*ppos)
+ +              return 0;
+ +
+ +      rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
+ +                      entry->i2c_reg, &reg_val);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to read from I2C bus %d, addr %d, reg %d\n",
+ +                      entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+ +              return rc;
+ +      }
+ +
+ +      len = sprintf(text, "0x%02x\n", reg_val);
+ +
+ +      return simple_read_from_buffer(buf, count, ppos, text, len);
+ +}
+ +
+ +/*
+ + * hl_i2c_data_write() - debugfs write handler storing a value in the I2C
+ + * register selected by entry->i2c_bus, entry->i2c_addr and entry->i2c_reg.
+ + * @f: debugfs file; its inode's i_private is the hl_dbg_device_entry.
+ + * @buf: user buffer holding the value as hexadecimal text.
+ + * @count: number of bytes in @buf.
+ + * @ppos: unused.
+ + *
+ + * Return: @count on success, otherwise the parse error or the error
+ + * returned by hl_debugfs_i2c_write().
+ + */
+ +static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      ssize_t rc;
+ +      u32 reg_val;
+ +
+ +      rc = kstrtouint_from_user(buf, count, 16, &reg_val);
+ +      if (rc)
+ +              return rc;
+ +
+ +      rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
+ +                      entry->i2c_reg, reg_val);
+ +      if (!rc)
+ +              return count;
+ +
+ +      dev_err(hdev->dev,
+ +              "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
+ +              reg_val, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_led0_write - debugfs write handler of the led0 node
+ + *
+ + * Parses a decimal value from user-space and calls hl_debugfs_led_set() for
+ + * LED 0 with state 1 if the value is non-zero and 0 otherwise.
+ + */
+ +static ssize_t hl_led0_write(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      u32 state;
+ +      ssize_t rc;
+ +
+ +      rc = kstrtouint_from_user(buf, count, 10, &state);
+ +      if (rc != 0)
+ +              return rc;
+ +
+ +      /* any non-zero input turns into state 1 */
+ +      hl_debugfs_led_set(entry->hdev, 0, state ? 1 : 0);
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_led1_write - debugfs write handler of the led1 node
+ + *
+ + * Parses a decimal value from user-space and calls hl_debugfs_led_set() for
+ + * LED 1 with state 1 if the value is non-zero and 0 otherwise.
+ + */
+ +static ssize_t hl_led1_write(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      u32 state;
+ +      ssize_t rc;
+ +
+ +      rc = kstrtouint_from_user(buf, count, 10, &state);
+ +      if (rc != 0)
+ +              return rc;
+ +
+ +      /* any non-zero input turns into state 1 */
+ +      hl_debugfs_led_set(entry->hdev, 1, state ? 1 : 0);
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_led2_write - debugfs write handler of the led2 node
+ + *
+ + * Parses a decimal value from user-space and calls hl_debugfs_led_set() for
+ + * LED 2 with state 1 if the value is non-zero and 0 otherwise.
+ + */
+ +static ssize_t hl_led2_write(struct file *f, const char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      u32 state;
+ +      ssize_t rc;
+ +
+ +      rc = kstrtouint_from_user(buf, count, 10, &state);
+ +      if (rc != 0)
+ +              return rc;
+ +
+ +      /* any non-zero input turns into state 1 */
+ +      hl_debugfs_led_set(entry->hdev, 2, state ? 1 : 0);
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_device_read - debugfs read handler of the device node
+ + *
+ + * Returns a fixed help string listing the values the node accepts on write.
+ + */
+ +static ssize_t hl_device_read(struct file *f, char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      static const char usage[] =
+ +              "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
+ +
+ +      /* sizeof() counts the terminating NUL, which is not sent to the user */
+ +      return simple_read_from_buffer(buf, count, ppos, usage,
+ +                                      sizeof(usage) - 1);
+ +}
+ +
+ +/*
+ + * hl_device_write - debugfs write handler of the device node
+ + *
+ + * Copies a short command string from user-space and acts on its prefix:
+ + * "disable"/"enable" set/clear hdev->disabled, "suspend"/"resume" call the
+ + * matching asic_funcs callback and "cpu_timeout" sets
+ + * hdev->device_cpu_disabled.
+ + *
+ + * Returns @count on success, 0 if the write does not start at offset 0,
+ + * the error of simple_write_to_buffer() if the copy from user-space fails,
+ + * or -EINVAL for an unknown command.
+ + */
+ +static ssize_t hl_device_write(struct file *f, const char __user *buf,
+ +                                   size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      char data[30] = {0};
+ +      ssize_t rc;
+ +
+ +      /* don't allow partial writes */
+ +      if (*ppos != 0)
+ +              return 0;
+ +
+ +      /* leave the last byte untouched so data is always NUL-terminated */
+ +      rc = simple_write_to_buffer(data, sizeof(data) - 1, ppos, buf, count);
+ +      if (rc < 0)
+ +              return rc;
+ +
+ +      if (strncmp("disable", data, strlen("disable")) == 0) {
+ +              hdev->disabled = true;
+ +      } else if (strncmp("enable", data, strlen("enable")) == 0) {
+ +              hdev->disabled = false;
+ +      } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
+ +              hdev->asic_funcs->suspend(hdev);
+ +      } else if (strncmp("resume", data, strlen("resume")) == 0) {
+ +              hdev->asic_funcs->resume(hdev);
+ +      } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
+ +              hdev->device_cpu_disabled = true;
+ +      } else {
+ +              dev_err(hdev->dev,
+ +                      "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
+ +              return -EINVAL;
+ +      }
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_clk_gate_read - debugfs read handler of the clk_gate node
+ + *
+ + * Formats hdev->clock_gating_mask as a hexadecimal string and copies it,
+ + * including its terminating NUL, to user-space. Reads that do not start at
+ + * offset 0 return 0.
+ + */
+ +static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      char tmp_buf[200];
+ +      ssize_t rc;
+ +
+ +      if (*ppos)
+ +              return 0;
+ +
-       u32 value;
++      sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
+ +      /*
+ +       * Bound the copy by the size of the user buffer (count); using the
+ +       * string length here could write past a shorter user buffer.
+ +       */
+ +      rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ +                      strlen(tmp_buf) + 1);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_clk_gate_write - debugfs write handler of the clk_gate node
+ + *
+ + * Parses a hexadecimal 64-bit value from user-space, stores it in
+ + * hdev->clock_gating_mask and calls the set_clock_gating() ASIC callback.
+ + * While hdev->in_reset is set nothing is changed and 0 is returned.
+ + * Returns @count on success or the parsing error code.
+ + */
+ +static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
+ +                                   size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
-       rc = kstrtouint_from_user(buf, count, 10, &value);
++      u64 value;
+ +      ssize_t rc;
+ +
+ +      /* refuse to touch the mask while a reset is in progress */
+ +      if (atomic_read(&hdev->in_reset)) {
+ +              dev_warn_ratelimited(hdev->dev,
+ +                              "Can't change clock gating during reset\n");
+ +              return 0;
+ +      }
+ +
-       if (value) {
-               hdev->clock_gating = 1;
-               if (hdev->asic_funcs->enable_clock_gating)
-                       hdev->asic_funcs->enable_clock_gating(hdev);
-       } else {
-               if (hdev->asic_funcs->disable_clock_gating)
-                       hdev->asic_funcs->disable_clock_gating(hdev);
-               hdev->clock_gating = 0;
-       }
++      rc = kstrtoull_from_user(buf, count, 16, &value);
+ +      if (rc)
+ +              return rc;
+ +
++      hdev->clock_gating_mask = value;
++      hdev->asic_funcs->set_clock_gating(hdev);
+ +
+ +      return count;
+ +}
+ +
+ +/*
+ + * hl_stop_on_err_read - debugfs read handler of the stop_on_err node
+ + *
+ + * Formats hdev->stop_on_err as a decimal string and copies it, including
+ + * its terminating NUL, to user-space. Reads that do not start at offset 0
+ + * return 0.
+ + */
+ +static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
+ +                                      size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      char tmp_buf[200];
+ +      ssize_t rc;
+ +
+ +      if (*ppos)
+ +              return 0;
+ +
+ +      sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
+ +      /*
+ +       * Bound the copy by the size of the user buffer (count); using the
+ +       * string length here could write past a shorter user buffer.
+ +       */
+ +      rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
+ +                      strlen(tmp_buf) + 1);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_stop_on_err_write - debugfs write handler of the stop_on_err node
+ + *
+ + * Parses a decimal value from user-space, sets hdev->stop_on_err to 1 if it
+ + * is non-zero and to 0 otherwise, and then calls hl_device_reset(). While
+ + * hdev->in_reset is set nothing is changed and 0 is returned.
+ + */
+ +static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
+ +                                   size_t count, loff_t *ppos)
+ +{
+ +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+ +      struct hl_device *hdev = entry->hdev;
+ +      ssize_t rc;
+ +      u32 val;
+ +
+ +      if (atomic_read(&hdev->in_reset) != 0) {
+ +              dev_warn_ratelimited(hdev->dev,
+ +                              "Can't change stop on error during reset\n");
+ +              return 0;
+ +      }
+ +
+ +      rc = kstrtouint_from_user(buf, count, 10, &val);
+ +      if (rc != 0)
+ +              return rc;
+ +
+ +      if (val)
+ +              hdev->stop_on_err = 1;
+ +      else
+ +              hdev->stop_on_err = 0;
+ +
+ +      hl_device_reset(hdev, false, false);
+ +
+ +      return count;
+ +}
+ +
+ +/* File operations of the per-device debugfs nodes created with a handler */
+ +
+ +static const struct file_operations hl_data32b_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_data_read32,
+ +      .write = hl_data_write32,
+ +};
+ +
+ +static const struct file_operations hl_data64b_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_data_read64,
+ +      .write = hl_data_write64,
+ +};
+ +
+ +static const struct file_operations hl_i2c_data_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_i2c_data_read,
+ +      .write = hl_i2c_data_write,
+ +};
+ +
+ +static const struct file_operations hl_power_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_get_power_state,
+ +      .write = hl_set_power_state,
+ +};
+ +
+ +/* the LED nodes are write-only */
+ +static const struct file_operations hl_led0_fops = {
+ +      .owner = THIS_MODULE,
+ +      .write = hl_led0_write,
+ +};
+ +
+ +static const struct file_operations hl_led1_fops = {
+ +      .owner = THIS_MODULE,
+ +      .write = hl_led1_write,
+ +};
+ +
+ +static const struct file_operations hl_led2_fops = {
+ +      .owner = THIS_MODULE,
+ +      .write = hl_led2_write,
+ +};
+ +
+ +static const struct file_operations hl_device_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_device_read,
+ +      .write = hl_device_write,
+ +};
+ +
+ +static const struct file_operations hl_clk_gate_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_clk_gate_read,
+ +      .write = hl_clk_gate_write,
+ +};
+ +
+ +static const struct file_operations hl_stop_on_err_fops = {
+ +      .owner = THIS_MODULE,
+ +      .read = hl_stop_on_err_read,
+ +      .write = hl_stop_on_err_write,
+ +};
+ +
+ +/*
+ + * seq_file based debugfs nodes: node name, show callback and optional write
+ + * callback (NULL when the node does not accept writes).
+ + */
+ +static const struct hl_info_list hl_debugfs_list[] = {
+ +      { .name = "command_buffers", .show = command_buffers_show,
+ +        .write = NULL },
+ +      { .name = "command_submission", .show = command_submission_show,
+ +        .write = NULL },
+ +      { .name = "command_submission_jobs",
+ +        .show = command_submission_jobs_show, .write = NULL },
+ +      { .name = "userptr", .show = userptr_show, .write = NULL },
+ +      { .name = "vm", .show = vm_show, .write = NULL },
+ +      { .name = "mmu", .show = mmu_show, .write = mmu_asid_va_write },
+ +      { .name = "engines", .show = engines_show, .write = NULL },
+ +};
+ +
+ +/*
+ + * hl_debugfs_open - open handler of the seq_file based debugfs nodes
+ + *
+ + * Binds the node's show callback to the file through single_open(), passing
+ + * the node's hl_debugfs_entry as the private data.
+ + */
+ +static int hl_debugfs_open(struct inode *inode, struct file *file)
+ +{
+ +      struct hl_debugfs_entry *dbg_entry = inode->i_private;
+ +
+ +      return single_open(file, dbg_entry->info_ent->show, dbg_entry);
+ +}
+ +
+ +/*
+ + * hl_debugfs_write - write handler of the seq_file based debugfs nodes
+ + *
+ + * Forwards the write to the node's write callback, or returns -EINVAL if
+ + * the node has none.
+ + */
+ +static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
+ +              size_t count, loff_t *f_pos)
+ +{
+ +      struct hl_debugfs_entry *dbg_entry = file->f_inode->i_private;
+ +
+ +      if (!dbg_entry->info_ent->write)
+ +              return -EINVAL;
+ +
+ +      return dbg_entry->info_ent->write(file, buf, count, f_pos);
+ +}
+ +
+ +/*
+ + * File operations shared by all nodes listed in hl_debugfs_list: reads go
+ + * through the seq_file helpers, writes through hl_debugfs_write().
+ + */
+ +static const struct file_operations hl_debugfs_fops = {
+ +      .owner = THIS_MODULE,
+ +      .open = hl_debugfs_open,
+ +      .read = seq_read,
+ +      .write = hl_debugfs_write,
+ +      .llseek = seq_lseek,
+ +      .release = single_release,
+ +};
+ +
+ +/*
+ + * hl_debugfs_add_device - create the debugfs directory and nodes of a device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Initializes the lists and locks of the device's debugfs entry, creates a
+ + * directory named after the device under the driver's debugfs root and
+ + * populates it with the fixed nodes plus one node per hl_debugfs_list entry.
+ + * Returns without creating anything if the entry array cannot be allocated.
+ + */
+ +void hl_debugfs_add_device(struct hl_device *hdev)
+ +{
+ +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+ +      int count = ARRAY_SIZE(hl_debugfs_list);
+ +      struct hl_debugfs_entry *entry;
+ +      int i;
+ +
+ +      dev_entry->hdev = hdev;
+ +      dev_entry->entry_arr = kmalloc_array(count,
+ +                                      sizeof(struct hl_debugfs_entry),
+ +                                      GFP_KERNEL);
+ +      if (!dev_entry->entry_arr)
+ +              return;
+ +
+ +      INIT_LIST_HEAD(&dev_entry->file_list);
+ +      INIT_LIST_HEAD(&dev_entry->cb_list);
+ +      INIT_LIST_HEAD(&dev_entry->cs_list);
+ +      INIT_LIST_HEAD(&dev_entry->cs_job_list);
+ +      INIT_LIST_HEAD(&dev_entry->userptr_list);
+ +      INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
+ +      mutex_init(&dev_entry->file_mutex);
+ +      spin_lock_init(&dev_entry->cb_spinlock);
+ +      spin_lock_init(&dev_entry->cs_spinlock);
+ +      spin_lock_init(&dev_entry->cs_job_spinlock);
+ +      spin_lock_init(&dev_entry->userptr_spinlock);
+ +      spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
+ +
+ +      dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
+ +                                              hl_debug_root);
+ +
+ +      debugfs_create_x64("addr",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              &dev_entry->addr);
+ +
+ +      debugfs_create_file("data32",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_data32b_fops);
+ +
+ +      debugfs_create_file("data64",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_data64b_fops);
+ +
+ +      debugfs_create_file("set_power_state",
+ +                              0200,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_power_fops);
+ +
+ +      debugfs_create_u8("i2c_bus",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              &dev_entry->i2c_bus);
+ +
+ +      debugfs_create_u8("i2c_addr",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              &dev_entry->i2c_addr);
+ +
+ +      debugfs_create_u8("i2c_reg",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              &dev_entry->i2c_reg);
+ +
+ +      debugfs_create_file("i2c_data",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_i2c_data_fops);
+ +
+ +      debugfs_create_file("led0",
+ +                              0200,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_led0_fops);
+ +
+ +      debugfs_create_file("led1",
+ +                              0200,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_led1_fops);
+ +
+ +      debugfs_create_file("led2",
+ +                              0200,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_led2_fops);
+ +
+ +      debugfs_create_file("device",
+ +                              0200,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_device_fops);
+ +
+ +      debugfs_create_file("clk_gate",
+ +                              0200,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_clk_gate_fops);
+ +
+ +      debugfs_create_file("stop_on_err",
+ +                              0644,
+ +                              dev_entry->root,
+ +                              dev_entry,
+ +                              &hl_stop_on_err_fops);
+ +
+ +      for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
+ +              /*
+ +               * Fill in the entry before the file is created: the array
+ +               * is not zeroed, and once the file exists it can be opened,
+ +               * at which point hl_debugfs_open() dereferences
+ +               * entry->info_ent.
+ +               */
+ +              entry->info_ent = &hl_debugfs_list[i];
+ +              entry->dev_entry = dev_entry;
+ +              entry->dent = debugfs_create_file(hl_debugfs_list[i].name,
+ +                                      0444,
+ +                                      dev_entry->root,
+ +                                      entry,
+ +                                      &hl_debugfs_fops);
+ +      }
+ +}
+ +
+ +/*
+ + * hl_debugfs_remove_device - remove the debugfs directory of a device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Removes the device's debugfs directory recursively, then destroys the
+ + * file mutex and frees the entry array of the debugfs device entry.
+ + */
+ +void hl_debugfs_remove_device(struct hl_device *hdev)
+ +{
+ +      struct hl_dbg_device_entry *dbg = &hdev->hl_debugfs;
+ +
+ +      debugfs_remove_recursive(dbg->root);
+ +      mutex_destroy(&dbg->file_mutex);
+ +      kfree(dbg->entry_arr);
+ +}
+ +
+ +/* Link @hpriv into the device's debugfs file_list, under file_mutex */
+ +void hl_debugfs_add_file(struct hl_fpriv *hpriv)
+ +{
+ +      struct hl_device *hdev = hpriv->hdev;
+ +
+ +      mutex_lock(&hdev->hl_debugfs.file_mutex);
+ +      list_add(&hpriv->debugfs_list, &hdev->hl_debugfs.file_list);
+ +      mutex_unlock(&hdev->hl_debugfs.file_mutex);
+ +}
+ +
+ +/* Unlink @hpriv from the device's debugfs file_list, under file_mutex */
+ +void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
+ +{
+ +      struct hl_device *hdev = hpriv->hdev;
+ +
+ +      mutex_lock(&hdev->hl_debugfs.file_mutex);
+ +      list_del(&hpriv->debugfs_list);
+ +      mutex_unlock(&hdev->hl_debugfs.file_mutex);
+ +}
+ +
+ +/* Link @cb into the device's debugfs cb_list, under cb_spinlock */
+ +void hl_debugfs_add_cb(struct hl_cb *cb)
+ +{
+ +      struct hl_device *hdev = cb->hdev;
+ +
+ +      spin_lock(&hdev->hl_debugfs.cb_spinlock);
+ +      list_add(&cb->debugfs_list, &hdev->hl_debugfs.cb_list);
+ +      spin_unlock(&hdev->hl_debugfs.cb_spinlock);
+ +}
+ +
+ +/* Unlink @cb from the device's debugfs cb_list, under cb_spinlock */
+ +void hl_debugfs_remove_cb(struct hl_cb *cb)
+ +{
+ +      struct hl_device *hdev = cb->hdev;
+ +
+ +      spin_lock(&hdev->hl_debugfs.cb_spinlock);
+ +      list_del(&cb->debugfs_list);
+ +      spin_unlock(&hdev->hl_debugfs.cb_spinlock);
+ +}
+ +
+ +/* Link @cs into the device's debugfs cs_list, under cs_spinlock */
+ +void hl_debugfs_add_cs(struct hl_cs *cs)
+ +{
+ +      struct hl_device *hdev = cs->ctx->hdev;
+ +
+ +      spin_lock(&hdev->hl_debugfs.cs_spinlock);
+ +      list_add(&cs->debugfs_list, &hdev->hl_debugfs.cs_list);
+ +      spin_unlock(&hdev->hl_debugfs.cs_spinlock);
+ +}
+ +
+ +/* Unlink @cs from the device's debugfs cs_list, under cs_spinlock */
+ +void hl_debugfs_remove_cs(struct hl_cs *cs)
+ +{
+ +      struct hl_device *hdev = cs->ctx->hdev;
+ +
+ +      spin_lock(&hdev->hl_debugfs.cs_spinlock);
+ +      list_del(&cs->debugfs_list);
+ +      spin_unlock(&hdev->hl_debugfs.cs_spinlock);
+ +}
+ +
+ +/* Link @job into the device's debugfs cs_job_list, under cs_job_spinlock */
+ +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
+ +{
+ +      spin_lock(&hdev->hl_debugfs.cs_job_spinlock);
+ +      list_add(&job->debugfs_list, &hdev->hl_debugfs.cs_job_list);
+ +      spin_unlock(&hdev->hl_debugfs.cs_job_spinlock);
+ +}
+ +
+ +/* Unlink @job from the device's debugfs cs_job_list, under cs_job_spinlock */
+ +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
+ +{
+ +      spin_lock(&hdev->hl_debugfs.cs_job_spinlock);
+ +      list_del(&job->debugfs_list);
+ +      spin_unlock(&hdev->hl_debugfs.cs_job_spinlock);
+ +}
+ +
+ +/* Link @userptr into the device's debugfs userptr_list, under its spinlock */
+ +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
+ +{
+ +      spin_lock(&hdev->hl_debugfs.userptr_spinlock);
+ +      list_add(&userptr->debugfs_list, &hdev->hl_debugfs.userptr_list);
+ +      spin_unlock(&hdev->hl_debugfs.userptr_spinlock);
+ +}
+ +
+ +/* Unlink @userptr from the device's debugfs userptr_list, under its spinlock */
+ +void hl_debugfs_remove_userptr(struct hl_device *hdev,
+ +                              struct hl_userptr *userptr)
+ +{
+ +      spin_lock(&hdev->hl_debugfs.userptr_spinlock);
+ +      list_del(&userptr->debugfs_list);
+ +      spin_unlock(&hdev->hl_debugfs.userptr_spinlock);
+ +}
+ +
+ +/* Link @ctx into the device's debugfs ctx_mem_hash_list, under its spinlock */
+ +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
+ +{
+ +      spin_lock(&hdev->hl_debugfs.ctx_mem_hash_spinlock);
+ +      list_add(&ctx->debugfs_list, &hdev->hl_debugfs.ctx_mem_hash_list);
+ +      spin_unlock(&hdev->hl_debugfs.ctx_mem_hash_spinlock);
+ +}
+ +
+ +/* Unlink @ctx from the device's debugfs ctx_mem_hash_list, under its spinlock */
+ +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
+ +{
+ +      spin_lock(&hdev->hl_debugfs.ctx_mem_hash_spinlock);
+ +      list_del(&ctx->debugfs_list);
+ +      spin_unlock(&hdev->hl_debugfs.ctx_mem_hash_spinlock);
+ +}
+ +
+ +/* Create the driver's top-level "habanalabs" debugfs directory */
+ +void __init hl_debugfs_init(void)
+ +{
+ +      hl_debug_root = debugfs_create_dir("habanalabs", NULL);
+ +}
+ +
+ +/* Remove the driver's top-level debugfs directory and everything below it */
+ +void hl_debugfs_fini(void)
+ +{
+ +      debugfs_remove_recursive(hl_debug_root);
+ +}
diff --cc drivers/misc/habanalabs/common/device.c

index 9919ff121067188ab08d2c74ba5cadf491624e3a,0000000000000000000000000000000000000000..be16b75bdfdb5bbd0ebb54b550a2a4fcfe36e76e

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/device.c
--- /dev/null
+++ b/drivers/misc/habanalabs/common/device.c
@@@ -1,1514 -1,0 +1,1514 @@@
-                       hdev->asic_funcs->enable_clock_gating(hdev);
+ +// SPDX-License-Identifier: GPL-2.0
+ +
+ +/*
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + */
+ +
+ +#define pr_fmt(fmt)                   "habanalabs: " fmt
+ +
+ +#include "habanalabs.h"
+ +
+ +#include <linux/pci.h>
+ +#include <linux/sched/signal.h>
+ +#include <linux/hwmon.h>
+ +#include <uapi/misc/habanalabs.h>
+ +
+ +#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
+ +
+ +/*
+ + * hl_device_disabled_or_in_reset - check whether the device can be used
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Returns true if the device is marked disabled or its in_reset counter is
+ + * non-zero, false otherwise.
+ + */
+ +bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
+ +{
+ +      return hdev->disabled || atomic_read(&hdev->in_reset);
+ +}
+ +
+ +/*
+ + * hl_device_status - translate the device state into a status value
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * A disabled device is reported as malfunctioning; otherwise a non-zero
+ + * in_reset counter is reported as in-reset; otherwise the device is
+ + * reported as operational.
+ + */
+ +enum hl_device_status hl_device_status(struct hl_device *hdev)
+ +{
+ +      if (hdev->disabled)
+ +              return HL_DEVICE_STATUS_MALFUNCTION;
+ +
+ +      if (atomic_read(&hdev->in_reset))
+ +              return HL_DEVICE_STATUS_IN_RESET;
+ +
+ +      return HL_DEVICE_STATUS_OPERATIONAL;
+ +}
+ +
+ +/*
+ + * hpriv_release - kref release callback of struct hl_fpriv
+ + *
+ + * @ref: pointer to the refcount member embedded in the hl_fpriv
+ + *
+ + * Drops the task pid reference, removes the object from debugfs and from
+ + * the device's fpriv list, clears hdev->compute_ctx and frees the object.
+ + */
+ +static void hpriv_release(struct kref *ref)
+ +{
+ +      struct hl_fpriv *hpriv = container_of(ref, struct hl_fpriv, refcount);
+ +      struct hl_device *hdev = hpriv->hdev;
+ +
+ +      put_pid(hpriv->taskpid);
+ +      hl_debugfs_remove_file(hpriv);
+ +      mutex_destroy(&hpriv->restore_phase_mutex);
+ +
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +      list_del(&hpriv->dev_node);
+ +      hdev->compute_ctx = NULL;
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      kfree(hpriv);
+ +}
+ +
+ +/* Take a reference on @hpriv */
+ +void hl_hpriv_get(struct hl_fpriv *hpriv)
+ +{
+ +      kref_get(&hpriv->refcount);
+ +}
+ +
+ +/* Drop a reference on @hpriv; hpriv_release() runs when the last one is gone */
+ +void hl_hpriv_put(struct hl_fpriv *hpriv)
+ +{
+ +      kref_put(&hpriv->refcount, hpriv_release);
+ +}
+ +
+ +/*
+ + * hl_device_release - release function for habanalabs device
+ + *
+ + * @inode: pointer to inode structure
+ + * @filp: pointer to file structure
+ + *
+ + * Called when a process closes a habanalabs device. Tears down the command
+ + * buffer and context managers of the file and drops the file's reference
+ + * on its private data.
+ + */
+ +static int hl_device_release(struct inode *inode, struct file *filp)
+ +{
+ +      struct hl_fpriv *hpriv = filp->private_data;
+ +      struct hl_device *hdev = hpriv->hdev;
+ +
+ +      hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
+ +      hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
+ +
+ +      filp->private_data = NULL;
+ +      hl_hpriv_put(hpriv);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * hl_device_release_ctrl - release function for the control device
+ + *
+ + * @inode: pointer to inode structure
+ + * @filp: pointer to file structure
+ + *
+ + * Removes the file's private data from the device's fpriv list and frees
+ + * it directly; no reference counting is involved here.
+ + */
+ +static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
+ +{
+ +      struct hl_fpriv *hpriv = filp->private_data;
+ +      struct hl_device *hdev = hpriv->hdev;
+ +
+ +      filp->private_data = NULL;
+ +
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +      list_del(&hpriv->dev_node);
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      kfree(hpriv);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * hl_mmap - mmap function for habanalabs device
+ + *
+ + * @filp: pointer to file structure
+ + * @vma: pointer to vm_area_struct of the process
+ + *
+ + * Called when a process does an mmap on a habanalabs device. Only offsets
+ + * that carry all HL_MMAP_CB_MASK bits are accepted; those bits are cleared
+ + * from the page offset and the request is handed to hl_cb_mmap(). Any other
+ + * offset is rejected with -EINVAL.
+ + */
+ +static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+ +{
+ +      struct hl_fpriv *hpriv = filp->private_data;
+ +
+ +      if ((vma->vm_pgoff & HL_MMAP_CB_MASK) != HL_MMAP_CB_MASK)
+ +              return -EINVAL;
+ +
+ +      /* all mask bits are set, so the XOR strips them from the offset */
+ +      vma->vm_pgoff ^= HL_MMAP_CB_MASK;
+ +
+ +      return hl_cb_mmap(hpriv, vma);
+ +}
+ +
+ +/* File operations of the main (compute) char device */
+ +static const struct file_operations hl_ops = {
+ +      .owner = THIS_MODULE,
+ +      .open = hl_device_open,
+ +      .release = hl_device_release,
+ +      .mmap = hl_mmap,
+ +      .unlocked_ioctl = hl_ioctl,
+ +      .compat_ioctl = hl_ioctl,
+ +};
+ +
+ +/* File operations of the control char device; it has no mmap handler */
+ +static const struct file_operations hl_ctrl_ops = {
+ +      .owner = THIS_MODULE,
+ +      .open = hl_device_open_ctrl,
+ +      .release = hl_device_release_ctrl,
+ +      .unlocked_ioctl = hl_ioctl_control,
+ +      .compat_ioctl = hl_ioctl_control,
+ +};
+ +
+ +/* Release callback installed by device_init_cdev(); frees the struct device */
+ +static void device_release_func(struct device *dev)
+ +{
+ +      kfree(dev);
+ +}
+ +
+ +/*
+ + * device_init_cdev - Initialize cdev and device for habanalabs device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @hclass: pointer to the class object of the device
+ + * @minor: minor number of the specific device
+ + * @fops: file operations to install for this device
+ + * @name: name of the device as it will appear in the filesystem
+ + * @cdev: pointer to the char device object that will be initialized
+ + * @dev: pointer to the device object that will be allocated and initialized
+ + *
+ + * Initialize a cdev and allocate and initialize a Linux device for the
+ + * habanalabs device. Returns 0 on success or -ENOMEM if the device object
+ + * cannot be allocated.
+ + */
+ +static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
+ +                              int minor, const struct file_operations *fops,
+ +                              char *name, struct cdev *cdev,
+ +                              struct device **dev)
+ +{
+ +      struct device *new_dev;
+ +
+ +      cdev_init(cdev, fops);
+ +      cdev->owner = THIS_MODULE;
+ +
+ +      new_dev = kzalloc(sizeof(*new_dev), GFP_KERNEL);
+ +      *dev = new_dev;
+ +      if (!new_dev)
+ +              return -ENOMEM;
+ +
+ +      device_initialize(new_dev);
+ +      new_dev->devt = MKDEV(hdev->major, minor);
+ +      new_dev->class = hclass;
+ +      new_dev->release = device_release_func;
+ +      dev_set_drvdata(new_dev, hdev);
+ +      dev_set_name(new_dev, "%s", name);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * device_cdev_sysfs_add - add the char devices and sysfs nodes of a device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Adds the main char device, then the control char device, then initializes
+ + * sysfs. On failure everything added so far is removed again and the error
+ + * code is returned. On success hdev->cdev_sysfs_created is set.
+ + */
+ +static int device_cdev_sysfs_add(struct hl_device *hdev)
+ +{
+ +      int rc;
+ +
+ +      rc = cdev_device_add(&hdev->cdev, hdev->dev);
+ +      if (rc != 0) {
+ +              dev_err(hdev->dev,
+ +                      "failed to add a char device to the system\n");
+ +              return rc;
+ +      }
+ +
+ +      rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ +      if (rc != 0) {
+ +              dev_err(hdev->dev,
+ +                      "failed to add a control char device to the system\n");
+ +              goto del_main_cdev;
+ +      }
+ +
+ +      /* hl_sysfs_init() must be done after adding the device to the system */
+ +      rc = hl_sysfs_init(hdev);
+ +      if (rc != 0) {
+ +              dev_err(hdev->dev, "failed to initialize sysfs\n");
+ +              goto del_ctrl_cdev;
+ +      }
+ +
+ +      hdev->cdev_sysfs_created = true;
+ +
+ +      return 0;
+ +
+ +del_ctrl_cdev:
+ +      cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ +del_main_cdev:
+ +      cdev_device_del(&hdev->cdev, hdev->dev);
+ +      return rc;
+ +}
+ +
+ +/*
+ + * device_cdev_sysfs_del - undo device_cdev_sysfs_add()
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * If the char devices and sysfs nodes were created they are removed;
+ + * otherwise the two device objects are freed directly.
+ + */
+ +static void device_cdev_sysfs_del(struct hl_device *hdev)
+ +{
+ +      if (hdev->cdev_sysfs_created) {
+ +              hl_sysfs_fini(hdev);
+ +              cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
+ +              cdev_device_del(&hdev->cdev, hdev->dev);
+ +              return;
+ +      }
+ +
+ +      /* device_release() won't be called so must free devices explicitly */
+ +      kfree(hdev->dev_ctrl);
+ +      kfree(hdev->dev);
+ +}
+ +
+ +/*
+ + * device_early_init - do some early initialization for the habanalabs device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Install the ASIC-specific function pointers, call the ASIC early_init
+ + * function and allocate the ASID state, the per-completion-queue
+ + * workqueues, the event workqueue, the hwmon chip info and the idle/busy
+ + * timestamp array. Finally initialize the locks and lists of the device.
+ + *
+ + * Returns 0 on success. On failure everything that was set up is torn down
+ + * in reverse order and a negative error code is returned.
+ + */
+ +static int device_early_init(struct hl_device *hdev)
+ +{
+ +      int i, rc;
+ +      char workq_name[32];
+ +
+ +      switch (hdev->asic_type) {
+ +      case ASIC_GOYA:
+ +              goya_set_asic_funcs(hdev);
+ +              strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
+ +              break;
+ +      case ASIC_GAUDI:
+ +              gaudi_set_asic_funcs(hdev);
+ +              strlcpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
+ +              break;
+ +      default:
+ +              dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
+ +                      hdev->asic_type);
+ +              return -EINVAL;
+ +      }
+ +
+ +      rc = hdev->asic_funcs->early_init(hdev);
+ +      if (rc)
+ +              return rc;
+ +
+ +      rc = hl_asid_init(hdev);
+ +      if (rc)
+ +              goto early_fini;
+ +
+ +      if (hdev->asic_prop.completion_queues_count) {
+ +              /* zeroed, so the error path can tell which queues exist */
+ +              hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
+ +                              sizeof(*hdev->cq_wq),
+ +                              GFP_ATOMIC);
+ +              if (!hdev->cq_wq) {
+ +                      rc = -ENOMEM;
+ +                      goto asid_fini;
+ +              }
+ +      }
+ +
+ +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
+ +              snprintf(workq_name, sizeof(workq_name), "hl-free-jobs-%u", i);
+ +              hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
+ +              /* check the workqueue just created, not the array itself */
+ +              if (hdev->cq_wq[i] == NULL) {
+ +                      dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+ +                      rc = -ENOMEM;
+ +                      goto free_cq_wq;
+ +              }
+ +      }
+ +
+ +      hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
+ +      if (hdev->eq_wq == NULL) {
+ +              dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
+ +              rc = -ENOMEM;
+ +              goto free_cq_wq;
+ +      }
+ +
+ +      hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
+ +                                      GFP_KERNEL);
+ +      if (!hdev->hl_chip_info) {
+ +              rc = -ENOMEM;
+ +              goto free_eq_wq;
+ +      }
+ +
+ +      hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
+ +                                      sizeof(struct hl_device_idle_busy_ts),
+ +                                      (GFP_KERNEL | __GFP_ZERO));
+ +      if (!hdev->idle_busy_ts_arr) {
+ +              rc = -ENOMEM;
+ +              goto free_chip_info;
+ +      }
+ +
+ +      hl_cb_mgr_init(&hdev->kernel_cb_mgr);
+ +
+ +      mutex_init(&hdev->send_cpu_message_lock);
+ +      mutex_init(&hdev->debug_lock);
+ +      mutex_init(&hdev->mmu_cache_lock);
+ +      INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
+ +      spin_lock_init(&hdev->hw_queues_mirror_lock);
+ +      INIT_LIST_HEAD(&hdev->fpriv_list);
+ +      mutex_init(&hdev->fpriv_list_lock);
+ +      atomic_set(&hdev->in_reset, 0);
+ +
+ +      return 0;
+ +
+ +free_chip_info:
+ +      kfree(hdev->hl_chip_info);
+ +free_eq_wq:
+ +      destroy_workqueue(hdev->eq_wq);
+ +free_cq_wq:
+ +      /* only the workqueues that were actually created are non-NULL */
+ +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ +              if (hdev->cq_wq[i])
+ +                      destroy_workqueue(hdev->cq_wq[i]);
+ +      kfree(hdev->cq_wq);
+ +asid_fini:
+ +      hl_asid_fini(hdev);
+ +early_fini:
+ +      if (hdev->asic_funcs->early_fini)
+ +              hdev->asic_funcs->early_fini(hdev);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * device_early_fini - finalize all that was done in device_early_init
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Destroys the mutexes, the kernel CB manager, the allocated arrays, the
+ + * event and completion-queue workqueues and the ASID state, and finally
+ + * calls the ASIC early_fini function if one is installed.
+ + */
+ +static void device_early_fini(struct hl_device *hdev)
+ +{
+ +      int cq;
+ +
+ +      mutex_destroy(&hdev->mmu_cache_lock);
+ +      mutex_destroy(&hdev->debug_lock);
+ +      mutex_destroy(&hdev->send_cpu_message_lock);
+ +      mutex_destroy(&hdev->fpriv_list_lock);
+ +
+ +      hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
+ +
+ +      kfree(hdev->idle_busy_ts_arr);
+ +      kfree(hdev->hl_chip_info);
+ +
+ +      destroy_workqueue(hdev->eq_wq);
+ +
+ +      for (cq = 0 ; cq < hdev->asic_prop.completion_queues_count ; cq++) {
+ +              destroy_workqueue(hdev->cq_wq[cq]);
+ +      }
+ +      kfree(hdev->cq_wq);
+ +
+ +      hl_asid_fini(hdev);
+ +
+ +      if (hdev->asic_funcs->early_fini != NULL)
+ +              hdev->asic_funcs->early_fini(hdev);
+ +}
+ +
+ +/*
+ + * set_freq_to_low_job - periodic work that lowers the device frequency
+ + *
+ + * @work: the work member of hdev->work_freq
+ + *
+ + * Sets the frequency to PLL_LOW when hdev->compute_ctx is NULL, checked
+ + * under fpriv_list_lock, and then re-arms itself.
+ + */
+ +static void set_freq_to_low_job(struct work_struct *work)
+ +{
+ +      struct hl_device *hdev = container_of(work, struct hl_device,
+ +                                              work_freq.work);
+ +      bool no_compute_ctx;
+ +
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +
+ +      no_compute_ctx = !hdev->compute_ctx;
+ +      if (no_compute_ctx)
+ +              hl_device_set_frequency(hdev, PLL_LOW);
+ +
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      schedule_delayed_work(&hdev->work_freq,
+ +                      usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+ +}
+ +
+ +/*
+ + * hl_device_heartbeat - periodic work that checks the device is responsive
+ + *
+ + * @work: the work member of hdev->work_heartbeat
+ + *
+ + * Skips the check while the device is disabled or in reset. If the
+ + * send_heartbeat() ASIC callback returns non-zero the device is reset and
+ + * the work is not re-armed; in every other case the work re-arms itself.
+ + */
+ +static void hl_device_heartbeat(struct work_struct *work)
+ +{
+ +      struct hl_device *hdev = container_of(work, struct hl_device,
+ +                                              work_heartbeat.work);
+ +
+ +      if (!hl_device_disabled_or_in_reset(hdev) &&
+ +                      hdev->asic_funcs->send_heartbeat(hdev)) {
+ +              dev_err(hdev->dev, "Device heartbeat failed!\n");
+ +              hl_device_reset(hdev, true, false);
+ +              return;
+ +      }
+ +
+ +      schedule_delayed_work(&hdev->work_heartbeat,
+ +                      usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+ +}
+ +
+ +/*
+ + * device_late_init - do late stuff initialization for the habanalabs device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Do stuff that either needs the device H/W queues to be active or needs
+ + * to happen after all the rest of the initialization is finished: the ASIC
+ + * late_init callback, the initial PLL profile, the periodic low-frequency
+ + * job and, if enabled, the heartbeat job.
+ + */
+ +static int device_late_init(struct hl_device *hdev)
+ +{
+ +      int rc;
+ +
+ +      if (hdev->asic_funcs->late_init != NULL) {
+ +              rc = hdev->asic_funcs->late_init(hdev);
+ +              if (rc != 0) {
+ +                      dev_err(hdev->dev,
+ +                              "failed late initialization for the H/W\n");
+ +                      return rc;
+ +              }
+ +      }
+ +
+ +      hdev->high_pll = hdev->asic_prop.high_pll;
+ +
+ +      /* force setting to low frequency */
+ +      hdev->curr_pll_profile = PLL_LOW;
+ +
+ +      if (hdev->pm_mng_profile != PM_AUTO)
+ +              hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
+ +      else
+ +              hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
+ +
+ +      INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
+ +      schedule_delayed_work(&hdev->work_freq,
+ +                      usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
+ +
+ +      if (hdev->heartbeat) {
+ +              INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
+ +              schedule_delayed_work(&hdev->work_heartbeat,
+ +                              usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
+ +      }
+ +
+ +      hdev->late_init_done = true;
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * device_late_fini - undo everything that device_late_init set up
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Does nothing unless late_init_done is set. Otherwise it cancels the
+ + * frequency work (and the heartbeat work when heartbeat is enabled), waits
+ + * for them to finish, calls the optional ASIC late_fini callback and clears
+ + * late_init_done.
+ + */
+ +static void device_late_fini(struct hl_device *hdev)
+ +{
+ +      if (hdev->late_init_done) {
+ +              cancel_delayed_work_sync(&hdev->work_freq);
+ +
+ +              if (hdev->heartbeat)
+ +                      cancel_delayed_work_sync(&hdev->work_heartbeat);
+ +
+ +              /* The ASIC-specific late fini callback is optional */
+ +              if (hdev->asic_funcs->late_fini)
+ +                      hdev->asic_funcs->late_fini(hdev);
+ +
+ +              hdev->late_init_done = false;
+ +      }
+ +}
+ +
+ +/*
+ + * hl_device_utilization - percentage of a recent period the device was busy
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @period_ms: length of the sampling period, in milliseconds, ending now.
+ + *             It is used as a divisor, so it must not be 0.
+ + *
+ + * Walks idle_busy_ts_arr backwards, starting at idle_busy_ts_idx, and
+ + * accumulates the busy time of every sample that overlaps the sampling
+ + * period. At most HL_IDLE_BUSY_TS_ARR_SIZE samples are visited.
+ + *
+ + * Returns 100 if the device has been continuously busy since before the
+ + * period started. Otherwise returns the accumulated busy time as a
+ + * percentage of period_ms; both the microsecond-to-millisecond conversion
+ + * and the percentage division round up.
+ + */
+ +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
+ +{
+ +      struct hl_device_idle_busy_ts *ts;
+ +      ktime_t zero_ktime, curr = ktime_get();
+ +      u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
+ +      s64 period_us, last_start_us, last_end_us, last_busy_time_us,
+ +              total_busy_time_us = 0, total_busy_time_ms;
+ +
+ +      zero_ktime = ktime_set(0, 0);
+ +      period_us = period_ms * USEC_PER_MSEC;
+ +      ts = &hdev->idle_busy_ts_arr[last_index];
+ +
+ +      /* check case that device is currently in idle */
+ +      if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
+ +                      !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
+ +
+ +              last_index--;
+ +              /* last_index is unsigned, so decrementing 0 wraps to a huge
+ +               * value; in that case continue from the last array entry
+ +               */
+ +              if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+ +                      last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+ +
+ +              ts = &hdev->idle_busy_ts_arr[last_index];
+ +      }
+ +
+ +      while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
+ +              /* Check if we are in last sample case. i.e. if the sample
+ +               * began before the sampling period. This could be a real
+ +               * sample or 0 so need to handle both cases
+ +               */
+ +              last_start_us = ktime_to_us(
+ +                              ktime_sub(curr, ts->idle_to_busy_ts));
+ +
+ +              if (last_start_us > period_us) {
+ +
+ +                      /* First check two cases:
+ +                       * 1. If the device is currently busy
+ +                       * 2. If the device was idle during the whole sampling
+ +                       *    period
+ +                       */
+ +
+ +                      if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
+ +                              /* Check if the device is currently busy */
+ +                              if (ktime_compare(ts->idle_to_busy_ts,
+ +                                              zero_ktime))
+ +                                      return 100;
+ +
+ +                              /* We either didn't have any activity or we
+ +                               * reached an entry which is 0. Either way,
+ +                               * exit and return what was accumulated so far
+ +                               */
+ +                              break;
+ +                      }
+ +
+ +                      /* If sample has finished, check it is relevant */
+ +                      last_end_us = ktime_to_us(
+ +                                      ktime_sub(curr, ts->busy_to_idle_ts));
+ +
+ +                      if (last_end_us > period_us)
+ +                              break;
+ +
+ +                      /* It is relevant so add it, minus the part of the
+ +                       * sample that lies before the start of the period
+ +                       */
+ +                      last_busy_time_us = ktime_to_us(
+ +                                              ktime_sub(ts->busy_to_idle_ts,
+ +                                              ts->idle_to_busy_ts));
+ +                      total_busy_time_us += last_busy_time_us -
+ +                                      (last_start_us - period_us);
+ +                      break;
+ +              }
+ +
+ +              /* Check if the sample is finished or still open. An open
+ +               * sample is counted as busy up to the current time
+ +               */
+ +              if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
+ +                      last_busy_time_us = ktime_to_us(
+ +                                              ktime_sub(ts->busy_to_idle_ts,
+ +                                              ts->idle_to_busy_ts));
+ +              else
+ +                      last_busy_time_us = ktime_to_us(
+ +                                      ktime_sub(curr, ts->idle_to_busy_ts));
+ +
+ +              total_busy_time_us += last_busy_time_us;
+ +
+ +              last_index--;
+ +              /* Same unsigned wrap handling as above: after index 0,
+ +               * continue from the last array entry
+ +               */
+ +              if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+ +                      last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+ +
+ +              ts = &hdev->idle_busy_ts_arr[last_index];
+ +
+ +              overlap_cnt++;
+ +      }
+ +
+ +      total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
+ +                                              USEC_PER_MSEC);
+ +
+ +      return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
+ +}
+ +
+ +/*
+ + * hl_device_set_frequency - switch the device to a new PLL profile
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @freq: the requested frequency profile
+ + *
+ + * Nothing is done when power management is in manual mode or when the
+ + * device already runs at the requested profile. The function itself takes
+ + * no lock, so callers must make sure it is not invoked concurrently with
+ + * different values.
+ + *
+ + * Returns 0 if no change was done, otherwise returns 1
+ + */
+ +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
+ +{
+ +      const char *freq_name;
+ +
+ +      /* In manual mode the driver never changes the frequency by itself */
+ +      if (hdev->pm_mng_profile == PM_MANUAL)
+ +              return 0;
+ +
+ +      /* Already at the requested profile */
+ +      if (hdev->curr_pll_profile == freq)
+ +              return 0;
+ +
+ +      freq_name = (freq == PLL_HIGH) ? "high" : "low";
+ +      dev_dbg(hdev->dev, "Changing device frequency to %s\n", freq_name);
+ +
+ +      hdev->asic_funcs->set_pll_profile(hdev, freq);
+ +      hdev->curr_pll_profile = freq;
+ +
+ +      return 1;
+ +}
+ +
+ +/*
+ + * hl_device_set_debug_mode - enter or leave debug mode
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @enable: true to enter debug mode, false to leave it
+ + *
+ + * The whole transition is serialized by debug_lock. Entering debug mode
+ + * disables clock gating. Leaving it halts coresight and calls the ASIC
+ + * set_clock_gating callback; both calls are skipped while a hard reset is
+ + * pending.
+ + *
+ + * Returns 0 on success, or -EFAULT if the device is already in the
+ + * requested state.
+ + */
+ +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
+ +{
+ +      int rc = 0;
+ +
+ +      mutex_lock(&hdev->debug_lock);
+ +
+ +      if (!enable) {
+ +              if (!hdev->in_debug) {
+ +                      dev_err(hdev->dev,
+ +                              "Failed to disable debug mode because device was not in debug mode\n");
+ +                      rc = -EFAULT;
+ +                      goto out;
+ +              }
+ +
+ +              if (!hdev->hard_reset_pending)
+ +                      hdev->asic_funcs->halt_coresight(hdev);
+ +
+ +              hdev->in_debug = 0;
+ +
+ +              if (!hdev->hard_reset_pending)
++                      hdev->asic_funcs->set_clock_gating(hdev);
+ +
+ +              goto out;
+ +      }
+ +
+ +      if (hdev->in_debug) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to enable debug mode because device is already in debug mode\n");
+ +              rc = -EFAULT;
+ +              goto out;
+ +      }
+ +
+ +      hdev->asic_funcs->disable_clock_gating(hdev);
+ +      hdev->in_debug = 1;
+ +
+ +out:
+ +      mutex_unlock(&hdev->debug_lock);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_device_suspend - initiate device suspend
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Puts the hw in the suspend state (all asics).
+ + * Called at driver suspend.
+ + *
+ + * Returns 0 on success, or -EIO if the device is currently in reset.
+ + * A failure of the ASIC suspend callback is only logged: the PCI device is
+ + * still disabled and moved to D3hot, and 0 is returned.
+ + */
+ +int hl_device_suspend(struct hl_device *hdev)
+ +{
+ +      int rc;
+ +
+ +      pci_save_state(hdev->pdev);
+ +
+ +      /* Block future CS/VM/JOB completion operations */
+ +      rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Can't suspend while in reset\n");
+ +              return -EIO;
+ +      }
+ +
+ +      /* This blocks all other stuff that is not blocked by in_reset */
+ +      hdev->disabled = true;
+ +
+ +      /*
+ +       * Flush anyone that is inside the critical section of enqueue
+ +       * jobs to the H/W
+ +       */
+ +      hdev->asic_funcs->hw_queues_lock(hdev);
+ +      hdev->asic_funcs->hw_queues_unlock(hdev);
+ +
+ +      /* Flush processes that are sending message to CPU */
+ +      mutex_lock(&hdev->send_cpu_message_lock);
+ +      mutex_unlock(&hdev->send_cpu_message_lock);
+ +
+ +      rc = hdev->asic_funcs->suspend(hdev);
+ +      if (rc)
+ +              dev_err(hdev->dev,
+ +                      "Failed to disable PCI access of device CPU\n");
+ +
+ +      /* Shut down the device */
+ +      pci_disable_device(hdev->pdev);
+ +      pci_set_power_state(hdev->pdev, PCI_D3hot);
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * hl_device_resume - initiate device resume
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Bring the hw back to operating state (all asics): restore the PCI state,
+ + * re-enable the PCI device and bus mastering, call the ASIC resume callback
+ + * and then request a hard reset of the device.
+ + * Called at driver resume.
+ + *
+ + * Returns 0 for success or an error on failure. If the ASIC resume callback
+ + * or the reset request fails, bus mastering is cleared and the PCI device
+ + * is disabled again before returning.
+ + */
+ +int hl_device_resume(struct hl_device *hdev)
+ +{
+ +      int rc;
+ +
+ +      pci_set_power_state(hdev->pdev, PCI_D0);
+ +      pci_restore_state(hdev->pdev);
+ +      rc = pci_enable_device_mem(hdev->pdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to enable PCI device in resume\n");
+ +              return rc;
+ +      }
+ +
+ +      pci_set_master(hdev->pdev);
+ +
+ +      rc = hdev->asic_funcs->resume(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to resume device after suspend\n");
+ +              goto disable_device;
+ +      }
+ +
+ +
+ +      hdev->disabled = false;
+ +      atomic_set(&hdev->in_reset, 0);
+ +
+ +      rc = hl_device_reset(hdev, true, false);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to reset device during resume\n");
+ +              goto disable_device;
+ +      }
+ +
+ +      return 0;
+ +
+ +disable_device:
+ +      pci_clear_master(hdev->pdev);
+ +      pci_disable_device(hdev->pdev);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * device_kill_open_processes - kill the user processes that hold the device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Sends SIGKILL to the task of every entry on fpriv_list and then polls,
+ + * once per second, until the list is empty or the wait budget is used up.
+ + * The budget is HL_PLDM_PENDING_RESET_PER_SEC iterations when pldm is set
+ + * and HL_PENDING_RESET_PER_SEC iterations otherwise.
+ + *
+ + * Returns 0 if fpriv_list ended up empty, otherwise -EBUSY.
+ + */
+ +static int device_kill_open_processes(struct hl_device *hdev)
+ +{
+ +      struct task_struct *victim;
+ +      struct hl_fpriv *hpriv;
+ +      u16 retries;
+ +
+ +      /* Giving time for user to close FD, and for processes that are inside
+ +       * hl_device_open to finish
+ +       */
+ +      if (!list_empty(&hdev->fpriv_list))
+ +              ssleep(1);
+ +
+ +      /* This section must be protected because we are dereferencing
+ +       * pointers that are freed if the process exits
+ +       */
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +
+ +      list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
+ +              victim = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
+ +              if (!victim)
+ +                      continue;
+ +
+ +              dev_info(hdev->dev, "Killing user process pid=%d\n",
+ +                      task_pid_nr(victim));
+ +              send_sig(SIGKILL, victim, 1);
+ +              usleep_range(1000, 10000);
+ +
+ +              put_task_struct(victim);
+ +      }
+ +
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      /* We killed the open users, but because the driver cleans up after the
+ +       * user contexts are closed (e.g. mmu mappings), we need to wait again
+ +       * to make sure the cleaning phase is finished before continuing with
+ +       * the reset
+ +       */
+ +      if (hdev->pldm)
+ +              retries = HL_PLDM_PENDING_RESET_PER_SEC;
+ +      else
+ +              retries = HL_PENDING_RESET_PER_SEC;
+ +
+ +      for ( ; (!list_empty(&hdev->fpriv_list)) && (retries) ; retries--) {
+ +              dev_info(hdev->dev,
+ +                      "Waiting for all unmap operations to finish before hard reset\n");
+ +
+ +              ssleep(1);
+ +      }
+ +
+ +      if (!list_empty(&hdev->fpriv_list))
+ +              return -EBUSY;
+ +
+ +      return 0;
+ +}
+ +
+ +/*
+ + * device_hard_reset_pending - work handler that performs a queued hard reset
+ + *
+ + * @work: the reset_work member of a struct hl_device_reset_work
+ + *
+ + * Calls hl_device_reset() as the hard-reset thread and then frees the work
+ + * container, which this handler owns.
+ + */
+ +static void device_hard_reset_pending(struct work_struct *work)
+ +{
+ +      struct hl_device_reset_work *reset_req;
+ +      struct hl_device *hdev;
+ +
+ +      reset_req = container_of(work, struct hl_device_reset_work,
+ +                              reset_work);
+ +      hdev = reset_req->hdev;
+ +
+ +      hl_device_reset(hdev, true, true);
+ +
+ +      kfree(reset_req);
+ +}
+ +
+ +/*
+ + * hl_device_reset - reset the device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @hard_reset: should we do hard reset to all engines or just reset the
+ + *              compute/dma engines
+ + * @from_hard_reset_thread: is the caller the hard-reset thread
+ + *
+ + * Block future CS and wait for pending CS to be enqueued
+ + * Call ASIC H/W fini
+ + * Flush all completions
+ + * Re-initialize all internal data structures
+ + * Call ASIC H/W init, late_init
+ + * Test queues
+ + * Enable device
+ + *
+ + * A soft-reset request is turned into a hard reset when the device does not
+ + * support soft reset, and a soft reset that fails is retried as a hard
+ + * reset. A hard reset that is not requested from the hard-reset thread is
+ + * not performed here: it is handed to a dedicated work item and the
+ + * function returns 0 right after scheduling it.
+ + *
+ + * Returns 0 for success or an error on failure. 0 is also returned, without
+ + * doing a reset, when initialization is not done yet or when another reset
+ + * is already in progress.
+ + */
+ +int hl_device_reset(struct hl_device *hdev, bool hard_reset,
+ +                      bool from_hard_reset_thread)
+ +{
+ +      int i, rc;
+ +
+ +      if (!hdev->init_done) {
+ +              dev_err(hdev->dev,
+ +                      "Can't reset before initialization is done\n");
+ +              return 0;
+ +      }
+ +
+ +      if ((!hard_reset) && (!hdev->supports_soft_reset)) {
+ +              dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
+ +              hard_reset = true;
+ +      }
+ +
+ +      /*
+ +       * Prevent concurrency in this function - only one reset should be
+ +       * done at any given time. Only need to perform this if we didn't
+ +       * get here from the dedicated hard reset thread
+ +       */
+ +      if (!from_hard_reset_thread) {
+ +              /* Block future CS/VM/JOB completion operations */
+ +              rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ +              if (rc)
+ +                      return 0;
+ +
+ +              if (hard_reset) {
+ +                      /* Disable PCI access from device F/W so it won't send
+ +                       * us additional interrupts. We disable MSI/MSI-X at
+ +                       * the halt_engines function and we can't have the F/W
+ +                       * sending us interrupts after that. We need to disable
+ +                       * the access here because if the device is marked
+ +                       * disabled, the message won't be sent. Also, in case
+ +                       * of heartbeat, the device CPU is marked as disabled
+ +                       * so this message won't be sent
+ +                       */
+ +                      if (hl_fw_send_pci_access_msg(hdev,
+ +                                      ARMCP_PACKET_DISABLE_PCI_ACCESS))
+ +                              dev_warn(hdev->dev,
+ +                                      "Failed to disable PCI access by F/W\n");
+ +              }
+ +
+ +              /* This also blocks future CS/VM/JOB completion operations */
+ +              hdev->disabled = true;
+ +
+ +              /* Flush anyone that is inside the critical section of enqueue
+ +               * jobs to the H/W
+ +               */
+ +              hdev->asic_funcs->hw_queues_lock(hdev);
+ +              hdev->asic_funcs->hw_queues_unlock(hdev);
+ +
+ +              /* Flush anyone that is inside device open */
+ +              mutex_lock(&hdev->fpriv_list_lock);
+ +              mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +              dev_err(hdev->dev, "Going to RESET device!\n");
+ +      }
+ +
+ +again:
+ +      if ((hard_reset) && (!from_hard_reset_thread)) {
+ +              struct hl_device_reset_work *device_reset_work;
+ +
+ +              hdev->hard_reset_pending = true;
+ +
+ +              device_reset_work = kzalloc(sizeof(*device_reset_work),
+ +                                              GFP_ATOMIC);
+ +              if (!device_reset_work) {
+ +                      rc = -ENOMEM;
+ +                      goto out_err;
+ +              }
+ +
+ +              /*
+ +               * Because the reset function can't run from interrupt or
+ +               * from heartbeat work, we need to call the reset function
+ +               * from a dedicated work. The work handler frees
+ +               * device_reset_work when it is done
+ +               */
+ +              INIT_WORK(&device_reset_work->reset_work,
+ +                              device_hard_reset_pending);
+ +              device_reset_work->hdev = hdev;
+ +              schedule_work(&device_reset_work->reset_work);
+ +
+ +              return 0;
+ +      }
+ +
+ +      if (hard_reset) {
+ +              device_late_fini(hdev);
+ +
+ +              /*
+ +               * Now that the heartbeat thread is closed, flush processes
+ +               * which are sending messages to CPU
+ +               */
+ +              mutex_lock(&hdev->send_cpu_message_lock);
+ +              mutex_unlock(&hdev->send_cpu_message_lock);
+ +      }
+ +
+ +      /*
+ +       * Halt the engines and disable interrupts so we won't get any more
+ +       * completions from H/W and we won't have any accesses from the
+ +       * H/W to the host machine
+ +       */
+ +      hdev->asic_funcs->halt_engines(hdev, hard_reset);
+ +
+ +      /* Go over all the queues, release all CS and their jobs */
+ +      hl_cs_rollback_all(hdev);
+ +
+ +      if (hard_reset) {
+ +              /* Kill processes here after CS rollback. This is because the
+ +               * process can't really exit until all its CSs are done, which
+ +               * is what we do in cs rollback
+ +               */
+ +              rc = device_kill_open_processes(hdev);
+ +              if (rc) {
+ +                      dev_crit(hdev->dev,
+ +                              "Failed to kill all open processes, stopping hard reset\n");
+ +                      goto out_err;
+ +              }
+ +
+ +              /* Flush the Event queue workers to make sure no other thread is
+ +               * reading or writing to registers during the reset
+ +               */
+ +              flush_workqueue(hdev->eq_wq);
+ +      }
+ +
+ +      /* Release kernel context */
+ +      if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
+ +              hdev->kernel_ctx = NULL;
+ +
+ +      /* Reset the H/W. It will be in idle state after this returns */
+ +      hdev->asic_funcs->hw_fini(hdev, hard_reset);
+ +
+ +      if (hard_reset) {
+ +              hl_vm_fini(hdev);
+ +              hl_mmu_fini(hdev);
+ +              hl_eq_reset(hdev, &hdev->event_queue);
+ +      }
+ +
+ +      /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
+ +      hl_hw_queue_reset(hdev, hard_reset);
+ +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ +              hl_cq_reset(hdev, &hdev->completion_queue[i]);
+ +
+ +      hdev->idle_busy_ts_idx = 0;
+ +      hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
+ +      hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
+ +
+ +      if (hdev->cs_active_cnt)
+ +              dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
+ +                      hdev->cs_active_cnt);
+ +
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +
+ +      /* Make sure the context switch phase will run again */
+ +      if (hdev->compute_ctx) {
+ +              atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
+ +              hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
+ +      }
+ +
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      /* Finished tear-down, starting to re-initialize */
+ +
+ +      if (hard_reset) {
+ +              hdev->device_cpu_disabled = false;
+ +              hdev->hard_reset_pending = false;
+ +
+ +              if (hdev->kernel_ctx) {
+ +                      dev_crit(hdev->dev,
+ +                              "kernel ctx was alive during hard reset, something is terribly wrong\n");
+ +                      rc = -EBUSY;
+ +                      goto out_err;
+ +              }
+ +
+ +              rc = hl_mmu_init(hdev);
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "Failed to initialize MMU S/W after hard reset\n");
+ +                      goto out_err;
+ +              }
+ +
+ +              /* Allocate the kernel context */
+ +              hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
+ +                                              GFP_KERNEL);
+ +              if (!hdev->kernel_ctx) {
+ +                      rc = -ENOMEM;
+ +                      goto out_err;
+ +              }
+ +
+ +              hdev->compute_ctx = NULL;
+ +
+ +              rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "failed to init kernel ctx in hard reset\n");
+ +                      kfree(hdev->kernel_ctx);
+ +                      hdev->kernel_ctx = NULL;
+ +                      goto out_err;
+ +              }
+ +      }
+ +
+ +      /* Device is now enabled as part of the initialization requires
+ +       * communication with the device firmware to get information that
+ +       * is required for the initialization itself
+ +       */
+ +      hdev->disabled = false;
+ +
+ +      rc = hdev->asic_funcs->hw_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "failed to initialize the H/W after reset\n");
+ +              goto out_err;
+ +      }
+ +
+ +      /* Check that the communication with the device is working */
+ +      rc = hdev->asic_funcs->test_queues(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to detect if device is alive after reset\n");
+ +              goto out_err;
+ +      }
+ +
+ +      if (hard_reset) {
+ +              rc = device_late_init(hdev);
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "Failed late init after hard reset\n");
+ +                      goto out_err;
+ +              }
+ +
+ +              rc = hl_vm_init(hdev);
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "Failed to init memory module after hard reset\n");
+ +                      goto out_err;
+ +              }
+ +
+ +              hl_set_max_power(hdev, hdev->max_power);
+ +      } else {
+ +              rc = hdev->asic_funcs->soft_reset_late_init(hdev);
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "Failed late init after soft reset\n");
+ +                      goto out_err;
+ +              }
+ +      }
+ +
+ +      atomic_set(&hdev->in_reset, 0);
+ +
+ +      if (hard_reset)
+ +              hdev->hard_reset_cnt++;
+ +      else
+ +              hdev->soft_reset_cnt++;
+ +
+ +      dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+ +
+ +      return 0;
+ +
+ +out_err:
+ +      hdev->disabled = true;
+ +
+ +      if (hard_reset) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to reset! Device is NOT usable\n");
+ +              hdev->hard_reset_cnt++;
+ +      } else {
+ +              dev_err(hdev->dev,
+ +                      "Failed to do soft-reset, trying hard reset\n");
+ +              hdev->soft_reset_cnt++;
+ +              hard_reset = true;
+ +              goto again;
+ +      }
+ +
+ +      atomic_set(&hdev->in_reset, 0);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_device_init - main initialization function for habanalabs device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @hclass: class pointer handed to device_init_cdev() for both the compute
+ + *          device and the control device
+ + *
+ + * Initialize the cdev and device structures of the compute and control
+ + * devices, do early initialization and then call the ASIC specific
+ + * initialization functions (S/W, H/W and late init). Finally, add the char
+ + * devices and sysfs nodes to expose the device to the user.
+ + *
+ + * Returns 0 on success. A failure up to and including the CB pool
+ + * initialization unwinds what was set up so far and returns the error code.
+ + * A failure from the H/W initialization onwards resets rc to 0, so the
+ + * function returns 0 with the device marked as disabled; nothing is unwound
+ + * and, while add_cdev_sysfs_on_err is set, the char devices and sysfs nodes
+ + * are still added to allow debugging.
+ + */
+ +int hl_device_init(struct hl_device *hdev, struct class *hclass)
+ +{
+ +      int i, rc, cq_cnt, cq_ready_cnt;
+ +      char *name;
+ +      bool add_cdev_sysfs_on_err = false;
+ +
+ +      name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
+ +      if (!name) {
+ +              rc = -ENOMEM;
+ +              goto out_disabled;
+ +      }
+ +
+ +      /* Initialize cdev and device structures */
+ +      rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
+ +                              &hdev->cdev, &hdev->dev);
+ +
+ +      kfree(name);
+ +
+ +      if (rc)
+ +              goto out_disabled;
+ +
+ +      name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
+ +      if (!name) {
+ +              rc = -ENOMEM;
+ +              goto free_dev;
+ +      }
+ +
+ +      /* Initialize cdev and device structures for control device */
+ +      rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
+ +                              name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
+ +
+ +      kfree(name);
+ +
+ +      if (rc)
+ +              goto free_dev;
+ +
+ +      /* Initialize ASIC function pointers and perform early init */
+ +      rc = device_early_init(hdev);
+ +      if (rc)
+ +              goto free_dev_ctrl;
+ +
+ +      /*
+ +       * Start calling ASIC initialization. First S/W then H/W and finally
+ +       * late init
+ +       */
+ +      rc = hdev->asic_funcs->sw_init(hdev);
+ +      if (rc)
+ +              goto early_fini;
+ +
+ +      /*
+ +       * Initialize the H/W queues. Must be done before hw_init, because
+ +       * there the addresses of the kernel queue are being written to the
+ +       * registers of the device
+ +       */
+ +      rc = hl_hw_queues_create(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "failed to initialize kernel queues\n");
+ +              goto sw_fini;
+ +      }
+ +
+ +      cq_cnt = hdev->asic_prop.completion_queues_count;
+ +
+ +      /*
+ +       * Initialize the completion queues. Must be done before hw_init,
+ +       * because there the addresses of the completion queues are being
+ +       * passed as arguments to request_irq
+ +       */
+ +      if (cq_cnt) {
+ +              hdev->completion_queue = kcalloc(cq_cnt,
+ +                              sizeof(*hdev->completion_queue),
+ +                              GFP_KERNEL);
+ +
+ +              if (!hdev->completion_queue) {
+ +                      dev_err(hdev->dev,
+ +                              "failed to allocate completion queues\n");
+ +                      rc = -ENOMEM;
+ +                      goto hw_queues_destroy;
+ +              }
+ +      }
+ +
+ +      /* cq_ready_cnt counts the queues that were initialized successfully,
+ +       * so the cq_fini error path only tears down those
+ +       */
+ +      for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
+ +              rc = hl_cq_init(hdev, &hdev->completion_queue[i],
+ +                              hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "failed to initialize completion queue\n");
+ +                      goto cq_fini;
+ +              }
+ +              hdev->completion_queue[i].cq_idx = i;
+ +      }
+ +
+ +      /*
+ +       * Initialize the event queue. Must be done before hw_init,
+ +       * because there the address of the event queue is being
+ +       * passed as argument to request_irq
+ +       */
+ +      rc = hl_eq_init(hdev, &hdev->event_queue);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "failed to initialize event queue\n");
+ +              goto cq_fini;
+ +      }
+ +
+ +      /* MMU S/W must be initialized before kernel context is created */
+ +      rc = hl_mmu_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
+ +              goto eq_fini;
+ +      }
+ +
+ +      /* Allocate the kernel context */
+ +      hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
+ +      if (!hdev->kernel_ctx) {
+ +              rc = -ENOMEM;
+ +              goto mmu_fini;
+ +      }
+ +
+ +      hdev->compute_ctx = NULL;
+ +
+ +      rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "failed to initialize kernel context\n");
+ +              kfree(hdev->kernel_ctx);
+ +              goto mmu_fini;
+ +      }
+ +
+ +      rc = hl_cb_pool_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "failed to initialize CB pool\n");
+ +              goto release_ctx;
+ +      }
+ +
+ +      hl_debugfs_add_device(hdev);
+ +
+ +      if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
+ +              dev_info(hdev->dev,
+ +                      "H/W state is dirty, must reset before initializing\n");
+ +              hdev->asic_funcs->halt_engines(hdev, true);
+ +              hdev->asic_funcs->hw_fini(hdev, true);
+ +      }
+ +
+ +      /*
+ +       * From this point, in case of an error, add char devices and create
+ +       * sysfs nodes as part of the error flow, to allow debugging.
+ +       */
+ +      add_cdev_sysfs_on_err = true;
+ +
+ +      /* Device is now enabled as part of the initialization requires
+ +       * communication with the device firmware to get information that
+ +       * is required for the initialization itself
+ +       */
+ +      hdev->disabled = false;
+ +
+ +      rc = hdev->asic_funcs->hw_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "failed to initialize the H/W\n");
+ +              rc = 0;
+ +              goto out_disabled;
+ +      }
+ +
+ +      /* Check that the communication with the device is working */
+ +      rc = hdev->asic_funcs->test_queues(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to detect if device is alive\n");
+ +              rc = 0;
+ +              goto out_disabled;
+ +      }
+ +
+ +      rc = device_late_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed late initialization\n");
+ +              rc = 0;
+ +              goto out_disabled;
+ +      }
+ +
+ +      dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
+ +              hdev->asic_name,
+ +              hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
+ +
+ +      rc = hl_vm_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to initialize memory module\n");
+ +              rc = 0;
+ +              goto out_disabled;
+ +      }
+ +
+ +      /*
+ +       * Expose devices and sysfs nodes to user.
+ +       * From here there is no need to add char devices and create sysfs nodes
+ +       * in case of an error.
+ +       */
+ +      add_cdev_sysfs_on_err = false;
+ +      rc = device_cdev_sysfs_add(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to add char devices and sysfs nodes\n");
+ +              rc = 0;
+ +              goto out_disabled;
+ +      }
+ +
+ +      /*
+ +       * hl_hwmon_init() must be called after device_late_init(), because only
+ +       * there we get the information from the device about which
+ +       * hwmon-related sensors the device supports.
+ +       * Furthermore, it must be done after adding the device to the system.
+ +       */
+ +      rc = hl_hwmon_init(hdev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to initialize hwmon\n");
+ +              rc = 0;
+ +              goto out_disabled;
+ +      }
+ +
+ +      dev_notice(hdev->dev,
+ +              "Successfully added device to habanalabs driver\n");
+ +
+ +      hdev->init_done = true;
+ +
+ +      return 0;
+ +
+ +release_ctx:
+ +      if (hl_ctx_put(hdev->kernel_ctx) != 1)
+ +              dev_err(hdev->dev,
+ +                      "kernel ctx is still alive on initialization failure\n");
+ +mmu_fini:
+ +      hl_mmu_fini(hdev);
+ +eq_fini:
+ +      hl_eq_fini(hdev, &hdev->event_queue);
+ +cq_fini:
+ +      for (i = 0 ; i < cq_ready_cnt ; i++)
+ +              hl_cq_fini(hdev, &hdev->completion_queue[i]);
+ +      kfree(hdev->completion_queue);
+ +hw_queues_destroy:
+ +      hl_hw_queues_destroy(hdev);
+ +sw_fini:
+ +      hdev->asic_funcs->sw_fini(hdev);
+ +early_fini:
+ +      device_early_fini(hdev);
+ +free_dev_ctrl:
+ +      kfree(hdev->dev_ctrl);
+ +free_dev:
+ +      kfree(hdev->dev);
+ +out_disabled:
+ +      hdev->disabled = true;
+ +      if (add_cdev_sysfs_on_err)
+ +              device_cdev_sysfs_add(hdev);
+ +      if (hdev->pdev)
+ +              dev_err(&hdev->pdev->dev,
+ +                      "Failed to initialize hl%d. Device is NOT usable !\n",
+ +                      hdev->id / 2);
+ +      else
+ +              pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
+ +                      hdev->id / 2);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_device_fini - main tear-down function for habanalabs device
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Destroy the device, call ASIC fini functions and release the id
+ + *
+ + * The function first claims hdev->in_reset (waiting for a reset that is
+ + * already in progress), marks the device as disabled and then releases the
+ + * driver resources. The char devices and sysfs nodes are removed last.
+ + *
+ + * If in_reset cannot be claimed before the timeout expires, the function
+ + * emits a WARN and returns without tearing anything down.
+ + */
+ +void hl_device_fini(struct hl_device *hdev)
+ +{
+ +      int i, rc;
+ +      ktime_t timeout;
+ +
+ +      dev_info(hdev->dev, "Removing device\n");
+ +
+ +      /*
+ +       * This function is competing with the reset function, so try to
+ +       * take the reset atomic and if we are already in middle of reset,
+ +       * wait until reset function is finished. Reset function is designed
+ +       * to always finish. However, in Gaudi, because of all the network
+ +       * ports, the hard reset could take between 10-30 seconds
+ +       *
+ +       * HL_HARD_RESET_MAX_TIMEOUT is scaled by 1000 * 1000 because
+ +       * ktime_add_us() takes microseconds.
+ +       */
+ +
+ +      timeout = ktime_add_us(ktime_get(),
+ +                              HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
+ +      rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ +      while (rc) {
+ +              usleep_range(50, 200);
+ +              rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
+ +              /* NOTE(review): the deadline is checked after the retry
+ +               * above, so an expired deadline makes us bail out even if
+ +               * that retry has just claimed in_reset - confirm intended
+ +               */
+ +              if (ktime_compare(ktime_get(), timeout) > 0) {
+ +                      WARN(1, "Failed to remove device because reset function did not finish\n");
+ +                      return;
+ +              }
+ +      }
+ +
+ +      /* Mark device as disabled */
+ +      hdev->disabled = true;
+ +
+ +      /* Flush anyone that is inside the critical section of enqueue
+ +       * jobs to the H/W
+ +       */
+ +      hdev->asic_funcs->hw_queues_lock(hdev);
+ +      hdev->asic_funcs->hw_queues_unlock(hdev);
+ +
+ +      /* Flush anyone that is inside device open */
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      hdev->hard_reset_pending = true;
+ +
+ +      hl_hwmon_fini(hdev);
+ +
+ +      device_late_fini(hdev);
+ +
+ +      hl_debugfs_remove_device(hdev);
+ +
+ +      /*
+ +       * Halt the engines and disable interrupts so we won't get any more
+ +       * completions from H/W and we won't have any accesses from the
+ +       * H/W to the host machine
+ +       */
+ +      hdev->asic_funcs->halt_engines(hdev, true);
+ +
+ +      /* Go over all the queues, release all CS and their jobs */
+ +      hl_cs_rollback_all(hdev);
+ +
+ +      /* Kill processes here after CS rollback. This is because the process
+ +       * can't really exit until all its CSs are done, which is what we
+ +       * do in cs rollback
+ +       */
+ +      rc = device_kill_open_processes(hdev);
+ +      if (rc)
+ +              dev_crit(hdev->dev, "Failed to kill all open processes\n");
+ +
+ +      hl_cb_pool_fini(hdev);
+ +
+ +      /* Release kernel context */
+ +      if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
+ +              dev_err(hdev->dev, "kernel ctx is still alive\n");
+ +
+ +      /* Reset the H/W. It will be in idle state after this returns */
+ +      hdev->asic_funcs->hw_fini(hdev, true);
+ +
+ +      hl_vm_fini(hdev);
+ +
+ +      hl_mmu_fini(hdev);
+ +
+ +      hl_eq_fini(hdev, &hdev->event_queue);
+ +
+ +      /* Tear down every completion queue, then free the array itself */
+ +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+ +              hl_cq_fini(hdev, &hdev->completion_queue[i]);
+ +      kfree(hdev->completion_queue);
+ +
+ +      hl_hw_queues_destroy(hdev);
+ +
+ +      /* Call ASIC S/W finalize function */
+ +      hdev->asic_funcs->sw_fini(hdev);
+ +
+ +      device_early_fini(hdev);
+ +
+ +      /* Hide devices and sysfs nodes from user */
+ +      device_cdev_sysfs_del(hdev);
+ +
+ +      pr_info("removed device successfully\n");
+ +}
+ +
+ +/*
+ + * MMIO register access helper functions.
+ + */
+ +
+ +/*
+ + * hl_rreg - Read an MMIO register
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @reg: MMIO register offset (in bytes)
+ + *
+ + * Returns the value of the MMIO register we are asked to read
+ + *
+ + * The access is a single readl() at hdev->rmmio + reg.
+ + *
+ + */
+ +inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
+ +{
+ +      return readl(hdev->rmmio + reg);
+ +}
+ +
+ +/*
+ + * hl_wreg - Write to an MMIO register
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @reg: MMIO register offset (in bytes)
+ + * @val: 32-bit value
+ + *
+ + * Writes the 32-bit value into the MMIO register
+ + *
+ + * The access is a single writel() at hdev->rmmio + reg.
+ + *
+ + */
+ +inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
+ +{
+ +      writel(val, hdev->rmmio + reg);
+ +}
diff --cc drivers/misc/habanalabs/common/firmware_if.c

index b2b84510b932f0d00b01a735ca8486a474f183b5,0000000000000000000000000000000000000000..5981dbd8c6dffaee482a113ca3db9a35a9d09edc

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/firmware_if.c
--- /dev/null
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@@ -1,589 -1,0 +1,589 @@@
-                               sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+ +// SPDX-License-Identifier: GPL-2.0
+ +
+ +/*
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + */
+ +
+ +#include "habanalabs.h"
+ +#include "include/common/hl_boot_if.h"
+ +
+ +#include <linux/firmware.h>
+ +#include <linux/genalloc.h>
+ +#include <linux/io-64-nonatomic-lo-hi.h>
+ +#include <linux/slab.h>
+ +
+ +/**
+ + * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
+ + *
+ + * @hdev: pointer to hl_device structure.
+ + * @fw_name: the firmware image name
+ + * @dst: IO memory mapped address space to copy firmware to
+ + *
+ + * Copy fw code from firmware file to device memory.
+ + *
+ + * The image is requested with request_firmware(), rejected with -EINVAL if
+ + * its size is not a multiple of 4 bytes, and otherwise copied in full to @dst
+ + * with memcpy_toio().
+ + *
+ + * Return: 0 on success, non-zero for failure.
+ + */
+ +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
+ +                              void __iomem *dst)
+ +{
+ +      const struct firmware *fw;
+ +      const u64 *fw_data;
+ +      size_t fw_size;
+ +      int rc;
+ +
+ +      rc = request_firmware(&fw, fw_name, hdev->dev);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
+ +              /* NOTE(review): "out" calls release_firmware(fw) on this
+ +               * path too; this assumes request_firmware() leaves fw NULL
+ +               * on failure - confirm
+ +               */
+ +              goto out;
+ +      }
+ +
+ +      /* Only images whose size is a multiple of 4 bytes are accepted */
+ +      fw_size = fw->size;
+ +      if ((fw_size % 4) != 0) {
+ +              dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
+ +                      fw_name, fw_size);
+ +              rc = -EINVAL;
+ +              goto out;
+ +      }
+ +
+ +      dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+ +
+ +      fw_data = (const u64 *) fw->data;
+ +
+ +      memcpy_toio(dst, fw_data, fw_size);
+ +
+ +out:
+ +      /* The firmware image is released on every exit path */
+ +      release_firmware(fw);
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_send_pci_access_msg - send an opcode-only packet to the device CPU
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @opcode: opcode placed in the opcode field of the packet's ctl word
+ + *
+ + * Builds a zeroed armcp_packet that carries only @opcode and passes it to the
+ + * ASIC-specific send_cpu_message() callback, without asking for a result.
+ + *
+ + * Returns whatever send_cpu_message() returns.
+ + */
+ +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
+ +{
+ +      struct armcp_packet pkt = {};
+ +
+ +      pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +
+ +      return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
-                       HL_DEVICE_TIMEOUT_USEC, &result);
++                                              sizeof(pkt), 0, NULL);
+ +}
+ +
+ +/*
+ + * hl_fw_send_cpu_message - send a packet to the device CPU and wait for it
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @hw_queue_id: H/W queue on which the packet is submitted
+ + * @msg: packet to send; @len bytes are copied from it
+ + * @len: size of the packet in bytes
+ + * @timeout: timeout handed to hl_poll_timeout_memory() while waiting for the
+ + *           fence value
+ + * @result: optional; on success receives the result field of the packet
+ + *
+ + * The packet is copied into a buffer taken from the CPU-accessible DMA pool,
+ + * submitted under send_cpu_message_lock, and its fence field is polled until
+ + * it equals ARMCP_PACKET_FENCE_VAL.
+ + *
+ + * Returns 0 on success, -ENOMEM if the DMA buffer cannot be allocated, -EIO
+ + * if the device CPU is marked as disabled or the F/W reports an error in the
+ + * packet, -ETIMEDOUT if the poll times out (the device CPU is then marked as
+ + * disabled), or the error returned by hl_hw_queue_send_cb_no_cmpl().
+ + * Note that 0 is also returned, without touching @result, when
+ + * hdev->disabled is set.
+ + */
+ +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+ +                              u16 len, u32 timeout, long *result)
+ +{
+ +      struct armcp_packet *pkt;
+ +      dma_addr_t pkt_dma_addr;
+ +      u32 tmp;
+ +      int rc = 0;
+ +
+ +      pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
+ +                                                              &pkt_dma_addr);
+ +      if (!pkt) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to allocate DMA memory for packet to CPU\n");
+ +              return -ENOMEM;
+ +      }
+ +
+ +      memcpy(pkt, msg, len);
+ +
+ +      mutex_lock(&hdev->send_cpu_message_lock);
+ +
+ +      /* rc is still 0 here, so a disabled device is reported as success */
+ +      if (hdev->disabled)
+ +              goto out;
+ +
+ +      if (hdev->device_cpu_disabled) {
+ +              rc = -EIO;
+ +              goto out;
+ +      }
+ +
+ +      rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
+ +              goto out;
+ +      }
+ +
+ +      /* Wait until the fence field of the packet holds the fence value */
+ +      rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
+ +                              (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
+ +                              timeout, true);
+ +
+ +      /* Done regardless of the outcome of the poll above */
+ +      hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
+ +
+ +      if (rc == -ETIMEDOUT) {
+ +              dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
+ +              hdev->device_cpu_disabled = true;
+ +              goto out;
+ +      }
+ +
+ +      tmp = le32_to_cpu(pkt->ctl);
+ +
+ +      /* Extract the return code field from the ctl word of the packet */
+ +      rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+ +      if (rc) {
+ +              dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
+ +                      rc,
+ +                      (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
+ +                                              >> ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +              rc = -EIO;
+ +      } else if (result) {
+ +              *result = (long) le64_to_cpu(pkt->result);
+ +      }
+ +
+ +out:
+ +      mutex_unlock(&hdev->send_cpu_message_lock);
+ +
+ +      /* The DMA buffer is returned to the pool on every path past alloc */
+ +      hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_unmask_irq - ask the device CPU to unmask a single RAZWI IRQ
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @event_type: event to unmask, sent in the value field of the packet
+ + *
+ + * Sends an ARMCP_PACKET_UNMASK_RAZWI_IRQ packet through the ASIC-specific
+ + * send_cpu_message() callback and logs an error if that fails.
+ + *
+ + * Returns whatever send_cpu_message() returns.
+ + */
+ +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
+ +{
+ +      struct armcp_packet pkt;
+ +      long result;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.value = cpu_to_le64(event_type);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                       total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
++                                              0, &result);
+ +
+ +      /* Terminate the message with a newline like the rest of the file */
+ +      if (rc)
+ +              dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n",
+ +                      event_type);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_unmask_irq_arr - ask the device CPU to unmask an array of IRQs
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @irq_arr: array of IRQ numbers to unmask
+ + * @irq_arr_size: size of @irq_arr in bytes (the element count sent to the
+ + *                device is irq_arr_size / sizeof(irq_arr[0]))
+ + *
+ + * Builds an armcp_unmask_irq_arr_packet in a temporary kzalloc() buffer,
+ + * sends it through the ASIC-specific send_cpu_message() callback and frees
+ + * the buffer.
+ + *
+ + * Returns -EINVAL if the padded packet size does not fit in 16 bits, -ENOMEM
+ + * if the buffer cannot be allocated, otherwise whatever send_cpu_message()
+ + * returns.
+ + */
+ +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+ +              size_t irq_arr_size)
+ +{
+ +      struct armcp_unmask_irq_arr_packet *pkt;
+ +      size_t total_pkt_size;
+ +      long result;
+ +      int rc;
+ +
+ +      total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
+ +                      irq_arr_size;
+ +
+ +      /* data should be aligned to 8 bytes in order for ArmCP to copy it */
+ +      total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
+ +
+ +      /* total_pkt_size is cast to u16 later on */
+ +      if (total_pkt_size > USHRT_MAX) {
+ +              dev_err(hdev->dev, "too many elements in IRQ array\n");
+ +              return -EINVAL;
+ +      }
+ +
+ +      pkt = kzalloc(total_pkt_size, GFP_KERNEL);
+ +      if (!pkt)
+ +              return -ENOMEM;
+ +
+ +      /* length holds the number of array elements, not the byte count */
+ +      pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
+ +      memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+ +
+ +      pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
+ +                                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
-                       sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
++                                              total_pkt_size, 0, &result);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev, "failed to unmask IRQ array\n");
+ +
+ +      kfree(pkt);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_test_cpu_queue - send a test packet to the device CPU
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Sends an ARMCP_PACKET_TEST packet whose value is ARMCP_PACKET_FENCE_VAL and
+ + * logs an error if the send fails or if the returned result is not that same
+ + * value.
+ + *
+ + * Returns whatever send_cpu_message() returns; a result mismatch is only
+ + * logged and does not change the return value.
+ + */
+ +int hl_fw_test_cpu_queue(struct hl_device *hdev)
+ +{
+ +      struct armcp_packet test_pkt = {};
+ +      /* Pre-set: hl_fw_send_cpu_message() can return 0 without writing the
+ +       * result (when hdev->disabled is set), and it is compared below
+ +       */
+ +      long result = 0;
+ +      int rc;
+ +
+ +      test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+ +                                      ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
-                       sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
++                                              sizeof(test_pkt), 0, &result);
+ +
+ +      if (!rc) {
+ +              if (result != ARMCP_PACKET_FENCE_VAL)
+ +                      dev_err(hdev->dev,
+ +                              "CPU queue test failed (0x%08lX)\n", result);
+ +      } else {
+ +              dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
+ +      }
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_cpu_accessible_dma_pool_alloc - allocate from the CPU-accessible pool
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @size: number of bytes to allocate
+ + * @dma_handle: receives the DMA address that matches the returned pointer
+ + *
+ + * Allocates @size bytes from hdev->cpu_accessible_dma_pool. The DMA address
+ + * is hdev->cpu_accessible_dma_address plus the offset of the allocation from
+ + * hdev->cpu_accessible_dma_mem.
+ + *
+ + * Returns the kernel address of the allocation.
+ + * NOTE(review): the value returned by gen_pool_alloc() is not checked before
+ + * *dma_handle is computed; presumably it is 0 on failure, in which case NULL
+ + * is returned and *dma_handle is meaningless - confirm callers only rely on
+ + * the NULL check.
+ + */
+ +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ +                                              dma_addr_t *dma_handle)
+ +{
+ +      u64 kernel_addr;
+ +
+ +      kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
+ +
+ +      *dma_handle = hdev->cpu_accessible_dma_address +
+ +              (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
+ +
+ +      return (void *) (uintptr_t) kernel_addr;
+ +}
+ +
+ +/*
+ + * hl_fw_cpu_accessible_dma_pool_free - return memory to the CPU-accessible
+ + *                                      pool
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @size: number of bytes to free
+ + * @vaddr: kernel address of the memory to free
+ + *
+ + * Hands @vaddr and @size to gen_pool_free() on
+ + * hdev->cpu_accessible_dma_pool.
+ + */
+ +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ +                                      void *vaddr)
+ +{
+ +      gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
+ +                      size);
+ +}
+ +
+ +/*
+ + * hl_fw_send_heartbeat - send a heartbeat packet to the device CPU
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Sends an ARMCP_PACKET_TEST packet whose value is ARMCP_PACKET_FENCE_VAL and
+ + * expects the same value back as the result.
+ + *
+ + * Returns 0 if the send succeeded and the result matches, -EIO otherwise.
+ + */
+ +int hl_fw_send_heartbeat(struct hl_device *hdev)
+ +{
+ +      struct armcp_packet hb_pkt = {};
+ +      /* Pre-set: hl_fw_send_cpu_message() can return 0 without writing the
+ +       * result (when hdev->disabled is set), and it is compared below
+ +       */
+ +      long result = 0;
+ +      int rc;
+ +
+ +      hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+ +                                      ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
++                                              sizeof(hb_pkt), 0, &result);
+ +
+ +      /* Any send error is folded into -EIO together with a bad result */
+ +      if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
+ +              rc = -EIO;
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_armcp_info_get - fetch the ArmCP info structure from the device CPU
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Allocates a zeroed struct armcp_info buffer from the CPU-accessible DMA
+ + * pool, asks the device CPU to fill it with an ARMCP_PACKET_INFO_GET packet,
+ + * copies the reply into hdev->asic_prop.armcp_info and builds the hwmon
+ + * channel info from the reported sensors. The DMA buffer is freed before
+ + * returning.
+ + *
+ + * Returns 0 on success, -ENOMEM if the DMA buffer cannot be allocated, the
+ + * error of send_cpu_message(), or -EFAULT if building the hwmon channel info
+ + * fails.
+ + */
+ +int hl_fw_armcp_info_get(struct hl_device *hdev)
+ +{
+ +      struct asic_fixed_properties *prop = &hdev->asic_prop;
+ +      struct armcp_packet pkt = {};
+ +      void *armcp_info_cpu_addr;
+ +      dma_addr_t armcp_info_dma_addr;
+ +      long result;
+ +      int rc;
+ +
+ +      armcp_info_cpu_addr =
+ +                      hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ +                                      sizeof(struct armcp_info),
+ +                                      &armcp_info_dma_addr);
+ +      if (!armcp_info_cpu_addr) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to allocate DMA memory for ArmCP info packet\n");
+ +              return -ENOMEM;
+ +      }
+ +
+ +      memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
+ +
+ +      /* The packet carries the DMA address and size of the reply buffer */
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.addr = cpu_to_le64(armcp_info_dma_addr);
+ +      pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ +                                      HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to handle ArmCP info pkt, error %d\n", rc);
+ +              goto out;
+ +      }
+ +
+ +      memcpy(&prop->armcp_info, armcp_info_cpu_addr,
+ +                      sizeof(prop->armcp_info));
+ +
+ +      rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to build hwmon channel info, error %d\n", rc);
+ +              /* The original error code is logged above, then replaced */
+ +              rc = -EFAULT;
+ +              goto out;
+ +      }
+ +
+ +out:
+ +      hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ +                      sizeof(struct armcp_info), armcp_info_cpu_addr);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_fw_get_eeprom_data - fetch the EEPROM data from the device CPU
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @data: destination buffer for the EEPROM data
+ + * @max_size: size of @data in bytes; also the size of the DMA reply buffer
+ + *
+ + * Allocates a zeroed buffer of @max_size bytes from the CPU-accessible DMA
+ + * pool, asks the device CPU to fill it with an ARMCP_PACKET_EEPROM_DATA_GET
+ + * packet and copies the reported number of bytes, capped at @max_size, into
+ + * @data. The DMA buffer is freed before returning.
+ + *
+ + * Returns 0 on success, -ENOMEM if the DMA buffer cannot be allocated, or the
+ + * error of send_cpu_message().
+ + */
+ +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
+ +{
+ +      struct armcp_packet pkt = {};
+ +      void *eeprom_info_cpu_addr;
+ +      dma_addr_t eeprom_info_dma_addr;
+ +      /* Pre-set: hl_fw_send_cpu_message() can return 0 without writing the
+ +       * result (when hdev->disabled is set), and it is used as the copy
+ +       * length below
+ +       */
+ +      long result = 0;
+ +      int rc;
+ +
+ +      eeprom_info_cpu_addr =
+ +                      hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ +                                      max_size, &eeprom_info_dma_addr);
+ +      if (!eeprom_info_cpu_addr) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
+ +              return -ENOMEM;
+ +      }
+ +
+ +      memset(eeprom_info_cpu_addr, 0, max_size);
+ +
+ +      /* The packet carries the DMA address and size of the reply buffer */
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
+ +      pkt.data_max_size = cpu_to_le32(max_size);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ +                      HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
+ +              goto out;
+ +      }
+ +
+ +      /* result contains the actual size */
+ +      memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
+ +
+ +out:
+ +      hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
+ +                      eeprom_info_cpu_addr);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * fw_read_errors - report the boot errors the device firmware has flagged
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @boot_err0_reg: register that holds the CPU_BOOT_ERR0_* bits
+ + *
+ + * Reads @boot_err0_reg and, if CPU_BOOT_ERR0_ENABLED is set, prints one
+ + * message per error bit that is set. The two "skipped" bits are reported as
+ + * warnings, all the others as errors.
+ + */
+ +static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
+ +{
+ +      u32 err_val;
+ +
+ +      /* Some of the firmware status codes are deprecated in newer f/w
+ +       * versions. In those versions, the errors are reported
+ +       * in different registers. Therefore, we need to check those
+ +       * registers and print the exact errors. Moreover, there
+ +       * may be multiple errors, so we need to report on each error
+ +       * separately. Some of the error codes might indicate a state
+ +       * that is not an error per-se, but it is an error in production
+ +       * environment
+ +       */
+ +      err_val = RREG32(boot_err0_reg);
+ +      if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+ +              return;
+ +
+ +      if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - DRAM initialization failed\n");
+ +      if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+ +              dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+ +      if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Thermal Sensor initialization failed\n");
+ +      if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+ +              dev_warn(hdev->dev,
+ +                      "Device boot warning - Skipped DRAM initialization\n");
+ +      /* Printed with dev_warn(), so the text says "warning" as well */
+ +      if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
+ +              dev_warn(hdev->dev,
+ +                      "Device boot warning - Skipped waiting for BMC\n");
+ +      if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Serdes data from BMC not available\n");
+ +      if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - NIC F/W initialization failed\n");
+ +}
+ +
+ +/*
+ + * hl_detect_cpu_boot_status - print an error that matches a boot status code
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @status: CPU_BOOT_STATUS_* value to translate
+ + *
+ + * Prints one dev_err() message for the given status. Values without a
+ + * dedicated case are reported as an invalid status code.
+ + */
+ +static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
+ +{
+ +      switch (status) {
+ +      case CPU_BOOT_STATUS_NA:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - BTL did NOT run\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_IN_WFE:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Stuck inside WFE loop\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_IN_BTL:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Stuck in BTL\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_IN_PREBOOT:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Stuck in Preboot\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_IN_SPL:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Stuck in SPL\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_IN_UBOOT:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Stuck in u-boot\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - DRAM initialization failed\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - u-boot stopped by user\n");
+ +              break;
+ +      case CPU_BOOT_STATUS_TS_INIT_FAIL:
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Thermal Sensor initialization failed\n");
+ +              break;
+ +      default:
+ +              /* NOTE(review): status is a u32 but is printed with %d */
+ +              dev_err(hdev->dev,
+ +                      "Device boot error - Invalid status code %d\n",
+ +                      status);
+ +              break;
+ +      }
+ +}
+ +
+ +/*
+ + * hl_fw_init_cpu - wait for the device CPU to boot and load its firmware
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + * @cpu_boot_status_reg: register polled for the CPU_BOOT_STATUS_* value
+ + * @msg_to_cpu_reg: register used to pass KMD_MSG_* values to the device CPU
+ + * @cpu_msg_status_reg: register polled for the CPU_MSG_* ack
+ + * @boot_err0_reg: boot error register, reported on exit
+ + * @skip_bmc: if true, tell the device CPU to skip waiting for the BMC
+ + * @cpu_timeout: timeout for the boot status polls, in microseconds (it is
+ + *               printed as cpu_timeout / USEC_PER_SEC seconds)
+ + * @boot_fit_timeout: timeout for the boot fit request and ack polls
+ + *                    (presumably microseconds as well - confirm)
+ + *
+ + * The sequence is: optionally serve a boot fit request, wait for the boot
+ + * loader to reach one of the ready states, read the u-boot and preboot
+ + * versions, and, unless F/W loading is disabled or the status is already
+ + * CPU_BOOT_STATUS_SRAM_AVAIL, load the firmware and wait for
+ + * CPU_BOOT_STATUS_SRAM_AVAIL.
+ + *
+ + * Returns 0 on success, -EIO if the boot loader, the skip-BMC ack or the
+ + * final status are not seen in time, or the error returned by the boot fit
+ + * load, the boot fit ack poll or the firmware load.
+ + */
+ +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ +                      u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+ +                      u32 boot_err0_reg, bool skip_bmc,
+ +                      u32 cpu_timeout, u32 boot_fit_timeout)
+ +{
+ +      u32 status;
+ +      int rc;
+ +
+ +      dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
+ +              cpu_timeout / USEC_PER_SEC);
+ +
+ +      /* Wait for boot FIT request */
+ +      rc = hl_poll_timeout(
+ +              hdev,
+ +              cpu_boot_status_reg,
+ +              status,
+ +              status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
+ +              10000,
+ +              boot_fit_timeout);
+ +
+ +      /* A missing boot fit request is not an error, boot simply goes on */
+ +      if (rc) {
+ +              dev_dbg(hdev->dev,
+ +                      "No boot fit request received, resuming boot\n");
+ +      } else {
+ +              rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
+ +              if (rc)
+ +                      goto out;
+ +
+ +              /* Clear device CPU message status */
+ +              WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
+ +
+ +              /* Signal device CPU that boot loader is ready */
+ +              WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+ +
+ +              /* Poll for CPU device ack */
+ +              rc = hl_poll_timeout(
+ +                      hdev,
+ +                      cpu_msg_status_reg,
+ +                      status,
+ +                      status == CPU_MSG_OK,
+ +                      10000,
+ +                      boot_fit_timeout);
+ +
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "Timeout waiting for boot fit load ack\n");
+ +                      goto out;
+ +              }
+ +
+ +              /* Clear message */
+ +              WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ +      }
+ +
+ +      /* Make sure CPU boot-loader is running */
+ +      rc = hl_poll_timeout(
+ +              hdev,
+ +              cpu_boot_status_reg,
+ +              status,
+ +              (status == CPU_BOOT_STATUS_DRAM_RDY) ||
+ +              (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
+ +              (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
+ +              (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ +              10000,
+ +              cpu_timeout);
+ +
+ +      /* Read U-Boot, preboot versions now in case we will later fail */
+ +      hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
+ +      hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
+ +
+ +      /* Some of the status codes below are deprecated in newer f/w
+ +       * versions but we keep them here for backward compatibility
+ +       */
+ +      if (rc) {
+ +              hl_detect_cpu_boot_status(hdev, status);
+ +              rc = -EIO;
+ +              goto out;
+ +      }
+ +
+ +      if (!hdev->fw_loading) {
+ +              dev_info(hdev->dev, "Skip loading FW\n");
+ +              goto out;
+ +      }
+ +
+ +      /* Already at the status that is polled for after loading the F/W */
+ +      if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
+ +              goto out;
+ +
+ +      dev_info(hdev->dev,
+ +              "Loading firmware to device, may take some time...\n");
+ +
+ +      rc = hdev->asic_funcs->load_firmware_to_device(hdev);
+ +      if (rc)
+ +              goto out;
+ +
+ +      if (skip_bmc) {
+ +              WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
+ +
+ +              rc = hl_poll_timeout(
+ +                      hdev,
+ +                      cpu_boot_status_reg,
+ +                      status,
+ +                      (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
+ +                      10000,
+ +                      cpu_timeout);
+ +
+ +              if (rc) {
+ +                      dev_err(hdev->dev,
+ +                              "Failed to get ACK on skipping BMC, %d\n",
+ +                              status);
+ +                      WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ +                      rc = -EIO;
+ +                      goto out;
+ +              }
+ +      }
+ +
+ +      WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
+ +
+ +      rc = hl_poll_timeout(
+ +              hdev,
+ +              cpu_boot_status_reg,
+ +              status,
+ +              (status == CPU_BOOT_STATUS_SRAM_AVAIL),
+ +              10000,
+ +              cpu_timeout);
+ +
+ +      /* Clear message */
+ +      WREG32(msg_to_cpu_reg, KMD_MSG_NA);
+ +
+ +      if (rc) {
+ +              if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
+ +                      dev_err(hdev->dev,
+ +                              "Device reports FIT image is corrupted\n");
+ +              else
+ +                      dev_err(hdev->dev,
+ +                              "Failed to load firmware to device, %d\n",
+ +                              status);
+ +
+ +              rc = -EIO;
+ +              goto out;
+ +      }
+ +
+ +      dev_info(hdev->dev, "Successfully loaded firmware to device\n");
+ +
+ +out:
+ +      /* The boot error register is reported on every exit path */
+ +      fw_read_errors(hdev, boot_err0_reg);
+ +
+ +      return rc;
+ +}
diff --cc drivers/misc/habanalabs/common/habanalabs.h

index bf9abfa47b7a33e3ddad722f359d2894505cf3c3,0000000000000000000000000000000000000000..eb42aa5476a9a8d201e16d3869203c43843b46fc

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/habanalabs.h
--- /dev/null
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@@ -1,1962 -1,0 +1,1969 @@@
-  * @enable_clock_gating: enable clock gating for reducing power consumption.
-  * @disable_clock_gating: disable clock for accessing registers on HBW.
+ +/* SPDX-License-Identifier: GPL-2.0
+ + *
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + *
+ + */
+ +
+ +#ifndef HABANALABSP_H_
+ +#define HABANALABSP_H_
+ +
+ +#include "include/common/armcp_if.h"
+ +#include "include/common/qman_if.h"
+ +#include <uapi/misc/habanalabs.h>
+ +
+ +#include <linux/cdev.h>
+ +#include <linux/iopoll.h>
+ +#include <linux/irqreturn.h>
+ +#include <linux/dma-fence.h>
+ +#include <linux/dma-direction.h>
+ +#include <linux/scatterlist.h>
+ +#include <linux/hashtable.h>
+ +
+ +#define HL_NAME                               "habanalabs"
+ +
+ +#define HL_MMAP_CB_MASK                       (0x8000000000000000ull >> PAGE_SHIFT)
+ +
+ +#define HL_PENDING_RESET_PER_SEC      30
+ +
+ +#define HL_HARD_RESET_MAX_TIMEOUT     120
+ +
+ +#define HL_DEVICE_TIMEOUT_USEC                1000000 /* 1 s */
+ +
+ +#define HL_HEARTBEAT_PER_USEC         5000000 /* 5 s */
+ +
+ +#define HL_PLL_LOW_JOB_FREQ_USEC      5000000 /* 5 s */
+ +
+ +#define HL_ARMCP_INFO_TIMEOUT_USEC    10000000 /* 10s */
+ +#define HL_ARMCP_EEPROM_TIMEOUT_USEC  10000000 /* 10s */
+ +
+ +#define HL_PCI_ELBI_TIMEOUT_MSEC      10 /* 10ms */
+ +
+ +#define HL_SIM_MAX_TIMEOUT_US         10000000 /* 10s */
+ +
+ +#define HL_IDLE_BUSY_TS_ARR_SIZE      4096
+ +
+ +/* Memory */
+ +#define MEM_HASH_TABLE_BITS           7 /* 1 << 7 buckets */
+ +
+ +/* MMU */
+ +#define MMU_HASH_TABLE_BITS           7 /* 1 << 7 buckets */
+ +
+ +/*
+ + * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
+ + * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
+ + */
+ +#define HL_RSVD_SOBS                  4
+ +#define HL_RSVD_MONS                  2
+ +
+ +#define HL_RSVD_SOBS_IN_USE           2
+ +#define HL_RSVD_MONS_IN_USE           1
+ +
+ +#define HL_MAX_SOB_VAL                        (1 << 15)
+ +
+ +/*
+ + * IS_POWER_OF_2(n): true if n is non-zero and has exactly one bit set.
+ + * IS_MAX_PENDING_CS_VALID(n): true if n is a power of 2 greater than 1.
+ + *
+ + * The argument is parenthesized at every use so that compound expressions
+ + * (e.g. "a | b" or "a + b") are treated as a single operand. Note that the
+ + * argument is evaluated more than once, so it must not have side effects.
+ + */
+ +#define IS_POWER_OF_2(n)              ((n) != 0 && (((n) & ((n) - 1)) == 0))
+ +#define IS_MAX_PENDING_CS_VALID(n)    (IS_POWER_OF_2(n) && ((n) > 1))
+ +
+ +#define HL_PCI_NUM_BARS                       6
+ +
+ +/**
+ + * struct pgt_info - MMU hop page info.
+ + * @node: hash linked-list node for the shadow hash of pgts.
+ + * @phys_addr: physical address of the pgt.
+ + * @shadow_addr: shadow hop in the host.
+ + * @ctx: pointer to the owner ctx.
+ + * @num_of_ptes: indicates how many ptes are used in the pgt.
+ + *
+ + * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
+ + * is needed during mapping, a new page is allocated and this structure holds
+ + * its essential information. During unmapping, if no valid PTEs remained in the
+ + * page, it is freed with its pgt_info structure.
+ + */
+ +struct pgt_info {
+ +      struct hlist_node       node;
+ +      u64                     phys_addr;
+ +      u64                     shadow_addr;
+ +      struct hl_ctx           *ctx;
+ +      int                     num_of_ptes;
+ +};
+ +
+ +struct hl_device;
+ +struct hl_fpriv;
+ +
+ +/**
+ + * enum hl_pci_match_mode - pci match mode per region
+ + * @PCI_ADDRESS_MATCH_MODE: address match mode
+ + * @PCI_BAR_MATCH_MODE: bar match mode
+ + */
+ +enum hl_pci_match_mode {
+ +      PCI_ADDRESS_MATCH_MODE,
+ +      PCI_BAR_MATCH_MODE
+ +};
+ +
+ +/**
+ + * enum hl_fw_component - F/W components to read version through registers.
+ + * @FW_COMP_UBOOT: u-boot.
+ + * @FW_COMP_PREBOOT: preboot.
+ + */
+ +enum hl_fw_component {
+ +      FW_COMP_UBOOT,
+ +      FW_COMP_PREBOOT
+ +};
+ +
+ +/**
+ + * enum hl_queue_type - Supported QUEUE types.
+ + * @QUEUE_TYPE_NA: queue is not available.
+ + * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
+ + *                  host.
+ + * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
+ + *                    memories and/or operates the compute engines.
+ + * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
+ + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
+ + *                 notifications are sent by H/W.
+ + */
+ +enum hl_queue_type {
+ +      QUEUE_TYPE_NA,
+ +      QUEUE_TYPE_EXT,
+ +      QUEUE_TYPE_INT,
+ +      QUEUE_TYPE_CPU,
+ +      QUEUE_TYPE_HW
+ +};
+ +
+ +enum hl_cs_type {
+ +      CS_TYPE_DEFAULT,        /* presumably a regular CS (neither signal nor wait) - TODO confirm */
+ +      CS_TYPE_SIGNAL,         /* signal CS */
+ +      CS_TYPE_WAIT            /* wait CS */
+ +};
+ +
+ +/**
+ + * struct hl_inbound_pci_region - inbound region descriptor
+ + * @mode: pci match mode for this region
+ + * @addr: region target address
+ + * @size: region size in bytes
+ + * @offset_in_bar: offset within bar (address match mode)
+ + * @bar: bar id
+ + */
+ +struct hl_inbound_pci_region {
+ +      enum hl_pci_match_mode  mode;
+ +      u64                     addr;
+ +      u64                     size;
+ +      u64                     offset_in_bar;
+ +      u8                      bar;
+ +};
+ +
+ +/**
+ + * struct hl_outbound_pci_region - outbound region descriptor
+ + * @addr: region target address
+ + * @size: region size in bytes
+ + */
+ +struct hl_outbound_pci_region {
+ +      u64     addr;
+ +      u64     size;
+ +};
+ +
+ +/**
+ + * struct hl_hw_sob - H/W SOB info.
+ + * @hdev: habanalabs device structure.
+ + * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
+ + * @sob_id: id of this SOB.
+ + * @q_idx: the H/W queue that uses this SOB.
+ + */
+ +struct hl_hw_sob {
+ +      struct hl_device        *hdev;
+ +      struct kref             kref;
+ +      u32                     sob_id;
+ +      u32                     q_idx;
+ +};
+ +
+ +/**
+ + * struct hw_queue_properties - queue information.
+ + * @type: queue type.
+ + * @driver_only: true if only the driver is allowed to send a job to this queue,
+ + *               false otherwise.
+ + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
+ + *                      queue, false otherwise (a CB address must be provided).
+ + * @supports_sync_stream: True if queue supports sync stream
+ + */
+ +struct hw_queue_properties {
+ +      enum hl_queue_type      type;
+ +      u8                      driver_only;
+ +      u8                      requires_kernel_cb;
+ +      u8                      supports_sync_stream;
+ +};
+ +
+ +/**
+ + * enum vm_type_t - virtual memory mapping request information.
+ + * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
+ + * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
+ + */
+ +enum vm_type_t {
+ +      VM_TYPE_USERPTR = 0x1,
+ +      VM_TYPE_PHYS_PACK = 0x2
+ +};
+ +
+ +/**
+ + * enum hl_device_hw_state - H/W device state. use this to understand whether
+ + *                           to do reset before hw_init or not
+ + * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
+ + * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute
+ + *                            hw_init
+ + */
+ +enum hl_device_hw_state {
+ +      HL_DEVICE_HW_STATE_CLEAN = 0,
+ +      HL_DEVICE_HW_STATE_DIRTY
+ +};
+ +
+ +/**
+ + * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ + * @start_addr: virtual start address of the memory region.
+ + * @end_addr: virtual end address of the memory region.
+ + * @hop0_shift: shift of hop 0 mask.
+ + * @hop1_shift: shift of hop 1 mask.
+ + * @hop2_shift: shift of hop 2 mask.
+ + * @hop3_shift: shift of hop 3 mask.
+ + * @hop4_shift: shift of hop 4 mask.
+ + * @hop0_mask: mask to get the PTE address in hop 0.
+ + * @hop1_mask: mask to get the PTE address in hop 1.
+ + * @hop2_mask: mask to get the PTE address in hop 2.
+ + * @hop3_mask: mask to get the PTE address in hop 3.
+ + * @hop4_mask: mask to get the PTE address in hop 4.
+ + * @page_size: default page size used to allocate memory.
+ + */
+ +struct hl_mmu_properties {
+ +      u64     start_addr;
+ +      u64     end_addr;
+ +      u64     hop0_shift;
+ +      u64     hop1_shift;
+ +      u64     hop2_shift;
+ +      u64     hop3_shift;
+ +      u64     hop4_shift;
+ +      u64     hop0_mask;
+ +      u64     hop1_mask;
+ +      u64     hop2_mask;
+ +      u64     hop3_mask;
+ +      u64     hop4_mask;
+ +      u32     page_size;
+ +};
+ +
+ +/**
+ + * struct asic_fixed_properties - ASIC specific immutable properties.
+ + * @hw_queues_props: H/W queues properties.
+ + * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
+ + *            available sensors.
+ + * @uboot_ver: F/W U-boot version.
+ + * @preboot_ver: F/W Preboot version.
+ + * @dmmu: DRAM MMU address translation properties.
+ + * @pmmu: PCI (host) MMU address translation properties.
+ + * @pmmu_huge: PCI (host) MMU address translation properties for memory
+ + *              allocated with huge pages.
+ + * @sram_base_address: SRAM physical start address.
+ + * @sram_end_address: SRAM physical end address.
+ + * @sram_user_base_address: SRAM physical start address for user access.
+ + * @dram_base_address: DRAM physical start address.
+ + * @dram_end_address: DRAM physical end address.
+ + * @dram_user_base_address: DRAM physical start address for user access.
+ + * @dram_size: DRAM total size.
+ + * @dram_pci_bar_size: size of PCI bar towards DRAM.
+ + * @max_power_default: max power of the device after reset
+ + * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
+ + *                                      fault.
+ + * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
+ + * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
+ + * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
+ + * @mmu_dram_default_page_addr: DRAM default page physical address.
+ + * @mmu_pgt_size: MMU page tables total size.
+ + * @mmu_pte_size: PTE size in MMU page tables.
+ + * @mmu_hop_table_size: MMU hop table size.
+ + * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
+ + * @dram_page_size: page size for MMU DRAM allocation.
+ + * @cfg_size: configuration space size on SRAM.
+ + * @sram_size: total size of SRAM.
+ + * @max_asid: maximum number of open contexts (ASIDs).
+ + * @num_of_events: number of possible internal H/W IRQs.
+ + * @psoc_pci_pll_nr: PCI PLL NR value.
+ + * @psoc_pci_pll_nf: PCI PLL NF value.
+ + * @psoc_pci_pll_od: PCI PLL OD value.
+ + * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
+ + * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
+ + * @high_pll: high PLL frequency used by the device.
+ + * @cb_pool_cb_cnt: number of CBs in the CB pool.
+ + * @cb_pool_cb_size: size of each CB in the CB pool.
+ + * @max_pending_cs: maximum of concurrent pending command submissions
+ + * @max_queues: maximum amount of queues in the system
+ + * @sync_stream_first_sob: first sync object available for sync stream use
+ + * @sync_stream_first_mon: first monitor available for sync stream use
+ + * @tpc_enabled_mask: which TPCs are enabled.
+ + * @completion_queues_count: number of completion queues.
+ + */
+ +struct asic_fixed_properties {
+ +      struct hw_queue_properties      *hw_queues_props;
+ +      struct armcp_info               armcp_info;
+ +      char                            uboot_ver[VERSION_MAX_LEN];
+ +      char                            preboot_ver[VERSION_MAX_LEN];
+ +      struct hl_mmu_properties        dmmu;
+ +      struct hl_mmu_properties        pmmu;
+ +      struct hl_mmu_properties        pmmu_huge;
+ +      u64                             sram_base_address;
+ +      u64                             sram_end_address;
+ +      u64                             sram_user_base_address;
+ +      u64                             dram_base_address;
+ +      u64                             dram_end_address;
+ +      u64                             dram_user_base_address;
+ +      u64                             dram_size;
+ +      u64                             dram_pci_bar_size;
+ +      u64                             max_power_default;
+ +      u64                             dram_size_for_default_page_mapping;
+ +      u64                             pcie_dbi_base_address;
+ +      u64                             pcie_aux_dbi_reg_addr;
+ +      u64                             mmu_pgt_addr;
+ +      u64                             mmu_dram_default_page_addr;
+ +      u32                             mmu_pgt_size;
+ +      u32                             mmu_pte_size;
+ +      u32                             mmu_hop_table_size;
+ +      u32                             mmu_hop0_tables_total_size;
+ +      u32                             dram_page_size;
+ +      u32                             cfg_size;
+ +      u32                             sram_size;
+ +      u32                             max_asid;
+ +      u32                             num_of_events;
+ +      u32                             psoc_pci_pll_nr;
+ +      u32                             psoc_pci_pll_nf;
+ +      u32                             psoc_pci_pll_od;
+ +      u32                             psoc_pci_pll_div_factor;
+ +      u32                             psoc_timestamp_frequency;
+ +      u32                             high_pll;
+ +      u32                             cb_pool_cb_cnt;
+ +      u32                             cb_pool_cb_size;
+ +      u32                             max_pending_cs;
+ +      u32                             max_queues;
+ +      u16                             sync_stream_first_sob;
+ +      u16                             sync_stream_first_mon;
+ +      u8                              tpc_enabled_mask;
+ +      u8                              completion_queues_count;
+ +};
+ +
+ +/**
+ + * struct hl_cs_compl - command submission completion object.
+ + * @base_fence: kernel fence object.
+ + * @lock: spinlock to protect fence.
+ + * @hdev: habanalabs device structure.
+ + * @hw_sob: the H/W SOB used in this signal/wait CS.
+ + * @cs_seq: command submission sequence number.
+ + * @type: type of the CS - signal/wait.
+ + * @sob_val: the SOB value that is used in this signal/wait CS.
+ + */
+ +struct hl_cs_compl {
+ +      struct dma_fence        base_fence;
+ +      spinlock_t              lock;
+ +      struct hl_device        *hdev;
+ +      struct hl_hw_sob        *hw_sob;
+ +      u64                     cs_seq;
+ +      enum hl_cs_type         type;
+ +      u16                     sob_val;
+ +};
+ +
+ +/*
+ + * Command Buffers
+ + */
+ +
+ +/**
+ + * struct hl_cb_mgr - describes a Command Buffer Manager.
+ + * @cb_lock: protects cb_handles.
+ + * @cb_handles: an idr to hold all command buffer handles.
+ + */
+ +struct hl_cb_mgr {
+ +      spinlock_t              cb_lock;
+ +      struct idr              cb_handles; /* protected by cb_lock */
+ +};
+ +
+ +/**
+ + * struct hl_cb - describes a Command Buffer.
+ + * @refcount: reference counter for usage of the CB.
+ + * @hdev: pointer to device this CB belongs to.
+ + * @lock: spinlock to protect mmap/cs flows.
+ + * @debugfs_list: node in debugfs list of command buffers.
+ + * @pool_list: node in pool list of command buffers.
+ + * @kernel_address: Holds the CB's kernel virtual address.
+ + * @bus_address: Holds the CB's DMA address.
+ + * @mmap_size: Holds the CB's size that was mmaped.
+ + * @size: holds the CB's size.
+ + * @id: the CB's ID.
+ + * @cs_cnt: holds number of CS that this CB participates in.
+ + * @ctx_id: holds the ID of the owner's context.
+ + * @mmap: true if the CB is currently mmaped to user.
+ + * @is_pool: true if CB was acquired from the pool, false otherwise.
+ + * @is_internal: internally allocated
+ + */
+ +struct hl_cb {
+ +      struct kref             refcount;
+ +      struct hl_device        *hdev;
+ +      spinlock_t              lock;
+ +      struct list_head        debugfs_list;
+ +      struct list_head        pool_list;
+ +      u64                     kernel_address;
+ +      dma_addr_t              bus_address;
+ +      u32                     mmap_size;
+ +      u32                     size;
+ +      u32                     id;
+ +      u32                     cs_cnt;
+ +      u32                     ctx_id;
+ +      u8                      mmap;
+ +      u8                      is_pool;
+ +      u8                      is_internal;
+ +};
+ +
+ +
+ +/*
+ + * QUEUES
+ + */
+ +
+ +struct hl_cs_job;
+ +
+ +/* Queue length of external and HW queues */
+ +#define HL_QUEUE_LENGTH                       4096
+ +#define HL_QUEUE_SIZE_IN_BYTES                (HL_QUEUE_LENGTH * HL_BD_SIZE)
+ +
+ +#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
+ +#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
+ +#endif
+ +
+ +/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
+ +#define HL_CQ_LENGTH                  HL_QUEUE_LENGTH
+ +#define HL_CQ_SIZE_IN_BYTES           (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
+ +
+ +/* Must be power of 2 */
+ +#define HL_EQ_LENGTH                  64
+ +#define HL_EQ_SIZE_IN_BYTES           (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
+ +
+ +/* Host <-> ArmCP shared memory size */
+ +#define HL_CPU_ACCESSIBLE_MEM_SIZE    SZ_2M
+ +
+ +/**
+ + * struct hl_hw_queue - describes a H/W transport queue.
+ + * @hw_sob: array of the used H/W SOBs by this H/W queue.
+ + * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
+ + * @queue_type: type of queue.
+ + * @kernel_address: holds the queue's kernel virtual address.
+ + * @bus_address: holds the queue's DMA address.
+ + * @pi: holds the queue's pi value.
+ + * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
+ + * @hw_queue_id: the id of the H/W queue.
+ + * @cq_id: the id for the corresponding CQ for this H/W queue.
+ + * @msi_vec: the IRQ number of the H/W queue.
+ + * @int_queue_len: length of internal queue (number of entries).
+ + * @next_sob_val: the next value to use for the currently used SOB.
+ + * @base_sob_id: the base SOB id of the SOBs used by this queue.
+ + * @base_mon_id: the base MON id of the MONs used by this queue.
+ + * @valid: is the queue valid (we have array of 32 queues, not all of them
+ + *         exist).
+ + * @curr_sob_offset: the id offset to the currently used SOB from the
+ + *                   HL_RSVD_SOBS that are being used by this queue.
+ + * @supports_sync_stream: True if queue supports sync stream
+ + */
+ +struct hl_hw_queue {
+ +      struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
+ +      struct hl_cs_job        **shadow_queue;
+ +      enum hl_queue_type      queue_type;
+ +      u64                     kernel_address;
+ +      dma_addr_t              bus_address;
+ +      u32                     pi;
+ +      atomic_t                ci;
+ +      u32                     hw_queue_id;
+ +      u32                     cq_id;
+ +      u32                     msi_vec;
+ +      u16                     int_queue_len;
+ +      u16                     next_sob_val;
+ +      u16                     base_sob_id;
+ +      u16                     base_mon_id;
+ +      u8                      valid;
+ +      u8                      curr_sob_offset;
+ +      u8                      supports_sync_stream;
+ +};
+ +
+ +/**
+ + * struct hl_cq - describes a completion queue
+ + * @hdev: pointer to the device structure
+ + * @kernel_address: holds the queue's kernel virtual address
+ + * @bus_address: holds the queue's DMA address
+ + * @cq_idx: completion queue index in array
+ + * @hw_queue_id: the id of the matching H/W queue
+ + * @ci: ci inside the queue
+ + * @pi: pi inside the queue
+ + * @free_slots_cnt: counter of free slots in queue
+ + */
+ +struct hl_cq {
+ +      struct hl_device        *hdev;
+ +      u64                     kernel_address;
+ +      dma_addr_t              bus_address;
+ +      u32                     cq_idx;
+ +      u32                     hw_queue_id;
+ +      u32                     ci;
+ +      u32                     pi;
+ +      atomic_t                free_slots_cnt;
+ +};
+ +
+ +/**
+ + * struct hl_eq - describes the event queue (single one per device)
+ + * @hdev: pointer to the device structure
+ + * @kernel_address: holds the queue's kernel virtual address
+ + * @bus_address: holds the queue's DMA address
+ + * @ci: ci inside the queue
+ + */
+ +struct hl_eq {
+ +      struct hl_device        *hdev;
+ +      u64                     kernel_address;
+ +      dma_addr_t              bus_address;
+ +      u32                     ci;
+ +};
+ +
+ +
+ +/*
+ + * ASICs
+ + */
+ +
+ +/**
+ + * enum hl_asic_type - supported ASIC types.
+ + * @ASIC_INVALID: Invalid ASIC type.
+ + * @ASIC_GOYA: Goya device.
+ + * @ASIC_GAUDI: Gaudi device.
+ + */
+ +enum hl_asic_type {
+ +      ASIC_INVALID,
+ +      ASIC_GOYA,
+ +      ASIC_GAUDI
+ +};
+ +
+ +struct hl_cs_parser;
+ +
+ +/**
+ + * enum hl_pm_mng_profile - power management profile.
+ + * @PM_AUTO: internal clock is set by the Linux driver.
+ + * @PM_MANUAL: internal clock is set by the user.
+ + * @PM_LAST: last power management type.
+ + */
+ +enum hl_pm_mng_profile {
+ +      PM_AUTO = 1,
+ +      PM_MANUAL,
+ +      PM_LAST
+ +};
+ +
+ +/**
+ + * enum hl_pll_frequency - PLL frequency.
+ + * @PLL_HIGH: high frequency.
+ + * @PLL_LOW: low frequency.
+ + * @PLL_LAST: last frequency values that were configured by the user.
+ + */
+ +enum hl_pll_frequency {
+ +      PLL_HIGH = 1,
+ +      PLL_LOW,
+ +      PLL_LAST
+ +};
+ +
+ +#define PLL_REF_CLK 50
+ +
+ +enum div_select_defs {
+ +      DIV_SEL_REF_CLK = 0,
+ +      DIV_SEL_PLL_CLK = 1,
+ +      DIV_SEL_DIVIDED_REF = 2,
+ +      DIV_SEL_DIVIDED_PLL = 3,
+ +};
+ +
+ +/**
+ + * struct hl_asic_funcs - ASIC specific functions that can be called from
+ + *                        common code.
+ + * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
+ + * @early_fini: tears down what was done in early_init.
+ + * @late_init: sets up late driver/hw state (post hw_init) - Optional.
+ + * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
+ + * @sw_init: sets up driver state, does not configure H/W.
+ + * @sw_fini: tears down driver state, does not configure H/W.
+ + * @hw_init: sets up the H/W state.
+ + * @hw_fini: tears down the H/W state.
+ + * @halt_engines: halt engines, needed for reset sequence. This also disables
+ + *                interrupts from the device. Should be called before
+ + *                hw_fini and before CS rollback.
+ + * @suspend: handles IP specific H/W or SW changes for suspend.
+ + * @resume: handles IP specific H/W or SW changes for resume.
+ + * @cb_mmap: maps a CB.
+ + * @ring_doorbell: increment PI on a given QMAN.
+ + * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
+ + *             function because the PQs are located in different memory areas
+ + *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
+ + *             writing the PQE must match the destination memory area
+ + *             properties.
+ + * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
+ + *                           dma_alloc_coherent(). This is ASIC function because
+ + *                           its implementation is not trivial when the driver
+ + *                           is loaded in simulation mode (not upstreamed).
+ + * @asic_dma_free_coherent:  Free coherent DMA memory by calling
+ + *                           dma_free_coherent(). This is ASIC function because
+ + *                           its implementation is not trivial when the driver
+ + *                           is loaded in simulation mode (not upstreamed).
+ + * @get_int_queue_base: get the internal queue base address.
+ + * @test_queues: run simple test on all queues for sanity check.
+ + * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
+ + *                        size of allocation is HL_DMA_POOL_BLK_SIZE.
+ + * @asic_dma_pool_free: free small DMA allocation from pool.
+ + * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
+ + * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
+ + * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ + * @cs_parser: parse Command Submission.
+ + * @asic_dma_map_sg: DMA map scatter-gather list.
+ + * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
+ + * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
+ + * @update_eq_ci: update event queue CI.
+ + * @context_switch: called upon ASID context switch.
+ + * @restore_phase_topology: clear all SOBs and MONs.
+ + * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
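+ + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
+ + * @debugfs_read64: debug interface for reading u64 (64-bit counterpart of
+ + *                  debugfs_read32).
+ + * @debugfs_write64: debug interface for writing u64 (64-bit counterpart of
+ + *                   debugfs_write32).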
+ + * @add_device_attr: add ASIC specific device attributes.
+ + * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
+ + * @set_pll_profile: change PLL profile (manual/automatic).
+ + * @get_events_stat: retrieve event queue entries histogram.
+ + * @read_pte: read MMU page table entry from DRAM.
+ + * @write_pte: write MMU page table entry to DRAM.
+ + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
+ + *                        (L1 only) or hard (L0 & L1) flush.
+ + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
+ + *                              ASID-VA-size mask.
+ + * @send_heartbeat: send is-alive packet to ArmCP and verify response.
-  * @send_cpu_message: send buffer to ArmCP.
++ * @set_clock_gating: enable/disable clock gating per engine according to
++ *                    clock gating mask in hdev
++ * @disable_clock_gating: disable clock gating completely
+ + * @debug_coresight: perform certain actions on Coresight for debugging.
+ + * @is_device_idle: return true if device is idle, false otherwise.
+ + * @soft_reset_late_init: perform certain actions needed after soft reset.
+ + * @hw_queues_lock: acquire H/W queues lock.
+ + * @hw_queues_unlock: release H/W queues lock.
+ + * @get_pci_id: retrieve PCI ID.
+ + * @get_eeprom_data: retrieve EEPROM data from F/W.
-       void (*enable_clock_gating)(struct hl_device *hdev);
++ * @send_cpu_message: send message to F/W. If the message times out, the
++ *                    driver will eventually reset the device. The timeout can
++ *                    be determined by the calling function or it can be 0 and
++ *                    then the timeout is the default timeout for the specific
++ *                    ASIC
+ + * @get_hw_state: retrieve the H/W state
+ + * @pci_bars_map: Map PCI BARs.
+ + * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
+ + *                     old address the bar pointed to or U64_MAX for failure
+ + * @init_iatu: Initialize the iATU unit inside the PCI controller.
+ + * @rreg: Read a register. Needed for simulator support.
+ + * @wreg: Write a register. Needed for simulator support.
+ + * @halt_coresight: stop the ETF and ETR traces.
+ + * @ctx_init: context dependent initialization.
+ + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
+ + * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
+ + * @read_device_fw_version: read the device's firmware versions that are
+ + *                          contained in registers
+ + * @load_firmware_to_device: load the firmware to the device's memory
+ + * @load_boot_fit_to_device: load boot fit to device's memory
+ + * @get_signal_cb_size: Get signal CB size.
+ + * @get_wait_cb_size: Get wait CB size.
+ + * @gen_signal_cb: Generate a signal CB.
+ + * @gen_wait_cb: Generate a wait CB.
+ + * @reset_sob: Reset a SOB.
+ + * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
+ + *                        firmware configuration
+ + * @get_device_time: Get the device time.
+ + */
+ +struct hl_asic_funcs {
+ +      int (*early_init)(struct hl_device *hdev);
+ +      int (*early_fini)(struct hl_device *hdev);
+ +      int (*late_init)(struct hl_device *hdev);
+ +      void (*late_fini)(struct hl_device *hdev);
+ +      int (*sw_init)(struct hl_device *hdev);
+ +      int (*sw_fini)(struct hl_device *hdev);
+ +      int (*hw_init)(struct hl_device *hdev);
+ +      void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
+ +      void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+ +      int (*suspend)(struct hl_device *hdev);
+ +      int (*resume)(struct hl_device *hdev);
+ +      int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+ +                      u64 kaddress, phys_addr_t paddress, u32 size);
+ +      void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
+ +      void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
+ +                      struct hl_bd *bd);
+ +      void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
+ +                                      dma_addr_t *dma_handle, gfp_t flag);
+ +      void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
+ +                                      void *cpu_addr, dma_addr_t dma_handle);
+ +      void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
+ +                              dma_addr_t *dma_handle, u16 *queue_len);
+ +      int (*test_queues)(struct hl_device *hdev);
+ +      void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
+ +                              gfp_t mem_flags, dma_addr_t *dma_handle);
+ +      void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
+ +                              dma_addr_t dma_addr);
+ +      void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
+ +                              size_t size, dma_addr_t *dma_handle);
+ +      void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
+ +                              size_t size, void *vaddr);
+ +      void (*hl_dma_unmap_sg)(struct hl_device *hdev,
+ +                              struct scatterlist *sgl, int nents,
+ +                              enum dma_data_direction dir);
+ +      int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
+ +      int (*asic_dma_map_sg)(struct hl_device *hdev,
+ +                              struct scatterlist *sgl, int nents,
+ +                              enum dma_data_direction dir);
+ +      u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
+ +                                      struct sg_table *sgt);
+ +      void (*add_end_of_cb_packets)(struct hl_device *hdev,
+ +                                      u64 kernel_address, u32 len,
+ +                                      u64 cq_addr, u32 cq_val, u32 msix_num,
+ +                                      bool eb);
+ +      void (*update_eq_ci)(struct hl_device *hdev, u32 val);
+ +      int (*context_switch)(struct hl_device *hdev, u32 asid);
+ +      void (*restore_phase_topology)(struct hl_device *hdev);
+ +      int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
+ +      int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
+ +      int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
+ +      int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
+ +      void (*add_device_attr)(struct hl_device *hdev,
+ +                              struct attribute_group *dev_attr_grp);
+ +      void (*handle_eqe)(struct hl_device *hdev,
+ +                              struct hl_eq_entry *eq_entry);
+ +      void (*set_pll_profile)(struct hl_device *hdev,
+ +                      enum hl_pll_frequency freq);
+ +      void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
+ +                              u32 *size);
+ +      u64 (*read_pte)(struct hl_device *hdev, u64 addr);
+ +      void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
+ +      int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
+ +                                      u32 flags);
+ +      int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
+ +                      u32 asid, u64 va, u64 size);
+ +      int (*send_heartbeat)(struct hl_device *hdev);
-  * @clock_gating: is clock gating enabled.
++      void (*set_clock_gating)(struct hl_device *hdev);
+ +      void (*disable_clock_gating)(struct hl_device *hdev);
+ +      int (*debug_coresight)(struct hl_device *hdev, void *data);
+ +      bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
+ +                              struct seq_file *s);
+ +      int (*soft_reset_late_init)(struct hl_device *hdev);
+ +      void (*hw_queues_lock)(struct hl_device *hdev);
+ +      void (*hw_queues_unlock)(struct hl_device *hdev);
+ +      u32 (*get_pci_id)(struct hl_device *hdev);
+ +      int (*get_eeprom_data)(struct hl_device *hdev, void *data,
+ +                              size_t max_size);
+ +      int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
+ +                              u16 len, u32 timeout, long *result);
+ +      enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
+ +      int (*pci_bars_map)(struct hl_device *hdev);
+ +      u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+ +      int (*init_iatu)(struct hl_device *hdev);
+ +      u32 (*rreg)(struct hl_device *hdev, u32 reg);
+ +      void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
+ +      void (*halt_coresight)(struct hl_device *hdev);
+ +      int (*ctx_init)(struct hl_ctx *ctx);
+ +      int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+ +      u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
+ +      void (*read_device_fw_version)(struct hl_device *hdev,
+ +                                      enum hl_fw_component fwc);
+ +      int (*load_firmware_to_device)(struct hl_device *hdev);
+ +      int (*load_boot_fit_to_device)(struct hl_device *hdev);
+ +      u32 (*get_signal_cb_size)(struct hl_device *hdev);
+ +      u32 (*get_wait_cb_size)(struct hl_device *hdev);
+ +      void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
+ +      void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
+ +                              u16 sob_val, u16 mon_id, u32 q_idx);
+ +      void (*reset_sob)(struct hl_device *hdev, void *data);
+ +      void (*set_dma_mask_from_fw)(struct hl_device *hdev);
+ +      u64 (*get_device_time)(struct hl_device *hdev);
+ +};
+ +
+ +
+ +/*
+ + * CONTEXTS
+ + */
+ +
+ +#define HL_KERNEL_ASID_ID     0
+ +
+ +/**
+ + * struct hl_va_range - virtual addresses range.
+ + * @lock: protects the virtual addresses list.
+ + * @list: list of virtual addresses blocks available for mappings.
+ + * @start_addr: range start address.
+ + * @end_addr: range end address.
+ + */
+ +struct hl_va_range {
+ +      struct mutex            lock;
+ +      struct list_head        list;
+ +      u64                     start_addr;
+ +      u64                     end_addr;
+ +};
+ +
+ +/**
+ + * struct hl_ctx - user/kernel context.
+ + * @mem_hash: holds mapping from virtual address to virtual memory area
+ + *            descriptor (hl_vm_phys_pg_list or hl_userptr).
+ + * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
+ + * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
+ + * @hdev: pointer to the device structure.
+ + * @refcount: reference counter for the context. Context is released only when
+ + *            this hits 0. It is incremented on CS and CS_WAIT.
+ + * @cs_pending: array of DMA fence objects representing pending CS.
+ + * @host_va_range: holds available virtual addresses for host mappings.
+ + * @host_huge_va_range: holds available virtual addresses for host mappings
+ + *                      with huge pages.
+ + * @dram_va_range: holds available virtual addresses for DRAM mappings.
+ + * @mem_hash_lock: protects the mem_hash.
+ + * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
+ + *            MMU hash or walking the PGT requires taking this lock.
+ + * @debugfs_list: node in debugfs list of contexts.
+ + * @cs_counters: command submission counters of this context.
+ + * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
+ + *                    to user so user could inquire about CS. It is used as
+ + *                    index to cs_pending array.
+ + * @dram_default_hops: array that holds all hops addresses needed for default
+ + *                     DRAM mapping.
+ + * @cs_lock: spinlock to protect cs_sequence.
+ + * @dram_phys_mem: amount of used physical DRAM memory by this context.
+ + * @thread_ctx_switch_token: token to prevent multiple threads of the same
+ + *                            context from running the context switch phase.
+ + *                            Only a single thread should run it.
+ + * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
+ + *                            the context switch phase from moving to their
+ + *                            execution phase before the context switch phase
+ + *                            has finished.
+ + * @asid: context's unique address space ID in the device's MMU.
+ + * @handle: context's opaque handle for user
+ + */
+ +struct hl_ctx {
+ +      DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
+ +      DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
+ +      struct hl_fpriv         *hpriv;
+ +      struct hl_device        *hdev;
+ +      struct kref             refcount;
+ +      struct dma_fence        **cs_pending;
+ +      struct hl_va_range      *host_va_range;
+ +      struct hl_va_range      *host_huge_va_range;
+ +      struct hl_va_range      *dram_va_range;
+ +      struct mutex            mem_hash_lock;
+ +      struct mutex            mmu_lock;
+ +      struct list_head        debugfs_list;
+ +      struct hl_cs_counters   cs_counters;
+ +      u64                     cs_sequence;
+ +      u64                     *dram_default_hops;
+ +      spinlock_t              cs_lock;
+ +      atomic64_t              dram_phys_mem;
+ +      atomic_t                thread_ctx_switch_token;
+ +      u32                     thread_ctx_switch_wait_token;
+ +      u32                     asid;
+ +      u32                     handle;
+ +};
+ +
+ +/**
+ + * struct hl_ctx_mgr - for handling multiple contexts.
+ + * @ctx_lock: protects ctx_handles.
+ + * @ctx_handles: idr to hold all ctx handles.
+ + */
+ +struct hl_ctx_mgr {
+ +      struct mutex            ctx_lock;
+ +      struct idr              ctx_handles;
+ +};
+ +
+ +
+ +
+ +/*
+ + * COMMAND SUBMISSIONS
+ + */
+ +
+ +/**
+ + * struct hl_userptr - memory mapping chunk information
+ + * @vm_type: type of the VM.
+ + * @job_node: linked-list node for hanging the object on the Job's list.
+ + * @vec: pointer to the frame vector.
+ + * @sgt: pointer to the scatter-gather table that holds the pages.
+ + * @dir: for DMA unmapping, the direction must be supplied, so save it.
+ + * @debugfs_list: node in debugfs list of command submissions.
+ + * @addr: user-space virtual address of the start of the memory area.
+ + * @size: size of the memory area to pin & map.
+ + * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
+ + */
+ +struct hl_userptr {
+ +      enum vm_type_t          vm_type; /* must be first */
+ +      struct list_head        job_node;
+ +      struct frame_vector     *vec;
+ +      struct sg_table         *sgt;
+ +      enum dma_data_direction dir;
+ +      struct list_head        debugfs_list;
+ +      u64                     addr;
+ +      u32                     size;
+ +      u8                      dma_mapped;
+ +};
+ +
+ +/**
+ + * struct hl_cs - command submission.
+ + * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
+ + * @ctx: the context this CS belongs to.
+ + * @job_list: list of the CS's jobs in the various queues.
+ + * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
+ + * @refcount: reference counter for usage of the CS.
+ + * @fence: pointer to the fence object of this CS.
+ + * @signal_fence: pointer to the fence object of the signal CS (used by wait
+ + *                CS only).
+ + * @finish_work: workqueue object to run when CS is completed by H/W.
+ + * @work_tdr: delayed work node for TDR.
+ + * @mirror_node: node in device mirror list of command submissions.
+ + * @debugfs_list: node in debugfs list of command submissions.
+ + * @sequence: the sequence number of this CS.
+ + * @type: CS_TYPE_*.
+ + * @submitted: true if CS was submitted to H/W.
+ + * @completed: true if CS was completed by device.
+ + * @timedout: true if CS has timed out.
+ + * @tdr_active: true if TDR was activated for this CS (to prevent
+ + *            double TDR activation).
+ + * @aborted: true if CS was aborted due to some device error.
+ + */
+ +struct hl_cs {
+ +      u16                     *jobs_in_queue_cnt;
+ +      struct hl_ctx           *ctx;
+ +      struct list_head        job_list;
+ +      spinlock_t              job_lock;
+ +      struct kref             refcount;
+ +      struct dma_fence        *fence;
+ +      struct dma_fence        *signal_fence;
+ +      struct work_struct      finish_work;
+ +      struct delayed_work     work_tdr;
+ +      struct list_head        mirror_node;
+ +      struct list_head        debugfs_list;
+ +      u64                     sequence;
+ +      enum hl_cs_type         type;
+ +      u8                      submitted;
+ +      u8                      completed;
+ +      u8                      timedout;
+ +      u8                      tdr_active;
+ +      u8                      aborted;
+ +};
+ +
+ +/**
+ + * struct hl_cs_job - command submission job.
+ + * @cs_node: the node to hang on the CS jobs list.
+ + * @cs: the CS this job belongs to.
+ + * @user_cb: the CB we got from the user.
+ + * @patched_cb: in case of patching, this is internal CB which is submitted on
+ + *            the queue instead of the CB we got from the IOCTL.
+ + * @finish_work: workqueue object to run when job is completed.
+ + * @userptr_list: linked-list of userptr mappings that belong to this job and
+ + *                    wait for completion.
+ + * @debugfs_list: node in debugfs list of command submission jobs.
+ + * @queue_type: the type of the H/W queue this job is submitted to.
+ + * @id: the id of this job inside a CS.
+ + * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ + * @user_cb_size: the actual size of the CB we got from the user.
+ + * @job_cb_size: the actual size of the CB that we put on the queue.
+ + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ + *                          handle to a kernel-allocated CB object, false
+ + *                          otherwise (SRAM/DRAM/host address).
+ + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ + *                    info is needed later, when adding the 2xMSG_PROT at the
+ + *                    end of the JOB, to know which barriers to put in the
+ + *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ + *                    have streams so the engine can't be busy by another
+ + *                    stream.
+ + */
+ +struct hl_cs_job {
+ +      struct list_head        cs_node;
+ +      struct hl_cs            *cs;
+ +      struct hl_cb            *user_cb;
+ +      struct hl_cb            *patched_cb;
+ +      struct work_struct      finish_work;
+ +      struct list_head        userptr_list;
+ +      struct list_head        debugfs_list;
+ +      enum hl_queue_type      queue_type;
+ +      u32                     id;
+ +      u32                     hw_queue_id;
+ +      u32                     user_cb_size;
+ +      u32                     job_cb_size;
+ +      u8                      is_kernel_allocated_cb;
+ +      u8                      contains_dma_pkt;
+ +};
+ +
+ +/**
+ + * struct hl_cs_parser - command submission parser properties.
+ + * @user_cb: the CB we got from the user.
+ + * @patched_cb: in case of patching, this is internal CB which is submitted on
+ + *            the queue instead of the CB we got from the IOCTL.
+ + * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ + *                    job and wait for completion.
+ + * @cs_sequence: the sequence number of the related CS.
+ + * @queue_type: the type of the H/W queue this job is submitted to.
+ + * @ctx_id: the ID of the context the related CS belongs to.
+ + * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ + * @user_cb_size: the actual size of the CB we got from the user.
+ + * @patched_cb_size: the size of the CB after parsing.
+ + * @job_id: the id of the related job inside the related CS.
+ + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ + *                          handle to a kernel-allocated CB object, false
+ + *                          otherwise (SRAM/DRAM/host address).
+ + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
+ + *                    info is needed later, when adding the 2xMSG_PROT at the
+ + *                    end of the JOB, to know which barriers to put in the
+ + *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
+ + *                    have streams so the engine can't be busy by another
+ + *                    stream.
+ + */
+ +struct hl_cs_parser {
+ +      struct hl_cb            *user_cb;
+ +      struct hl_cb            *patched_cb;
+ +      struct list_head        *job_userptr_list;
+ +      u64                     cs_sequence;
+ +      enum hl_queue_type      queue_type;
+ +      u32                     ctx_id;
+ +      u32                     hw_queue_id;
+ +      u32                     user_cb_size;
+ +      u32                     patched_cb_size;
+ +      u8                      job_id;
+ +      u8                      is_kernel_allocated_cb;
+ +      u8                      contains_dma_pkt;
+ +};
+ +
+ +
+ +/*
+ + * MEMORY STRUCTURE
+ + */
+ +
+ +/**
+ + * struct hl_vm_hash_node - hash element from virtual address to virtual
+ + *                            memory area descriptor (hl_vm_phys_pg_list or
+ + *                            hl_userptr).
+ + * @node: node to hang on the hash table in context object.
+ + * @vaddr: key virtual address.
+ + * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr).
+ + */
+ +struct hl_vm_hash_node {
+ +      struct hlist_node       node;
+ +      u64                     vaddr;
+ +      void                    *ptr;
+ +};
+ +
+ +/**
+ + * struct hl_vm_phys_pg_pack - physical page pack.
+ + * @vm_type: describes the type of the virtual area descriptor.
+ + * @pages: the physical page array.
+ + * @npages: num physical pages in the pack.
+ + * @total_size: total size of all the pages in this list.
+ + * @mapping_cnt: number of shared mappings.
+ + * @asid: the context related to this list.
+ + * @page_size: size of each page in the pack.
+ + * @flags: HL_MEM_* flags related to this list.
+ + * @handle: the provided handle related to this list.
+ + * @offset: offset from the first page.
+ + * @contiguous: is contiguous physical memory.
+ + * @created_from_userptr: is product of host virtual address.
+ + */
+ +struct hl_vm_phys_pg_pack {
+ +      enum vm_type_t          vm_type; /* must be first */
+ +      u64                     *pages;
+ +      u64                     npages;
+ +      u64                     total_size;
+ +      atomic_t                mapping_cnt;
+ +      u32                     asid;
+ +      u32                     page_size;
+ +      u32                     flags;
+ +      u32                     handle;
+ +      u32                     offset;
+ +      u8                      contiguous;
+ +      u8                      created_from_userptr;
+ +};
+ +
+ +/**
+ + * struct hl_vm_va_block - virtual range block information.
+ + * @node: node to hang on the virtual range list in context object.
+ + * @start: virtual range start address.
+ + * @end: virtual range end address.
+ + * @size: virtual range size.
+ + */
+ +struct hl_vm_va_block {
+ +      struct list_head        node;
+ +      u64                     start;
+ +      u64                     end;
+ +      u64                     size;
+ +};
+ +
+ +/**
+ + * struct hl_vm - virtual memory manager for MMU.
+ + * @dram_pg_pool: pool for DRAM physical pages of 2MB.
+ + * @dram_pg_pool_refcount: reference counter for the pool usage.
+ + * @idr_lock: protects the phys_pg_pack_handles.
+ + * @phys_pg_pack_handles: idr to hold all device allocations handles.
+ + * @init_done: whether initialization was done. We need this because VM
+ + *            initialization might be skipped during device initialization.
+ + */
+ +struct hl_vm {
+ +      struct gen_pool         *dram_pg_pool;
+ +      struct kref             dram_pg_pool_refcount;
+ +      spinlock_t              idr_lock;
+ +      struct idr              phys_pg_pack_handles;
+ +      u8                      init_done;
+ +};
+ +
+ +
+ +/*
+ + * DEBUG, PROFILING STRUCTURE
+ + */
+ +
+ +/**
+ + * struct hl_debug_params - Coresight debug parameters.
+ + * @input: pointer to component specific input parameters.
+ + * @output: pointer to component specific output parameters.
+ + * @output_size: size of output buffer.
+ + * @reg_idx: relevant register ID.
+ + * @op: component operation to execute.
+ + * @enable: true if to enable component debugging, false otherwise.
+ + */
+ +struct hl_debug_params {
+ +      void *input;
+ +      void *output;
+ +      u32 output_size;
+ +      u32 reg_idx;
+ +      u32 op;
+ +      bool enable;
+ +};
+ +
+ +/*
+ + * FILE PRIVATE STRUCTURE
+ + */
+ +
+ +/**
+ + * struct hl_fpriv - process information stored in FD private data.
+ + * @hdev: habanalabs device structure.
+ + * @filp: pointer to the given file structure.
+ + * @taskpid: current process ID.
+ + * @ctx: current executing context. TODO: remove for multiple ctx per process
+ + * @ctx_mgr: context manager to handle multiple context for this FD.
+ + * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
+ + * @debugfs_list: list of relevant ASIC debugfs.
+ + * @dev_node: node in the device list of file private data
+ + * @refcount: number of related contexts.
+ + * @restore_phase_mutex: lock for context switch and restore phase.
+ + * @is_control: true for control device, false otherwise
+ + */
+ +struct hl_fpriv {
+ +      struct hl_device        *hdev;
+ +      struct file             *filp;
+ +      struct pid              *taskpid;
+ +      struct hl_ctx           *ctx;
+ +      struct hl_ctx_mgr       ctx_mgr;
+ +      struct hl_cb_mgr        cb_mgr;
+ +      struct list_head        debugfs_list;
+ +      struct list_head        dev_node;
+ +      struct kref             refcount;
+ +      struct mutex            restore_phase_mutex;
+ +      u8                      is_control;
+ +};
+ +
+ +
+ +/*
+ + * DebugFS
+ + */
+ +
+ +/**
+ + * struct hl_info_list - debugfs file ops.
+ + * @name: file name.
+ + * @show: function to output information.
+ + * @write: function to write to the file.
+ + */
+ +struct hl_info_list {
+ +      const char      *name;
+ +      int             (*show)(struct seq_file *s, void *data);
+ +      ssize_t         (*write)(struct file *file, const char __user *buf,
+ +                              size_t count, loff_t *f_pos);
+ +};
+ +
+ +/**
+ + * struct hl_debugfs_entry - debugfs dentry wrapper.
+ + * @dent: base debugfs entry structure.
+ + * @info_ent: dentry related ops.
+ + * @dev_entry: ASIC specific debugfs manager.
+ + */
+ +struct hl_debugfs_entry {
+ +      struct dentry                   *dent;
+ +      const struct hl_info_list       *info_ent;
+ +      struct hl_dbg_device_entry      *dev_entry;
+ +};
+ +
+ +/**
+ + * struct hl_dbg_device_entry - ASIC specific debugfs manager.
+ + * @root: root dentry.
+ + * @hdev: habanalabs device structure.
+ + * @entry_arr: array of available hl_debugfs_entry.
+ + * @file_list: list of available debugfs files.
+ + * @file_mutex: protects file_list.
+ + * @cb_list: list of available CBs.
+ + * @cb_spinlock: protects cb_list.
+ + * @cs_list: list of available CSs.
+ + * @cs_spinlock: protects cs_list.
+ + * @cs_job_list: list of available CB jobs.
+ + * @cs_job_spinlock: protects cs_job_list.
+ + * @userptr_list: list of available userptrs (virtual memory chunk descriptor).
+ + * @userptr_spinlock: protects userptr_list.
+ + * @ctx_mem_hash_list: list of available contexts with MMU mappings.
+ + * @ctx_mem_hash_spinlock: protects ctx_mem_hash_list.
+ + * @addr: next address to read/write from/to in read/write32.
+ + * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ + * @mmu_asid: ASID to use while translating in mmu_show.
+ + * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
+ + * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
+ + * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
+ + */
+ +struct hl_dbg_device_entry {
+ +      struct dentry                   *root;
+ +      struct hl_device                *hdev;
+ +      struct hl_debugfs_entry         *entry_arr;
+ +      struct list_head                file_list;
+ +      struct mutex                    file_mutex;
+ +      struct list_head                cb_list;
+ +      spinlock_t                      cb_spinlock;
+ +      struct list_head                cs_list;
+ +      spinlock_t                      cs_spinlock;
+ +      struct list_head                cs_job_list;
+ +      spinlock_t                      cs_job_spinlock;
+ +      struct list_head                userptr_list;
+ +      spinlock_t                      userptr_spinlock;
+ +      struct list_head                ctx_mem_hash_list;
+ +      spinlock_t                      ctx_mem_hash_spinlock;
+ +      u64                             addr;
+ +      u64                             mmu_addr;
+ +      u32                             mmu_asid;
+ +      u8                              i2c_bus;
+ +      u8                              i2c_addr;
+ +      u8                              i2c_reg;
+ +};
+ +
+ +
+ +/*
+ + * DEVICES
+ + */
+ +
+ +/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
+ + * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
+ + */
+ +#define HL_MAX_MINORS 256
+ +
+ +/*
+ + * Registers read & write functions.
+ + */
+ +
+ +u32 hl_rreg(struct hl_device *hdev, u32 reg);
+ +void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
+ +
+ +#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
+ +#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
+ +#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n",  \
+ +                      hdev->asic_funcs->rreg(hdev, (reg)))
+ +
+ +#define WREG32_P(reg, val, mask)                              \
+ +      do {                                                    \
+ +              u32 tmp_ = RREG32(reg);                         \
+ +              tmp_ &= (mask);                                 \
+ +              tmp_ |= ((val) & ~(mask));                      \
+ +              WREG32(reg, tmp_);                              \
+ +      } while (0)
+ +#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
+ +#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
+ +
+ +#define RMWREG32(reg, val, mask)                              \
+ +      do {                                                    \
+ +              u32 tmp_ = RREG32(reg);                         \
+ +              tmp_ &= ~(mask);                                \
+ +              tmp_ |= ((val) << __ffs(mask));                 \
+ +              WREG32(reg, tmp_);                              \
+ +      } while (0)
+ +
+ +#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
+ +
+ +#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
+ +#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
+ +#define WREG32_FIELD(reg, offset, field, val) \
+ +      WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
+ +                              ~REG_FIELD_MASK(reg, field)) | \
+ +                              (val) << REG_FIELD_SHIFT(reg, field))
+ +
+ +/* Timeout should be longer when working with simulator but cap the
+ + * increased timeout to some maximum
+ + */
+ +#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+ +({ \
+ +      ktime_t __timeout; \
+ +      if (hdev->pdev) \
+ +              __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ +      else \
+ +              __timeout = ktime_add_us(ktime_get(),\
+ +                              min((u64)(timeout_us * 10), \
+ +                                      (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ +      might_sleep_if(sleep_us); \
+ +      for (;;) { \
+ +              (val) = RREG32(addr); \
+ +              if (cond) \
+ +                      break; \
+ +              if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ +                      (val) = RREG32(addr); \
+ +                      break; \
+ +              } \
+ +              if (sleep_us) \
+ +                      usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ +      } \
+ +      (cond) ? 0 : -ETIMEDOUT; \
+ +})
+ +
+ +/*
+ + * address in this macro points always to a memory location in the
+ + * host's (server's) memory. That location is updated asynchronously
+ + * either by the direct access of the device or by another core.
+ + *
+ + * To work both in LE and BE architectures, we need to distinguish between the
+ + * two states (device or another core updates the memory location). Therefore,
+ + * if mem_written_by_device is true, the host memory being polled will be
+ + * updated directly by the device. If false, the host memory being polled will
+ + * be updated by host CPU. Required so host knows whether or not the memory
+ + * might need to be byte-swapped before returning value to caller.
+ + */
+ +#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
+ +                              mem_written_by_device) \
+ +({ \
+ +      ktime_t __timeout; \
+ +      if (hdev->pdev) \
+ +              __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ +      else \
+ +              __timeout = ktime_add_us(ktime_get(),\
+ +                              min((u64)(timeout_us * 10), \
+ +                                      (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ +      might_sleep_if(sleep_us); \
+ +      for (;;) { \
+ +              /* Verify we read updates done by other cores or by device */ \
+ +              mb(); \
+ +              (val) = *((u32 *) (uintptr_t) (addr)); \
+ +              if (mem_written_by_device) \
+ +                      (val) = le32_to_cpu(*(__le32 *) &(val)); \
+ +              if (cond) \
+ +                      break; \
+ +              if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ +                      (val) = *((u32 *) (uintptr_t) (addr)); \
+ +                      if (mem_written_by_device) \
+ +                              (val) = le32_to_cpu(*(__le32 *) &(val)); \
+ +                      break; \
+ +              } \
+ +              if (sleep_us) \
+ +                      usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ +      } \
+ +      (cond) ? 0 : -ETIMEDOUT; \
+ +})
+ +
+ +#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
+ +                                      timeout_us) \
+ +({ \
+ +      ktime_t __timeout; \
+ +      if (hdev->pdev) \
+ +              __timeout = ktime_add_us(ktime_get(), timeout_us); \
+ +      else \
+ +              __timeout = ktime_add_us(ktime_get(),\
+ +                              min((u64)(timeout_us * 10), \
+ +                                      (u64) HL_SIM_MAX_TIMEOUT_US)); \
+ +      might_sleep_if(sleep_us); \
+ +      for (;;) { \
+ +              (val) = readl(addr); \
+ +              if (cond) \
+ +                      break; \
+ +              if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
+ +                      (val) = readl(addr); \
+ +                      break; \
+ +              } \
+ +              if (sleep_us) \
+ +                      usleep_range((sleep_us >> 2) + 1, sleep_us); \
+ +      } \
+ +      (cond) ? 0 : -ETIMEDOUT; \
+ +})
+ +
+ +struct hwmon_chip_info;
+ +
+ +/**
+ + * struct hl_device_reset_work - reset workqueue task wrapper.
+ + * @reset_work: reset work to be done.
+ + * @hdev: habanalabs device structure.
+ + */
+ +struct hl_device_reset_work {
+ +      struct work_struct              reset_work;
+ +      struct hl_device                *hdev;
+ +};
+ +
+ +/**
+ + * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
+ + * @idle_to_busy_ts: timestamp where device changed from idle to busy.
+ + * @busy_to_idle_ts: timestamp where device changed from busy to idle.
+ + */
+ +struct hl_device_idle_busy_ts {
+ +      ktime_t                         idle_to_busy_ts;
+ +      ktime_t                         busy_to_idle_ts;
+ +};
+ +
+ +/**
+ + * struct hl_device - habanalabs device structure.
+ + * @pdev: pointer to PCI device, can be NULL in case of simulator device.
+ + * @pcie_bar_phys: array of available PCIe bars physical addresses.
+ + *               (required only for PCI address match mode)
+ + * @pcie_bar: array of available PCIe bars virtual addresses.
+ + * @rmmio: configuration area address on SRAM.
+ + * @cdev: related char device.
+ + * @cdev_ctrl: char device for control operations only (INFO IOCTL)
+ + * @dev: related kernel basic device structure.
+ + * @dev_ctrl: related kernel device structure for the control device
+ + * @work_freq: delayed work to lower device frequency if possible.
+ + * @work_heartbeat: delayed work for ArmCP is-alive check.
+ + * @asic_name: ASIC specific name.
+ + * @asic_type: ASIC specific type.
+ + * @completion_queue: array of hl_cq.
+ + * @cq_wq: work queues of completion queues for executing work in process
+ + *         context.
+ + * @eq_wq: work queue of event queue for executing work in process context.
+ + * @kernel_ctx: Kernel driver context structure.
+ + * @kernel_queues: array of hl_hw_queue.
+ + * @hw_queues_mirror_list: CS mirror list for TDR.
+ + * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
+ + * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
+ + * @event_queue: event queue for IRQ from ArmCP.
+ + * @dma_pool: DMA pool for small allocations.
+ + * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
+ + * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
+ + * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
+ + * @asid_bitmap: holds used/available ASIDs.
+ + * @asid_mutex: protects asid_bitmap.
+ + * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
+ + * @debug_lock: protects critical section of setting debug mode for device
+ + * @asic_prop: ASIC specific immutable properties.
+ + * @asic_funcs: ASIC specific functions.
+ + * @asic_specific: ASIC specific information to use only from ASIC files.
+ + * @mmu_pgt_pool: pool of available MMU hops.
+ + * @vm: virtual memory manager for MMU.
+ + * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
+ + * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
+ + * @hwmon_dev: H/W monitor device.
+ + * @pm_mng_profile: current power management profile.
+ + * @hl_chip_info: ASIC's sensors information.
+ + * @hl_debugfs: device's debugfs manager.
+ + * @cb_pool: list of preallocated CBs.
+ + * @cb_pool_lock: protects the CB pool.
+ + * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
+ + * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
+ + * @internal_cb_pool: internal command buffer memory pool.
+ + * @internal_cb_va_base: internal cb pool mmu virtual address base
+ + * @fpriv_list: list of file private data structures. Each structure is created
+ + *              when a user opens the device
+ + * @fpriv_list_lock: protects the fpriv_list
+ + * @compute_ctx: current compute context executing.
+ + * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
+ + *                    and vice-versa
+ + * @aggregated_cs_counters: aggregated cs counters among all contexts
+ + * @dram_used_mem: current DRAM memory consumption.
+ + * @timeout_jiffies: device CS timeout value.
+ + * @max_power: the max power of the device, as configured by the sysadmin. This
+ + *             value is saved so in case of hard-reset, the driver will restore
+ + *             this value and update the F/W after the re-initialization
++ * @clock_gating_mask: is clock gating enabled. bitmask that represents the
++ *                     different engines. See debugfs-driver-habanalabs for
++ *                     details.
+ + * @in_reset: is device in reset flow.
+ + * @curr_pll_profile: current PLL profile.
+ + * @cs_active_cnt: number of active command submissions on this device (active
+ + *                 means already in H/W queues)
+ + * @major: habanalabs kernel driver major.
+ + * @high_pll: high PLL profile frequency.
+ + * @soft_reset_cnt: number of soft reset since the driver was loaded.
+ + * @hard_reset_cnt: number of hard reset since the driver was loaded.
+ + * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
+ + * @id: device minor.
+ + * @id_control: minor of the control device
+ + * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
+ + *                    addresses.
+ + * @disabled: is device disabled.
+ + * @late_init_done: was the late init stage done during initialization.
+ + * @hwmon_initialized: were the H/W monitor sensors initialized.
+ + * @hard_reset_pending: is there a hard reset work pending.
+ + * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
+ + * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
+ + *                   otherwise.
+ + * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
+ + * @dram_default_page_mapping: is DRAM default page mapping enabled.
+ + * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
+ + *                   huge pages.
+ + * @init_done: is the initialization of the device done.
+ + * @mmu_enable: is MMU enabled.
+ + * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
-       u8                              clock_gating;
+ + * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
+ + * @dma_mask: the dma mask that was set for this device
+ + * @in_debug: is device under debug. This, together with fpriv_list, enforces
+ + *            that only a single user is configuring the debug infrastructure.
+ + * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
+ + *                           only to POWER9 machines.
+ + * @cdev_sysfs_created: were char devices and sysfs nodes created.
+ + * @stop_on_err: true if engines should stop on error.
+ + * @supports_sync_stream: is sync stream supported.
+ + * @sync_stream_queue_idx: helper index for sync stream queues initialization.
+ + * @supports_coresight: is CoreSight supported.
+ + * @supports_soft_reset: is soft reset supported.
+ + */
+ +struct hl_device {
+ +      struct pci_dev                  *pdev;
+ +      u64                             pcie_bar_phys[HL_PCI_NUM_BARS];
+ +      void __iomem                    *pcie_bar[HL_PCI_NUM_BARS];
+ +      void __iomem                    *rmmio;
+ +      struct cdev                     cdev;
+ +      struct cdev                     cdev_ctrl;
+ +      struct device                   *dev;
+ +      struct device                   *dev_ctrl;
+ +      struct delayed_work             work_freq;
+ +      struct delayed_work             work_heartbeat;
+ +      char                            asic_name[16];
+ +      enum hl_asic_type               asic_type;
+ +      struct hl_cq                    *completion_queue;
+ +      struct workqueue_struct         **cq_wq;
+ +      struct workqueue_struct         *eq_wq;
+ +      struct hl_ctx                   *kernel_ctx;
+ +      struct hl_hw_queue              *kernel_queues;
+ +      struct list_head                hw_queues_mirror_list;
+ +      spinlock_t                      hw_queues_mirror_lock;
+ +      struct hl_cb_mgr                kernel_cb_mgr;
+ +      struct hl_eq                    event_queue;
+ +      struct dma_pool                 *dma_pool;
+ +      void                            *cpu_accessible_dma_mem;
+ +      dma_addr_t                      cpu_accessible_dma_address;
+ +      struct gen_pool                 *cpu_accessible_dma_pool;
+ +      unsigned long                   *asid_bitmap;
+ +      struct mutex                    asid_mutex;
+ +      struct mutex                    send_cpu_message_lock;
+ +      struct mutex                    debug_lock;
+ +      struct asic_fixed_properties    asic_prop;
+ +      const struct hl_asic_funcs      *asic_funcs;
+ +      void                            *asic_specific;
+ +      struct gen_pool                 *mmu_pgt_pool;
+ +      struct hl_vm                    vm;
+ +      struct mutex                    mmu_cache_lock;
+ +      void                            *mmu_shadow_hop0;
+ +      struct device                   *hwmon_dev;
+ +      enum hl_pm_mng_profile          pm_mng_profile;
+ +      struct hwmon_chip_info          *hl_chip_info;
+ +
+ +      struct hl_dbg_device_entry      hl_debugfs;
+ +
+ +      struct list_head                cb_pool;
+ +      spinlock_t                      cb_pool_lock;
+ +
+ +      void                            *internal_cb_pool_virt_addr;
+ +      dma_addr_t                      internal_cb_pool_dma_addr;
+ +      struct gen_pool                 *internal_cb_pool;
+ +      u64                             internal_cb_va_base;
+ +
+ +      struct list_head                fpriv_list;
+ +      struct mutex                    fpriv_list_lock;
+ +
+ +      struct hl_ctx                   *compute_ctx;
+ +
+ +      struct hl_device_idle_busy_ts   *idle_busy_ts_arr;
+ +
+ +      struct hl_cs_counters           aggregated_cs_counters;
+ +
+ +      atomic64_t                      dram_used_mem;
+ +      u64                             timeout_jiffies;
+ +      u64                             max_power;
++      u64                             clock_gating_mask;
+ +      atomic_t                        in_reset;
+ +      enum hl_pll_frequency           curr_pll_profile;
+ +      int                             cs_active_cnt;
+ +      u32                             major;
+ +      u32                             high_pll;
+ +      u32                             soft_reset_cnt;
+ +      u32                             hard_reset_cnt;
+ +      u32                             idle_busy_ts_idx;
+ +      u16                             id;
+ +      u16                             id_control;
+ +      u16                             cpu_pci_msb_addr;
+ +      u8                              disabled;
+ +      u8                              late_init_done;
+ +      u8                              hwmon_initialized;
+ +      u8                              hard_reset_pending;
+ +      u8                              heartbeat;
+ +      u8                              reset_on_lockup;
+ +      u8                              dram_supports_virtual_memory;
+ +      u8                              dram_default_page_mapping;
+ +      u8                              pmmu_huge_range;
+ +      u8                              init_done;
+ +      u8                              device_cpu_disabled;
+ +      u8                              dma_mask;
+ +      u8                              in_debug;
+ +      u8                              power9_64bit_dma_enable;
+ +      u8                              cdev_sysfs_created;
+ +      u8                              stop_on_err;
+ +      u8                              supports_sync_stream;
+ +      u8                              sync_stream_queue_idx;
+ +      u8                              supports_coresight;
+ +      u8                              supports_soft_reset;
+ +
+ +      /* Parameters for bring-up. Defaults for several of these (mmu_enable,
+ +       * cpu_enable, fw_loading, cpu_queues_enable, reset_pcilink, axi_drain,
+ +       * the two scrambler enables, bmc_enable, hard_reset_on_fw_events) are
+ +       * assigned in set_driver_behavior_per_device(); pldm is cleared in
+ +       * create_hdev().
+ +       */
+ +      u8                              mmu_enable;
+ +      u8                              mmu_huge_page_opt;
+ +      u8                              cpu_enable;
+ +      u8                              reset_pcilink;
+ +      u8                              cpu_queues_enable;
+ +      u8                              fw_loading;
+ +      u8                              pldm;
+ +      u8                              axi_drain;
+ +      u8                              sram_scrambler_enable;
+ +      u8                              dram_scrambler_enable;
+ +      u8                              hard_reset_on_fw_events;
+ +      u8                              bmc_enable;
+ +      u8                              rl_enable;
+ +};
+ +
+ +
+ +/*
+ + * IOCTLs
+ + */
+ +
+ +/**
+ + * typedef hl_ioctl_t - typedef for ioctl function in the driver
+ + * @hpriv: pointer to the FD's private data, which contains state of
+ + *            user process
+ + * @data: pointer to the input/output arguments structure of the IOCTL
+ + *
+ + * This is the signature shared by the handlers declared at the end of this
+ + * header: hl_cb_ioctl(), hl_cs_ioctl(), hl_cs_wait_ioctl() and hl_mem_ioctl().
+ + *
+ + * Return: 0 for success, negative value for error
+ + */
+ +typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
+ +
+ +/**
+ + * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
+ + * @cmd: the IOCTL code as created by the kernel macros.
+ + * @func: pointer to the driver's function (of type hl_ioctl_t) that should be
+ + *        called for this IOCTL.
+ + */
+ +struct hl_ioctl_desc {
+ +      unsigned int cmd;
+ +      hl_ioctl_t *func;
+ +};
+ +
+ +
+ +/*
+ + * Kernel module functions that can be accessed by entire module
+ + */
+ +
+ +/**
+ + * hl_mem_area_inside_range() - Test that an area lies fully within a range.
+ + * @address: First address of the area being checked.
+ + * @size: Length of the area in bytes.
+ + * @range_start_address: Lowest address the area is allowed to start at.
+ + * @range_end_address: Highest value that address + size is allowed to reach.
+ + *
+ + * The end of the area (address + size) must be strictly above its start, so
+ + * a zero-sized area and an area whose end wraps around the 64-bit address
+ + * space are both rejected.
+ + *
+ + * Return: true if the area is inside the valid range, false otherwise.
+ + */
+ +static inline bool hl_mem_area_inside_range(u64 address, u32 size,
+ +                              u64 range_start_address, u64 range_end_address)
+ +{
+ +      u64 area_end = address + size;
+ +
+ +      /* Empty area, or the end wrapped around */
+ +      if (area_end <= address)
+ +              return false;
+ +
+ +      if (address < range_start_address)
+ +              return false;
+ +
+ +      return area_end <= range_end_address;
+ +}
+ +
+ +/**
+ + * hl_mem_area_crosses_range() - Test whether an area touches a given range.
+ + * @address: First address of the area being checked.
+ + * @size: Length of the area in bytes.
+ + * @range_start_address: The start address of the range.
+ + * @range_end_address: The end address of the range.
+ + *
+ + * Three cases count as crossing: the area starts inside the range, the area's
+ + * end (address + size) falls inside the range, or the area starts below the
+ + * range and ends at or beyond its end.
+ + *
+ + * NOTE(review): an area whose end equals range_start_address is reported as
+ + * crossing, and a wrap-around of address + size is not detected here - confirm
+ + * both are intended by the callers.
+ + *
+ + * Return: true if the area overlaps part or all of the range,
+ + *            false otherwise.
+ + */
+ +static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
+ +                              u64 range_start_address, u64 range_end_address)
+ +{
+ +      u64 area_end = address + size;
+ +      bool starts_in_range, ends_in_range, spans_range;
+ +
+ +      starts_in_range = (address >= range_start_address) &&
+ +                              (address < range_end_address);
+ +
+ +      ends_in_range = (area_end >= range_start_address) &&
+ +                              (area_end < range_end_address);
+ +
+ +      spans_range = (address < range_start_address) &&
+ +                              (area_end >= range_end_address);
+ +
+ +      return starts_in_range || ends_in_range || spans_range;
+ +}
+ +
+ +int hl_device_open(struct inode *inode, struct file *filp);
+ +int hl_device_open_ctrl(struct inode *inode, struct file *filp);
+ +bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
+ +enum hl_device_status hl_device_status(struct hl_device *hdev);
+ +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
+ +int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
+ +              enum hl_asic_type asic_type, int minor);
+ +void destroy_hdev(struct hl_device *hdev);
+ +int hl_hw_queues_create(struct hl_device *hdev);
+ +void hl_hw_queues_destroy(struct hl_device *hdev);
+ +int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+ +                              u32 cb_size, u64 cb_ptr);
+ +int hl_hw_queue_schedule_cs(struct hl_cs *cs);
+ +u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
+ +void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
+ +void hl_int_hw_queue_update_ci(struct hl_cs *cs);
+ +void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
+ +
+ +#define hl_queue_inc_ptr(p)           hl_hw_queue_add_ptr(p, 1)
+ +#define hl_pi_2_offset(pi)            ((pi) & (HL_QUEUE_LENGTH - 1))
+ +
+ +int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
+ +void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
+ +int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
+ +void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
+ +void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
+ +void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
+ +irqreturn_t hl_irq_handler_cq(int irq, void *arg);
+ +irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+ +u32 hl_cq_inc_ptr(u32 ptr);
+ +
+ +int hl_asid_init(struct hl_device *hdev);
+ +void hl_asid_fini(struct hl_device *hdev);
+ +unsigned long hl_asid_alloc(struct hl_device *hdev);
+ +void hl_asid_free(struct hl_device *hdev, unsigned long asid);
+ +
+ +int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
+ +void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
+ +int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
+ +void hl_ctx_do_release(struct kref *ref);
+ +void hl_ctx_get(struct hl_device *hdev,       struct hl_ctx *ctx);
+ +int hl_ctx_put(struct hl_ctx *ctx);
+ +struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+ +void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
+ +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
+ +
+ +int hl_device_init(struct hl_device *hdev, struct class *hclass);
+ +void hl_device_fini(struct hl_device *hdev);
+ +int hl_device_suspend(struct hl_device *hdev);
+ +int hl_device_resume(struct hl_device *hdev);
+ +int hl_device_reset(struct hl_device *hdev, bool hard_reset,
+ +                      bool from_hard_reset_thread);
+ +void hl_hpriv_get(struct hl_fpriv *hpriv);
+ +void hl_hpriv_put(struct hl_fpriv *hpriv);
+ +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
+ +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
+ +
+ +int hl_build_hwmon_channel_info(struct hl_device *hdev,
+ +              struct armcp_sensor *sensors_arr);
+ +
+ +int hl_sysfs_init(struct hl_device *hdev);
+ +void hl_sysfs_fini(struct hl_device *hdev);
+ +
+ +int hl_hwmon_init(struct hl_device *hdev);
+ +void hl_hwmon_fini(struct hl_device *hdev);
+ +
+ +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
+ +              u64 *handle, int ctx_id, bool internal_cb);
+ +int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
+ +int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+ +struct hl_cb *hl_cb_get(struct hl_device *hdev,       struct hl_cb_mgr *mgr,
+ +                      u32 handle);
+ +void hl_cb_put(struct hl_cb *cb);
+ +void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
+ +void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
+ +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
+ +                                      bool internal_cb);
+ +int hl_cb_pool_init(struct hl_device *hdev);
+ +int hl_cb_pool_fini(struct hl_device *hdev);
+ +
+ +void hl_cs_rollback_all(struct hl_device *hdev);
+ +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+ +              enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
+ +void hl_sob_reset_error(struct kref *ref);
+ +
+ +void goya_set_asic_funcs(struct hl_device *hdev);
+ +void gaudi_set_asic_funcs(struct hl_device *hdev);
+ +
+ +int hl_vm_ctx_init(struct hl_ctx *ctx);
+ +void hl_vm_ctx_fini(struct hl_ctx *ctx);
+ +
+ +int hl_vm_init(struct hl_device *hdev);
+ +void hl_vm_fini(struct hl_device *hdev);
+ +
+ +int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
+ +                      struct hl_userptr *userptr);
+ +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
+ +void hl_userptr_delete_list(struct hl_device *hdev,
+ +                              struct list_head *userptr_list);
+ +bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
+ +                              struct list_head *userptr_list,
+ +                              struct hl_userptr **userptr);
+ +
+ +int hl_mmu_init(struct hl_device *hdev);
+ +void hl_mmu_fini(struct hl_device *hdev);
+ +int hl_mmu_ctx_init(struct hl_ctx *ctx);
+ +void hl_mmu_ctx_fini(struct hl_ctx *ctx);
+ +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+ +              u32 page_size, bool flush_pte);
+ +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+ +              bool flush_pte);
+ +void hl_mmu_swap_out(struct hl_ctx *ctx);
+ +void hl_mmu_swap_in(struct hl_ctx *ctx);
+ +
+ +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
+ +                              void __iomem *dst);
+ +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
+ +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+ +                              u16 len, u32 timeout, long *result);
+ +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
+ +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
+ +              size_t irq_arr_size);
+ +int hl_fw_test_cpu_queue(struct hl_device *hdev);
+ +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ +                                              dma_addr_t *dma_handle);
+ +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ +                                      void *vaddr);
+ +int hl_fw_send_heartbeat(struct hl_device *hdev);
+ +int hl_fw_armcp_info_get(struct hl_device *hdev);
+ +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+ +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
+ +                      u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
+ +                      u32 boot_err0_reg, bool skip_bmc,
+ +                      u32 cpu_timeout, u32 boot_fit_timeout);
+ +
+ +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+ +                      bool is_wc[3]);
+ +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
+ +int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
+ +                              u64 addr);
+ +int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
+ +              struct hl_inbound_pci_region *pci_region);
+ +int hl_pci_set_outbound_region(struct hl_device *hdev,
+ +              struct hl_outbound_pci_region *pci_region);
+ +int hl_pci_init(struct hl_device *hdev);
+ +void hl_pci_fini(struct hl_device *hdev);
+ +
+ +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
+ +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
+ +int hl_get_temperature(struct hl_device *hdev,
+ +                     int sensor_index, u32 attr, long *value);
+ +int hl_set_temperature(struct hl_device *hdev,
+ +                     int sensor_index, u32 attr, long value);
+ +int hl_get_voltage(struct hl_device *hdev,
+ +                 int sensor_index, u32 attr, long *value);
+ +int hl_get_current(struct hl_device *hdev,
+ +                 int sensor_index, u32 attr, long *value);
+ +int hl_get_fan_speed(struct hl_device *hdev,
+ +                   int sensor_index, u32 attr, long *value);
+ +int hl_get_pwm_info(struct hl_device *hdev,
+ +                  int sensor_index, u32 attr, long *value);
+ +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
+ +                      long value);
+ +u64 hl_get_max_power(struct hl_device *hdev);
+ +void hl_set_max_power(struct hl_device *hdev, u64 value);
+ +int hl_set_voltage(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long value);
+ +int hl_set_current(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long value);
+ +
+ +#ifdef CONFIG_DEBUG_FS
+ +
+ +void hl_debugfs_init(void);
+ +void hl_debugfs_fini(void);
+ +void hl_debugfs_add_device(struct hl_device *hdev);
+ +void hl_debugfs_remove_device(struct hl_device *hdev);
+ +void hl_debugfs_add_file(struct hl_fpriv *hpriv);
+ +void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
+ +void hl_debugfs_add_cb(struct hl_cb *cb);
+ +void hl_debugfs_remove_cb(struct hl_cb *cb);
+ +void hl_debugfs_add_cs(struct hl_cs *cs);
+ +void hl_debugfs_remove_cs(struct hl_cs *cs);
+ +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
+ +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
+ +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
+ +void hl_debugfs_remove_userptr(struct hl_device *hdev,
+ +                              struct hl_userptr *userptr);
+ +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+ +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+ +
+ +#else /* !CONFIG_DEBUG_FS: every debugfs hook below is an empty inline stub */
+ +
+ +static inline void __init hl_debugfs_init(void)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_fini(void)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_device(struct hl_device *hdev)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_device(struct hl_device *hdev)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_cb(struct hl_cb *cb)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_cs(struct hl_cs *cs)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_job(struct hl_device *hdev,
+ +                                      struct hl_cs_job *job)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_job(struct hl_device *hdev,
+ +                                      struct hl_cs_job *job)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
+ +                                      struct hl_userptr *userptr)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
+ +                                      struct hl_userptr *userptr)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
+ +                                      struct hl_ctx *ctx)
+ +{
+ +}
+ +
+ +static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
+ +                                      struct hl_ctx *ctx)
+ +{
+ +}
+ +
+ +#endif /* CONFIG_DEBUG_FS */
+ +
+ +/* IOCTLs */
+ +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+ +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
+ +int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
+ +int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
+ +int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
+ +int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
+ +
+ +#endif /* HABANALABSP_H_ */
diff --cc drivers/misc/habanalabs/common/habanalabs_drv.c

index f38664b03865bdaf3eca859bbdf2b46e34997932,0000000000000000000000000000000000000000..c6b31e93fb5eba0e2488656b69f377f1e68b1222

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/habanalabs_drv.c
--- /dev/null
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@@ -1,529 -1,0 +1,529 @@@
-       hdev->clock_gating = 1;
+ +// SPDX-License-Identifier: GPL-2.0
+ +
+ +/*
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + *
+ + */
+ +
+ +#define pr_fmt(fmt)           "habanalabs: " fmt
+ +
+ +#include "habanalabs.h"
+ +
+ +#include <linux/pci.h>
+ +#include <linux/module.h>
+ +
+ +#define HL_DRIVER_AUTHOR      "HabanaLabs Kernel Driver Team"
+ +
+ +#define HL_DRIVER_DESC                "Driver for HabanaLabs's AI Accelerators"
+ +
+ +MODULE_AUTHOR(HL_DRIVER_AUTHOR);
+ +MODULE_DESCRIPTION(HL_DRIVER_DESC);
+ +MODULE_LICENSE("GPL v2");
+ +
+ +static int hl_major;
+ +static struct class *hl_class;
+ +static DEFINE_IDR(hl_devs_idr);
+ +static DEFINE_MUTEX(hl_devs_idr_lock);
+ +
+ +static int timeout_locked = 5;
+ +static int reset_on_lockup = 1;
+ +
+ +module_param(timeout_locked, int, 0444);
+ +MODULE_PARM_DESC(timeout_locked,
+ +      "Device lockup timeout in seconds (0 = disabled, default 5s)");
+ +
+ +module_param(reset_on_lockup, int, 0444);
+ +MODULE_PARM_DESC(reset_on_lockup,
+ +      "Do device reset on lockup (0 = no, 1 = yes, default yes)");
+ +
+ +#define PCI_VENDOR_ID_HABANALABS      0x1da3
+ +
+ +#define PCI_IDS_GOYA                  0x0001
+ +#define PCI_IDS_GAUDI                 0x1000
+ +
+ +static const struct pci_device_id ids[] = {
+ +      { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
+ +      { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
+ +      { 0, }
+ +};
+ +MODULE_DEVICE_TABLE(pci, ids);
+ +
+ +/*
+ + * get_asic_type - map a PCI device id to the matching ASIC type
+ + *
+ + * @device: id of the PCI device
+ + *
+ + * Returns ASIC_GOYA or ASIC_GAUDI for the two known device ids, and
+ + * ASIC_INVALID for any other id.
+ + */
+ +static enum hl_asic_type get_asic_type(u16 device)
+ +{
+ +      switch (device) {
+ +      case PCI_IDS_GOYA:
+ +              return ASIC_GOYA;
+ +      case PCI_IDS_GAUDI:
+ +              return ASIC_GAUDI;
+ +      default:
+ +              return ASIC_INVALID;
+ +      }
+ +}
+ +
+ +/*
+ + * hl_device_open - open function for habanalabs device
+ + *
+ + * @inode: pointer to inode structure
+ + * @filp: pointer to file structure
+ + *
+ + * Called when process opens an habanalabs device.
+ + *
+ + * Looks up the device by the inode's minor number, allocates and initializes
+ + * the per-file private data (hpriv) and then, while holding fpriv_list_lock,
+ + * refuses the open if the device is disabled or in reset (-EPERM), is being
+ + * debugged (-EPERM) or already has a compute context (-EBUSY). Otherwise a
+ + * context is created, the PLL profile is set to PLL_HIGH and hpriv is added
+ + * to the device's fpriv_list.
+ + *
+ + * Returns 0 on success, -ENXIO if no device matches the minor, -ENOMEM if
+ + * hpriv cannot be allocated, or the error code of the check/step that failed.
+ + * On failure everything set up for hpriv is torn down and hpriv is freed.
+ + */
+ +int hl_device_open(struct inode *inode, struct file *filp)
+ +{
+ +      struct hl_device *hdev;
+ +      struct hl_fpriv *hpriv;
+ +      int rc;
+ +
+ +      mutex_lock(&hl_devs_idr_lock);
+ +      hdev = idr_find(&hl_devs_idr, iminor(inode));
+ +      mutex_unlock(&hl_devs_idr_lock);
+ +
+ +      if (!hdev) {
+ +              pr_err("Couldn't find device %d:%d\n",
+ +                      imajor(inode), iminor(inode));
+ +              return -ENXIO;
+ +      }
+ +
+ +      hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
+ +      if (!hpriv)
+ +              return -ENOMEM;
+ +
+ +      hpriv->hdev = hdev;
+ +      filp->private_data = hpriv;
+ +      hpriv->filp = filp;
+ +      mutex_init(&hpriv->restore_phase_mutex);
+ +      kref_init(&hpriv->refcount);
+ +      nonseekable_open(inode, filp);
+ +
+ +      hl_cb_mgr_init(&hpriv->cb_mgr);
+ +      hl_ctx_mgr_init(&hpriv->ctx_mgr);
+ +
+ +      hpriv->taskpid = find_get_pid(current->pid);
+ +
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev)) {
+ +              dev_err_ratelimited(hdev->dev,
+ +                      "Can't open %s because it is disabled or in reset\n",
+ +                      dev_name(hdev->dev));
+ +              rc = -EPERM;
+ +              goto out_err;
+ +      }
+ +
+ +      if (hdev->in_debug) {
+ +              dev_err_ratelimited(hdev->dev,
+ +                      "Can't open %s because it is being debugged by another user\n",
+ +                      dev_name(hdev->dev));
+ +              rc = -EPERM;
+ +              goto out_err;
+ +      }
+ +
+ +      if (hdev->compute_ctx) {
+ +              dev_dbg_ratelimited(hdev->dev,
+ +                      "Can't open %s because another user is working on it\n",
+ +                      dev_name(hdev->dev));
+ +              rc = -EBUSY;
+ +              goto out_err;
+ +      }
+ +
+ +      rc = hl_ctx_create(hdev, hpriv);
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to create context %d\n", rc);
+ +              goto out_err;
+ +      }
+ +
+ +      /* Device is IDLE at this point so it is legal to change PLLs.
+ +       * There is no need to check anything because if the PLL is
+ +       * already HIGH, the set function will return without doing
+ +       * anything
+ +       */
+ +      hl_device_set_frequency(hdev, PLL_HIGH);
+ +
+ +      list_add(&hpriv->dev_node, &hdev->fpriv_list);
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      hl_debugfs_add_file(hpriv);
+ +
+ +      return 0;
+ +
+ +out_err:
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+ +      hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+ +      filp->private_data = NULL;
+ +      mutex_destroy(&hpriv->restore_phase_mutex);
+ +      put_pid(hpriv->taskpid);
+ +
+ +      kfree(hpriv);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_device_open_ctrl - open function for the habanalabs control device
+ + *
+ + * @inode: pointer to inode structure
+ + * @filp: pointer to file structure
+ + *
+ + * Allocates the per-file private data (hpriv), initializes it completely and
+ + * only then links it into the device's fpriv_list under fpriv_list_lock, so
+ + * that code walking the list never observes a partially initialized entry.
+ + *
+ + * Returns 0 on success, -ENXIO if no device matches the minor, -ENOMEM if
+ + * hpriv cannot be allocated, or -EPERM if the device is disabled or in reset.
+ + */
+ +int hl_device_open_ctrl(struct inode *inode, struct file *filp)
+ +{
+ +      struct hl_device *hdev;
+ +      struct hl_fpriv *hpriv;
+ +      int rc;
+ +
+ +      mutex_lock(&hl_devs_idr_lock);
+ +      hdev = idr_find(&hl_devs_idr, iminor(inode));
+ +      mutex_unlock(&hl_devs_idr_lock);
+ +
+ +      if (!hdev) {
+ +              pr_err("Couldn't find device %d:%d\n",
+ +                      imajor(inode), iminor(inode));
+ +              return -ENXIO;
+ +      }
+ +
+ +      hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
+ +      if (!hpriv)
+ +              return -ENOMEM;
+ +
+ +      /* Fill in every hpriv field before the entry becomes reachable
+ +       * through fpriv_list, so list walkers never read partial data
+ +       */
+ +      hpriv->hdev = hdev;
+ +      filp->private_data = hpriv;
+ +      hpriv->filp = filp;
+ +      hpriv->is_control = true;
+ +      nonseekable_open(inode, filp);
+ +
+ +      hpriv->taskpid = find_get_pid(current->pid);
+ +
+ +      mutex_lock(&hdev->fpriv_list_lock);
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev)) {
+ +              dev_err_ratelimited(hdev->dev_ctrl,
+ +                      "Can't open %s because it is disabled or in reset\n",
+ +                      dev_name(hdev->dev_ctrl));
+ +              rc = -EPERM;
+ +              goto out_err;
+ +      }
+ +
+ +      list_add(&hpriv->dev_node, &hdev->fpriv_list);
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      return 0;
+ +
+ +out_err:
+ +      mutex_unlock(&hdev->fpriv_list_lock);
+ +
+ +      /* Undo the early initialization done before taking the lock */
+ +      filp->private_data = NULL;
+ +      put_pid(hpriv->taskpid);
+ +
+ +      kfree(hpriv);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * set_driver_behavior_per_device - assign default values to the device's
+ + *                                  behavior/bring-up knobs
+ + *
+ + * @hdev: pointer to habanalabs device structure
+ + *
+ + * Called from create_hdev() right after the ASIC type is determined.
+ + */
+ +static void set_driver_behavior_per_device(struct hl_device *hdev)
+ +{
+ +      hdev->mmu_enable = 1;
+ +      hdev->cpu_enable = 1;
+ +      hdev->fw_loading = 1;
+ +      hdev->cpu_queues_enable = 1;
+ +      hdev->heartbeat = 1;
+ +      /* clock_gating_mask is a u64 bitmask; set all 64 bits explicitly.
+ +       * ULONG_MAX would only set the low 32 bits on 32-bit builds.
+ +       */
++      hdev->clock_gating_mask = ~0ULL;
+ +
+ +      hdev->reset_pcilink = 0;
+ +      hdev->axi_drain = 0;
+ +      hdev->sram_scrambler_enable = 1;
+ +      hdev->dram_scrambler_enable = 1;
+ +      hdev->bmc_enable = 1;
+ +      hdev->hard_reset_on_fw_events = 1;
+ +}
+ +
+ +/*
+ + * create_hdev - create habanalabs device instance
+ + *
+ + * @dev: will hold the pointer to the new habanalabs device structure
+ + * @pdev: pointer to the pci device
+ + * @asic_type: in case of simulator device, which device is it
+ + * @minor: in case of simulator device, the minor of the device
+ + *
+ + * Allocate memory for habanalabs device and initialize basic fields
+ + * Identify the ASIC type
+ + * Allocate ID (minor) for the device (only for real devices)
+ + */
+ +int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
+ +              enum hl_asic_type asic_type, int minor)
+ +{
+ +      struct hl_device *hdev;
+ +      int rc, main_id, ctrl_id = 0;
+ +
+ +      *dev = NULL;
+ +
+ +      hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
+ +      if (!hdev)
+ +              return -ENOMEM;
+ +
+ +      /* First, we must find out which ASIC are we handling. This is needed
+ +       * to configure the behavior of the driver (kernel parameters)
+ +       */
+ +      if (pdev) {
+ +              hdev->asic_type = get_asic_type(pdev->device);
+ +              if (hdev->asic_type == ASIC_INVALID) {
+ +                      dev_err(&pdev->dev, "Unsupported ASIC\n");
+ +                      rc = -ENODEV;
+ +                      goto free_hdev;
+ +              }
+ +      } else {
+ +              hdev->asic_type = asic_type;
+ +      }
+ +
+ +      hdev->major = hl_major;
+ +      hdev->reset_on_lockup = reset_on_lockup;
+ +      hdev->pldm = 0;
+ +
+ +      set_driver_behavior_per_device(hdev);
+ +
+ +      if (timeout_locked)
+ +              hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
+ +      else
+ +              hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+ +
+ +      hdev->disabled = true;
+ +      hdev->pdev = pdev; /* can be NULL in case of simulator device */
+ +
+ +      /* Set default DMA mask to 32 bits */
+ +      hdev->dma_mask = 32;
+ +
+ +      mutex_lock(&hl_devs_idr_lock);
+ +
+ +      /* Always save 2 numbers, 1 for main device and 1 for control.
+ +       * They must be consecutive
+ +       */
+ +      main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
+ +                              GFP_KERNEL);
+ +
+ +      if (main_id >= 0)
+ +              ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
+ +                                      main_id + 2, GFP_KERNEL);
+ +
+ +      mutex_unlock(&hl_devs_idr_lock);
+ +
+ +      if ((main_id < 0) || (ctrl_id < 0)) {
+ +              if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
+ +                      pr_err("too many devices in the system\n");
+ +
+ +              if (main_id >= 0) {
+ +                      mutex_lock(&hl_devs_idr_lock);
+ +                      idr_remove(&hl_devs_idr, main_id);
+ +                      mutex_unlock(&hl_devs_idr_lock);
+ +              }
+ +
+ +              rc = -EBUSY;
+ +              goto free_hdev;
+ +      }
+ +
+ +      hdev->id = main_id;
+ +      hdev->id_control = ctrl_id;
+ +
+ +      *dev = hdev;
+ +
+ +      return 0;
+ +
+ +free_hdev:
+ +      kfree(hdev);
+ +      return rc;
+ +}
+ +
+ +/*
+ + * destroy_hdev - destroy habanalabs device instance
+ + *
+ + * @dev: pointer to the habanalabs device structure
+ + *
+ + */
+ +void destroy_hdev(struct hl_device *hdev)
+ +{
+ +      /* Remove device from the device list */
+ +      mutex_lock(&hl_devs_idr_lock);
+ +      idr_remove(&hl_devs_idr, hdev->id);
+ +      idr_remove(&hl_devs_idr, hdev->id_control);
+ +      mutex_unlock(&hl_devs_idr_lock);
+ +
+ +      kfree(hdev);
+ +}
+ +
+ +static int hl_pmops_suspend(struct device *dev)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      pr_debug("Going to suspend PCI device\n");
+ +
+ +      if (!hdev) {
+ +              pr_err("device pointer is NULL in suspend\n");
+ +              return 0;
+ +      }
+ +
+ +      return hl_device_suspend(hdev);
+ +}
+ +
+ +static int hl_pmops_resume(struct device *dev)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      pr_debug("Going to resume PCI device\n");
+ +
+ +      if (!hdev) {
+ +              pr_err("device pointer is NULL in resume\n");
+ +              return 0;
+ +      }
+ +
+ +      return hl_device_resume(hdev);
+ +}
+ +
+ +/*
+ + * hl_pci_probe - probe PCI habanalabs devices
+ + *
+ + * @pdev: pointer to pci device
+ + * @id: pointer to pci device id structure
+ + *
+ + * Standard PCI probe function for habanalabs device.
+ + * Create a new habanalabs device and initialize it according to the
+ + * device's type
+ + */
+ +static int hl_pci_probe(struct pci_dev *pdev,
+ +                              const struct pci_device_id *id)
+ +{
+ +      struct hl_device *hdev;
+ +      int rc;
+ +
+ +      dev_info(&pdev->dev, HL_NAME
+ +               " device found [%04x:%04x] (rev %x)\n",
+ +               (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
+ +
+ +      rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
+ +      if (rc)
+ +              return rc;
+ +
+ +      pci_set_drvdata(pdev, hdev);
+ +
+ +      rc = hl_device_init(hdev, hl_class);
+ +      if (rc) {
+ +              dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
+ +              rc = -ENODEV;
+ +              goto disable_device;
+ +      }
+ +
+ +      return 0;
+ +
+ +disable_device:
+ +      pci_set_drvdata(pdev, NULL);
+ +      destroy_hdev(hdev);
+ +
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_pci_remove - remove PCI habanalabs devices
+ + *
+ + * @pdev: pointer to pci device
+ + *
+ + * Standard PCI remove function for habanalabs device
+ + */
+ +static void hl_pci_remove(struct pci_dev *pdev)
+ +{
+ +      struct hl_device *hdev;
+ +
+ +      hdev = pci_get_drvdata(pdev);
+ +      if (!hdev)
+ +              return;
+ +
+ +      hl_device_fini(hdev);
+ +      pci_set_drvdata(pdev, NULL);
+ +
+ +      destroy_hdev(hdev);
+ +}
+ +
+ +static const struct dev_pm_ops hl_pm_ops = {
+ +      .suspend = hl_pmops_suspend,
+ +      .resume = hl_pmops_resume,
+ +};
+ +
+ +static struct pci_driver hl_pci_driver = {
+ +      .name = HL_NAME,
+ +      .id_table = ids,
+ +      .probe = hl_pci_probe,
+ +      .remove = hl_pci_remove,
+ +      .driver.pm = &hl_pm_ops,
+ +};
+ +
+ +/*
+ + * hl_init - Initialize the habanalabs kernel driver
+ + */
+ +static int __init hl_init(void)
+ +{
+ +      int rc;
+ +      dev_t dev;
+ +
+ +      pr_info("loading driver\n");
+ +
+ +      rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
+ +      if (rc < 0) {
+ +              pr_err("unable to get major\n");
+ +              return rc;
+ +      }
+ +
+ +      hl_major = MAJOR(dev);
+ +
+ +      hl_class = class_create(THIS_MODULE, HL_NAME);
+ +      if (IS_ERR(hl_class)) {
+ +              pr_err("failed to allocate class\n");
+ +              rc = PTR_ERR(hl_class);
+ +              goto remove_major;
+ +      }
+ +
+ +      hl_debugfs_init();
+ +
+ +      rc = pci_register_driver(&hl_pci_driver);
+ +      if (rc) {
+ +              pr_err("failed to register pci device\n");
+ +              goto remove_debugfs;
+ +      }
+ +
+ +      pr_debug("driver loaded\n");
+ +
+ +      return 0;
+ +
+ +remove_debugfs:
+ +      hl_debugfs_fini();
+ +      class_destroy(hl_class);
+ +remove_major:
+ +      unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
+ +      return rc;
+ +}
+ +
+ +/*
+ + * hl_exit - Release all resources of the habanalabs kernel driver
+ + */
+ +static void __exit hl_exit(void)
+ +{
+ +      pci_unregister_driver(&hl_pci_driver);
+ +
+ +      /*
+ +       * Removing debugfs must be after all devices or simulator devices
+ +       * have been removed because otherwise we get a bug in the
+ +       * debugfs module for referencing NULL objects
+ +       */
+ +      hl_debugfs_fini();
+ +
+ +      class_destroy(hl_class);
+ +      unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
+ +
+ +      idr_destroy(&hl_devs_idr);
+ +
+ +      pr_debug("driver removed\n");
+ +}
+ +
+ +module_init(hl_init);
+ +module_exit(hl_exit);
diff --cc drivers/misc/habanalabs/common/hwmon.c

index 8c6cd77e6af6bd3b6ada7cd114867d9d528191a7,0000000000000000000000000000000000000000..b997336fa75fc88ad7a565163697b789e977f4ca

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/hwmon.c
--- /dev/null
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@@ -1,579 -1,0 +1,578 @@@
- #define SENSORS_PKT_TIMEOUT           1000000 /* 1s */
+ +// SPDX-License-Identifier: GPL-2.0
+ +
+ +/*
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + */
+ +
+ +#include "habanalabs.h"
+ +
+ +#include <linux/pci.h>
+ +#include <linux/hwmon.h>
+ +
-                       SENSORS_PKT_TIMEOUT, value);
+ +#define HWMON_NR_SENSOR_TYPES         (hwmon_pwm + 1)
+ +
+ +int hl_build_hwmon_channel_info(struct hl_device *hdev,
+ +                              struct armcp_sensor *sensors_arr)
+ +{
+ +      u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
+ +      u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
+ +      u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
+ +      struct hwmon_channel_info **channels_info;
+ +      u32 num_sensors_for_type, num_active_sensor_types = 0,
+ +                      arr_size = 0, *curr_arr;
+ +      enum hwmon_sensor_types type;
+ +      int rc, i, j;
+ +
+ +      for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
+ +              type = le32_to_cpu(sensors_arr[i].type);
+ +
+ +              if ((type == 0) && (sensors_arr[i].flags == 0))
+ +                      break;
+ +
+ +              if (type >= HWMON_NR_SENSOR_TYPES) {
+ +                      dev_err(hdev->dev,
+ +                              "Got wrong sensor type %d from device\n", type);
+ +                      return -EINVAL;
+ +              }
+ +
+ +              counts[type]++;
+ +              arr_size++;
+ +      }
+ +
+ +      for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
+ +              if (counts[i] == 0)
+ +                      continue;
+ +
+ +              num_sensors_for_type = counts[i] + 1;
+ +              curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
+ +                              GFP_KERNEL);
+ +              if (!curr_arr) {
+ +                      rc = -ENOMEM;
+ +                      goto sensors_type_err;
+ +              }
+ +
+ +              num_active_sensor_types++;
+ +              sensors_by_type[i] = curr_arr;
+ +      }
+ +
+ +      for (i = 0 ; i < arr_size ; i++) {
+ +              type = le32_to_cpu(sensors_arr[i].type);
+ +              curr_arr = sensors_by_type[type];
+ +              curr_arr[sensors_by_type_next_index[type]++] =
+ +                              le32_to_cpu(sensors_arr[i].flags);
+ +      }
+ +
+ +      channels_info = kcalloc(num_active_sensor_types + 1,
+ +                      sizeof(*channels_info), GFP_KERNEL);
+ +      if (!channels_info) {
+ +              rc = -ENOMEM;
+ +              goto channels_info_array_err;
+ +      }
+ +
+ +      for (i = 0 ; i < num_active_sensor_types ; i++) {
+ +              channels_info[i] = kzalloc(sizeof(*channels_info[i]),
+ +                              GFP_KERNEL);
+ +              if (!channels_info[i]) {
+ +                      rc = -ENOMEM;
+ +                      goto channel_info_err;
+ +              }
+ +      }
+ +
+ +      for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
+ +              if (!sensors_by_type[i])
+ +                      continue;
+ +
+ +              channels_info[j]->type = i;
+ +              channels_info[j]->config = sensors_by_type[i];
+ +              j++;
+ +      }
+ +
+ +      hdev->hl_chip_info->info =
+ +                      (const struct hwmon_channel_info **)channels_info;
+ +
+ +      return 0;
+ +
+ +channel_info_err:
+ +      for (i = 0 ; i < num_active_sensor_types ; i++)
+ +              if (channels_info[i]) {
+ +                      kfree(channels_info[i]->config);
+ +                      kfree(channels_info[i]);
+ +              }
+ +      kfree(channels_info);
+ +channels_info_array_err:
+ +sensors_type_err:
+ +      for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
+ +              kfree(sensors_by_type[i]);
+ +
+ +      return rc;
+ +}
+ +
+ +static int hl_read(struct device *dev, enum hwmon_sensor_types type,
+ +                      u32 attr, int channel, long *val)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      int rc;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return -ENODEV;
+ +
+ +      switch (type) {
+ +      case hwmon_temp:
+ +              switch (attr) {
+ +              case hwmon_temp_input:
+ +              case hwmon_temp_max:
+ +              case hwmon_temp_crit:
+ +              case hwmon_temp_max_hyst:
+ +              case hwmon_temp_crit_hyst:
+ +              case hwmon_temp_offset:
+ +              case hwmon_temp_highest:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +
+ +              rc = hl_get_temperature(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_in:
+ +              switch (attr) {
+ +              case hwmon_in_input:
+ +              case hwmon_in_min:
+ +              case hwmon_in_max:
+ +              case hwmon_in_highest:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +
+ +              rc = hl_get_voltage(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_curr:
+ +              switch (attr) {
+ +              case hwmon_curr_input:
+ +              case hwmon_curr_min:
+ +              case hwmon_curr_max:
+ +              case hwmon_curr_highest:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +
+ +              rc = hl_get_current(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_fan:
+ +              switch (attr) {
+ +              case hwmon_fan_input:
+ +              case hwmon_fan_min:
+ +              case hwmon_fan_max:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +              rc = hl_get_fan_speed(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_pwm:
+ +              switch (attr) {
+ +              case hwmon_pwm_input:
+ +              case hwmon_pwm_enable:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +              rc = hl_get_pwm_info(hdev, channel, attr, val);
+ +              break;
+ +      default:
+ +              return -EINVAL;
+ +      }
+ +      return rc;
+ +}
+ +
+ +static int hl_write(struct device *dev, enum hwmon_sensor_types type,
+ +                      u32 attr, int channel, long val)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return -ENODEV;
+ +
+ +      switch (type) {
+ +      case hwmon_temp:
+ +              switch (attr) {
+ +              case hwmon_temp_offset:
+ +              case hwmon_temp_reset_history:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +              hl_set_temperature(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_pwm:
+ +              switch (attr) {
+ +              case hwmon_pwm_input:
+ +              case hwmon_pwm_enable:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +              hl_set_pwm_info(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_in:
+ +              switch (attr) {
+ +              case hwmon_in_reset_history:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +              hl_set_voltage(hdev, channel, attr, val);
+ +              break;
+ +      case hwmon_curr:
+ +              switch (attr) {
+ +              case hwmon_curr_reset_history:
+ +                      break;
+ +              default:
+ +                      return -EINVAL;
+ +              }
+ +              hl_set_current(hdev, channel, attr, val);
+ +              break;
+ +      default:
+ +              return -EINVAL;
+ +      }
+ +      return 0;
+ +}
+ +
+ +static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
+ +                              u32 attr, int channel)
+ +{
+ +      switch (type) {
+ +      case hwmon_temp:
+ +              switch (attr) {
+ +              case hwmon_temp_input:
+ +              case hwmon_temp_max:
+ +              case hwmon_temp_max_hyst:
+ +              case hwmon_temp_crit:
+ +              case hwmon_temp_crit_hyst:
+ +              case hwmon_temp_highest:
+ +                      return 0444;
+ +              case hwmon_temp_offset:
+ +                      return 0644;
+ +              case hwmon_temp_reset_history:
+ +                      return 0200;
+ +              }
+ +              break;
+ +      case hwmon_in:
+ +              switch (attr) {
+ +              case hwmon_in_input:
+ +              case hwmon_in_min:
+ +              case hwmon_in_max:
+ +              case hwmon_in_highest:
+ +                      return 0444;
+ +              case hwmon_in_reset_history:
+ +                      return 0200;
+ +              }
+ +              break;
+ +      case hwmon_curr:
+ +              switch (attr) {
+ +              case hwmon_curr_input:
+ +              case hwmon_curr_min:
+ +              case hwmon_curr_max:
+ +              case hwmon_curr_highest:
+ +                      return 0444;
+ +              case hwmon_curr_reset_history:
+ +                      return 0200;
+ +              }
+ +              break;
+ +      case hwmon_fan:
+ +              switch (attr) {
+ +              case hwmon_fan_input:
+ +              case hwmon_fan_min:
+ +              case hwmon_fan_max:
+ +                      return 0444;
+ +              }
+ +              break;
+ +      case hwmon_pwm:
+ +              switch (attr) {
+ +              case hwmon_pwm_input:
+ +              case hwmon_pwm_enable:
+ +                      return 0644;
+ +              }
+ +              break;
+ +      default:
+ +              break;
+ +      }
+ +      return 0;
+ +}
+ +
+ +static const struct hwmon_ops hl_hwmon_ops = {
+ +      .is_visible = hl_is_visible,
+ +      .read = hl_read,
+ +      .write = hl_write
+ +};
+ +
+ +int hl_get_temperature(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long *value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
++                                              0, value);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to get temperature from sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +              *value = 0;
+ +      }
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_set_temperature(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +      pkt.value = __cpu_to_le64(value);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, NULL);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev,
+ +                      "Failed to set temperature of sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_get_voltage(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long *value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, value);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to get voltage from sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +              *value = 0;
+ +      }
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_get_current(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long *value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, value);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to get current from sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +              *value = 0;
+ +      }
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_get_fan_speed(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long *value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, value);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to get fan speed from sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +              *value = 0;
+ +      }
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_get_pwm_info(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long *value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, NULL);
++                                              0, value);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to get pwm info from sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +              *value = 0;
+ +      }
+ +
+ +      return rc;
+ +}
+ +
+ +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
+ +                      long value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +      pkt.value = cpu_to_le64(value);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
++                                              0, NULL);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev,
+ +                      "Failed to set pwm info to sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +}
+ +
+ +int hl_set_voltage(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +      pkt.value = __cpu_to_le64(value);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
++                                              0, NULL);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev,
+ +                      "Failed to set voltage of sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_set_current(struct hl_device *hdev,
+ +                      int sensor_index, u32 attr, long value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.sensor_index = __cpu_to_le16(sensor_index);
+ +      pkt.type = __cpu_to_le16(attr);
+ +      pkt.value = __cpu_to_le64(value);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
++                                              0, NULL);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev,
+ +                      "Failed to set current of sensor %d, error %d\n",
+ +                      sensor_index, rc);
+ +
+ +      return rc;
+ +}
+ +
+ +int hl_hwmon_init(struct hl_device *hdev)
+ +{
+ +      struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
+ +      struct asic_fixed_properties *prop = &hdev->asic_prop;
+ +      int rc;
+ +
+ +      if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
+ +              return 0;
+ +
+ +      if (hdev->hl_chip_info->info) {
+ +              hdev->hl_chip_info->ops = &hl_hwmon_ops;
+ +
+ +              hdev->hwmon_dev = hwmon_device_register_with_info(dev,
+ +                                      prop->armcp_info.card_name, hdev,
+ +                                      hdev->hl_chip_info, NULL);
+ +              if (IS_ERR(hdev->hwmon_dev)) {
+ +                      rc = PTR_ERR(hdev->hwmon_dev);
+ +                      dev_err(hdev->dev,
+ +                              "Unable to register hwmon device: %d\n", rc);
+ +                      return rc;
+ +              }
+ +
+ +              dev_info(hdev->dev, "%s: add sensors information\n",
+ +                      dev_name(hdev->hwmon_dev));
+ +
+ +              hdev->hwmon_initialized = true;
+ +      } else {
+ +              dev_info(hdev->dev, "no available sensors\n");
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +void hl_hwmon_fini(struct hl_device *hdev)
+ +{
+ +      if (!hdev->hwmon_initialized)
+ +              return;
+ +
+ +      hwmon_device_unregister(hdev->hwmon_dev);
+ +}
diff --cc drivers/misc/habanalabs/common/sysfs.c

index c4e7c682d58482ca85e64416d5d478d9d294f8ca,0000000000000000000000000000000000000000..b3cb0ac4721c5bb3d74b7d24d9371e97de8df03c

mode 100644,000000..100644
--- 1/drivers/misc/habanalabs/common/sysfs.c
--- /dev/null
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@@ -1,445 -1,0 +1,442 @@@
- #define SET_CLK_PKT_TIMEOUT   1000000 /* 1s */
- #define SET_PWR_PKT_TIMEOUT   1000000 /* 1s */
- 
+ +// SPDX-License-Identifier: GPL-2.0
+ +
+ +/*
+ + * Copyright 2016-2019 HabanaLabs, Ltd.
+ + * All Rights Reserved.
+ + */
+ +
+ +#include "habanalabs.h"
+ +
+ +#include <linux/pci.h>
+ +
-                                               SET_CLK_PKT_TIMEOUT, &result);
+ +/*
+ + * hl_get_frequency - query a PLL frequency through the device CPU
+ + *
+ + * @hdev: habanalabs device structure
+ + * @pll_index: index of the PLL to query
+ + * @curr: true sends ARMCP_PACKET_FREQUENCY_CURR_GET, false sends
+ + *        ARMCP_PACKET_FREQUENCY_GET
+ + *
+ + * Returns the value written back by send_cpu_message(), or its non-zero
+ + * return code when the message fails (the failure is also logged).
+ + */
+ +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
+ +{
+ +      struct armcp_packet pkt;
+ +      long result;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      if (curr)
+ +              pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
+ +                                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      else
+ +              pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
+ +                                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.pll_index = cpu_to_le32(pll_index);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SET_CLK_PKT_TIMEOUT, NULL);
++                                              0, &result);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to get frequency of PLL %d, error %d\n",
+ +                      pll_index, rc);
+ +              result = rc;
+ +      }
+ +
+ +      return result;
+ +}
+ +
+ +/*
+ + * hl_set_frequency - ask the device CPU to set a PLL frequency
+ + *
+ + * @hdev: habanalabs device structure
+ + * @pll_index: index of the PLL to program
+ + * @freq: requested frequency, sent in the packet's value field
+ + *
+ + * A failure of send_cpu_message() is only logged; nothing is returned to
+ + * the caller.
+ + */
+ +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
+ +                                      ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.pll_index = cpu_to_le32(pll_index);
+ +      pkt.value = cpu_to_le64(freq);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SET_PWR_PKT_TIMEOUT, &result);
++                                              0, NULL);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev,
+ +                      "Failed to set frequency to PLL %d, error %d\n",
+ +                      pll_index, rc);
+ +}
+ +
+ +/*
+ + * hl_get_max_power - query the maximum power value through the device CPU
+ + *
+ + * @hdev: habanalabs device structure
+ + *
+ + * Returns the value written back by send_cpu_message(). When the message
+ + * fails, the failure is logged and rc is returned in place of the value.
+ + *
+ + * NOTE(review): the return type is u64 while the error path stores rc in
+ + * a long, so a negative rc reaches the caller as a very large unsigned
+ + * number unless the caller converts it back to a signed type.
+ + */
+ +u64 hl_get_max_power(struct hl_device *hdev)
+ +{
+ +      struct armcp_packet pkt;
+ +      long result;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SET_PWR_PKT_TIMEOUT, NULL);
++                                              0, &result);
+ +
+ +      if (rc) {
+ +              dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
+ +              result = rc;
+ +      }
+ +
+ +      return result;
+ +}
+ +
+ +/*
+ + * hl_set_max_power - ask the device CPU to set the maximum power value
+ + *
+ + * @hdev: habanalabs device structure
+ + * @value: requested maximum power, sent in the packet's value field
+ + *
+ + * A failure of send_cpu_message() is only logged; nothing is returned to
+ + * the caller.
+ + */
+ +void hl_set_max_power(struct hl_device *hdev, u64 value)
+ +{
+ +      struct armcp_packet pkt;
+ +      int rc;
+ +
+ +      memset(&pkt, 0, sizeof(pkt));
+ +
+ +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
+ +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
+ +      pkt.value = cpu_to_le64(value);
+ +
+ +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
++                                              0, NULL);
+ +
+ +      if (rc)
+ +              dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
+ +}
+ +
+ +/* sysfs show: prints asic_prop.uboot_ver followed by a newline */
+ +static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver);
+ +}
+ +
+ +/*
+ + * sysfs show: prints armcp_info.kernel_version.
+ + *
+ + * NOTE(review): unlike most sibling attributes no '\n' is appended here;
+ + * presumably the firmware string already ends with one - confirm.
+ + */
+ +static ssize_t armcp_kernel_ver_show(struct device *dev,
+ +                              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
+ +}
+ +
+ +/* sysfs show: prints armcp_info.armcp_version followed by a newline */
+ +static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
+ +}
+ +
+ +/* sysfs show: prints armcp_info.cpld_version as 0x-prefixed 8-digit hex */
+ +static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "0x%08x\n",
+ +                      hdev->asic_prop.armcp_info.cpld_version);
+ +}
+ +
+ +/* sysfs show: prints armcp_info.infineon_version as 0x-prefixed 4-digit hex */
+ +static ssize_t infineon_ver_show(struct device *dev,
+ +                              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "0x%04x\n",
+ +                      hdev->asic_prop.armcp_info.infineon_version);
+ +}
+ +
+ +/* sysfs show: prints armcp_info.fuse_version followed by a newline */
+ +static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
+ +}
+ +
+ +/*
+ + * sysfs show: prints armcp_info.thermal_version.
+ + *
+ + * NOTE(review): unlike most sibling attributes no '\n' is appended here;
+ + * presumably the firmware string already ends with one - confirm.
+ + */
+ +static ssize_t thermal_ver_show(struct device *dev,
+ +                              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
+ +}
+ +
+ +/* sysfs show: prints asic_prop.preboot_ver followed by a newline */
+ +static ssize_t preboot_btl_ver_show(struct device *dev,
+ +                              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver);
+ +}
+ +
+ +/*
+ + * soft_reset_store - sysfs store that triggers a soft reset of the device
+ + *
+ + * The written text must parse as an unsigned number (any base accepted by
+ + * kstrtoul with base 0); the parsed value itself is not used. Returns
+ + * -EINVAL when parsing fails, otherwise @count.
+ + *
+ + * When the device does not support soft reset, an error is logged and
+ + * @count is still returned, so the write itself does not fail.
+ + */
+ +static ssize_t soft_reset_store(struct device *dev,
+ +                              struct device_attribute *attr, const char *buf,
+ +                              size_t count)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      /* kstrtoul() takes an unsigned long *, so the variable must match */
+ +      unsigned long value;
+ +      int rc;
+ +
+ +      rc = kstrtoul(buf, 0, &value);
+ +
+ +      if (rc) {
+ +              count = -EINVAL;
+ +              goto out;
+ +      }
+ +
+ +      if (!hdev->supports_soft_reset) {
+ +              dev_err(hdev->dev, "Device does not support soft-reset\n");
+ +              goto out;
+ +      }
+ +
+ +      dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
+ +
+ +      hl_device_reset(hdev, false, false);
+ +
+ +out:
+ +      return count;
+ +}
+ +
+ +/*
+ + * hard_reset_store - sysfs store that triggers a hard reset of the device
+ + *
+ + * The written text must parse as an unsigned number (any base accepted by
+ + * kstrtoul with base 0); the parsed value itself is not used. Returns
+ + * -EINVAL when parsing fails, otherwise @count after requesting the reset.
+ + */
+ +static ssize_t hard_reset_store(struct device *dev,
+ +                              struct device_attribute *attr,
+ +                              const char *buf, size_t count)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      /* kstrtoul() takes an unsigned long *, so the variable must match */
+ +      unsigned long value;
+ +      int rc;
+ +
+ +      rc = kstrtoul(buf, 0, &value);
+ +
+ +      if (rc) {
+ +              count = -EINVAL;
+ +              goto out;
+ +      }
+ +
+ +      dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
+ +
+ +      hl_device_reset(hdev, true, false);
+ +
+ +out:
+ +      return count;
+ +}
+ +
+ +/*
+ + * sysfs show: prints "GOYA" or "GAUDI" according to hdev->asic_type.
+ + * Any other ASIC type is logged as unrecognized and -EINVAL is returned.
+ + */
+ +static ssize_t device_type_show(struct device *dev,
+ +              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      char *str;
+ +
+ +      switch (hdev->asic_type) {
+ +      case ASIC_GOYA:
+ +              str = "GOYA";
+ +              break;
+ +      case ASIC_GAUDI:
+ +              str = "GAUDI";
+ +              break;
+ +      default:
+ +              dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
+ +                              hdev->asic_type);
+ +              return -EINVAL;
+ +      }
+ +
+ +      return sprintf(buf, "%s\n", str);
+ +}
+ +
+ +/*
+ + * sysfs show: prints the device PCI address in hex as
+ + * domain:bus:slot.function, taken from hdev->pdev.
+ + */
+ +static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%04x:%02x:%02x.%x\n",
+ +                      pci_domain_nr(hdev->pdev->bus),
+ +                      hdev->pdev->bus->number,
+ +                      PCI_SLOT(hdev->pdev->devfn),
+ +                      PCI_FUNC(hdev->pdev->devfn));
+ +}
+ +
+ +/*
+ + * sysfs show: prints the device state. The in_reset flag is checked first,
+ + * so a device that is both in reset and disabled reports "In reset".
+ + */
+ +static ssize_t status_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      char *str;
+ +
+ +      if (atomic_read(&hdev->in_reset))
+ +              str = "In reset";
+ +      else if (hdev->disabled)
+ +              str = "Malfunction";
+ +      else
+ +              str = "Operational";
+ +
+ +      return sprintf(buf, "%s\n", str);
+ +}
+ +
+ +/* sysfs show: prints hdev->soft_reset_cnt in decimal */
+ +static ssize_t soft_reset_cnt_show(struct device *dev,
+ +              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
+ +}
+ +
+ +/* sysfs show: prints hdev->hard_reset_cnt in decimal */
+ +static ssize_t hard_reset_cnt_show(struct device *dev,
+ +              struct device_attribute *attr, char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +
+ +      return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
+ +}
+ +
+ +/*
+ + * max_power_show - sysfs show for the device maximum power value
+ + *
+ + * Returns -ENODEV while the device is disabled or in reset. Otherwise
+ + * queries the value with hl_get_max_power() and prints it in decimal.
+ + *
+ + * hl_get_max_power() hands back its error code in place of the value when
+ + * the query fails, so a negative result is propagated as the read error
+ + * instead of being printed as a huge unsigned number.
+ + */
+ +static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
+ +                              char *buf)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      long val;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return -ENODEV;
+ +
+ +      val = hl_get_max_power(hdev);
+ +      if (val < 0)
+ +              return val;
+ +
+ +      return sprintf(buf, "%lu\n", val);
+ +}
+ +
+ +/*
+ + * max_power_store - sysfs store for the device maximum power value
+ + *
+ + * Returns -ENODEV while the device is disabled or in reset and -EINVAL
+ + * when the text does not parse as an unsigned number. Otherwise records
+ + * the value in hdev->max_power, forwards it with hl_set_max_power() and
+ + * returns @count. hl_set_max_power() returns nothing, so a failure to
+ + * apply the value is not reflected in the return value.
+ + */
+ +static ssize_t max_power_store(struct device *dev,
+ +              struct device_attribute *attr, const char *buf, size_t count)
+ +{
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      unsigned long value;
+ +      int rc;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev)) {
+ +              count = -ENODEV;
+ +              goto out;
+ +      }
+ +
+ +      rc = kstrtoul(buf, 0, &value);
+ +
+ +      if (rc) {
+ +              count = -EINVAL;
+ +              goto out;
+ +      }
+ +
+ +      hdev->max_power = value;
+ +      hl_set_max_power(hdev, value);
+ +
+ +out:
+ +      return count;
+ +}
+ +
+ +/*
+ + * eeprom_read_handler - binary sysfs read callback for the "eeprom" file
+ + *
+ + * @buf: destination buffer supplied by sysfs
+ + * @offset: not used; every call fetches the data from the start
+ + * @max_size: number of bytes requested
+ + *
+ + * Returns -ENODEV while the device is disabled or in reset, -EINVAL for a
+ + * zero-sized request and -ENOMEM when the bounce buffer cannot be
+ + * allocated. When the ASIC get_eeprom_data() callback fails, the failure
+ + * is returned to the reader and @buf is left untouched. On success
+ + * @max_size bytes are copied to @buf and @max_size is returned.
+ + */
+ +static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
+ +                      struct bin_attribute *attr, char *buf, loff_t offset,
+ +                      size_t max_size)
+ +{
+ +      struct device *dev = container_of(kobj, struct device, kobj);
+ +      struct hl_device *hdev = dev_get_drvdata(dev);
+ +      ssize_t ret;
+ +      char *data;
+ +      int rc;
+ +
+ +      if (hl_device_disabled_or_in_reset(hdev))
+ +              return -ENODEV;
+ +
+ +      if (!max_size)
+ +              return -EINVAL;
+ +
+ +      data = kzalloc(max_size, GFP_KERNEL);
+ +      if (!data)
+ +              return -ENOMEM;
+ +
+ +      rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size);
+ +      if (rc) {
+ +              /*
+ +               * Do not report max_size bytes as read when nothing was
+ +               * copied. A positive rc would look like a byte count, so
+ +               * map it to -EIO.
+ +               */
+ +              ret = rc < 0 ? rc : -EIO;
+ +              goto out;
+ +      }
+ +
+ +      memcpy(buf, data, max_size);
+ +      ret = max_size;
+ +
+ +out:
+ +      kfree(data);
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * Device attribute declarations, kept in alphabetical order. The _RO, _WO
+ + * and _RW variants bind to the <name>_show / <name>_store handlers defined
+ + * above.
+ + */
+ +static DEVICE_ATTR_RO(armcp_kernel_ver);
+ +static DEVICE_ATTR_RO(armcp_ver);
+ +static DEVICE_ATTR_RO(cpld_ver);
+ +static DEVICE_ATTR_RO(device_type);
+ +static DEVICE_ATTR_RO(fuse_ver);
+ +static DEVICE_ATTR_WO(hard_reset);
+ +static DEVICE_ATTR_RO(hard_reset_cnt);
+ +static DEVICE_ATTR_RO(infineon_ver);
+ +static DEVICE_ATTR_RW(max_power);
+ +static DEVICE_ATTR_RO(pci_addr);
+ +static DEVICE_ATTR_RO(preboot_btl_ver);
+ +static DEVICE_ATTR_WO(soft_reset);
+ +static DEVICE_ATTR_RO(soft_reset_cnt);
+ +static DEVICE_ATTR_RO(status);
+ +static DEVICE_ATTR_RO(thermal_ver);
+ +static DEVICE_ATTR_RO(uboot_ver);
+ +
+ +/*
+ + * Read-only (0444) binary attribute named "eeprom", declared with a size
+ + * of PAGE_SIZE and served by eeprom_read_handler().
+ + */
+ +static struct bin_attribute bin_attr_eeprom = {
+ +      .attr = {.name = "eeprom", .mode = (0444)},
+ +      .size = PAGE_SIZE,
+ +      .read = eeprom_read_handler
+ +};
+ +
+ +/* NULL-terminated list of the device attributes declared above */
+ +static struct attribute *hl_dev_attrs[] = {
+ +      &dev_attr_armcp_kernel_ver.attr,
+ +      &dev_attr_armcp_ver.attr,
+ +      &dev_attr_cpld_ver.attr,
+ +      &dev_attr_device_type.attr,
+ +      &dev_attr_fuse_ver.attr,
+ +      &dev_attr_hard_reset.attr,
+ +      &dev_attr_hard_reset_cnt.attr,
+ +      &dev_attr_infineon_ver.attr,
+ +      &dev_attr_max_power.attr,
+ +      &dev_attr_pci_addr.attr,
+ +      &dev_attr_preboot_btl_ver.attr,
+ +      &dev_attr_soft_reset.attr,
+ +      &dev_attr_soft_reset_cnt.attr,
+ +      &dev_attr_status.attr,
+ +      &dev_attr_thermal_ver.attr,
+ +      &dev_attr_uboot_ver.attr,
+ +      NULL,
+ +};
+ +
+ +/* NULL-terminated list of binary attributes; only the eeprom file */
+ +static struct bin_attribute *hl_dev_bin_attrs[] = {
+ +      &bin_attr_eeprom,
+ +      NULL
+ +};
+ +
+ +/* Attribute group combining the text attributes and the eeprom binary file */
+ +static struct attribute_group hl_dev_attr_group = {
+ +      .attrs = hl_dev_attrs,
+ +      .bin_attrs = hl_dev_bin_attrs,
+ +};
+ +
+ +/*
+ + * Left without an initializer here; hl_sysfs_init() hands it to the
+ + * ASIC-specific add_device_attr() callback before the groups are added.
+ + */
+ +static struct attribute_group hl_dev_clks_attr_group;
+ +
+ +/* NULL-terminated list of groups added/removed by hl_sysfs_init()/_fini() */
+ +static const struct attribute_group *hl_dev_attr_groups[] = {
+ +      &hl_dev_attr_group,
+ +      &hl_dev_clks_attr_group,
+ +      NULL,
+ +};
+ +
+ +/*
+ + * hl_sysfs_init - set sysfs-related defaults and add the attribute groups
+ + *
+ + * @hdev: habanalabs device structure
+ + *
+ + * Sets pm_mng_profile to PM_AUTO on Goya and PM_MANUAL on any other ASIC,
+ + * seeds max_power from asic_prop.max_power_default, passes the clocks
+ + * attribute group to the ASIC add_device_attr() callback (presumably so it
+ + * can populate it - confirm) and then adds all groups to hdev->dev.
+ + *
+ + * Returns 0 on success or the error from device_add_groups().
+ + */
+ +int hl_sysfs_init(struct hl_device *hdev)
+ +{
+ +      int rc;
+ +
+ +      if (hdev->asic_type == ASIC_GOYA)
+ +              hdev->pm_mng_profile = PM_AUTO;
+ +      else
+ +              hdev->pm_mng_profile = PM_MANUAL;
+ +      hdev->max_power = hdev->asic_prop.max_power_default;
+ +
+ +      hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
+ +
+ +      rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
+ +      if (rc) {
+ +              dev_err(hdev->dev,
+ +                      "Failed to add groups to device, error %d\n", rc);
+ +              return rc;
+ +      }
+ +
+ +      return 0;
+ +}
+ +
+ +/* hl_sysfs_fini - remove the attribute groups added by hl_sysfs_init() */
+ +void hl_sysfs_fini(struct hl_device *hdev)
+ +{
+ +      device_remove_groups(hdev->dev, hl_dev_attr_groups);
+ +}
diff --cc drivers/misc/habanalabs/gaudi/gaudi.c

index 4a1a52608fc0963f29acb6140cd396e652fd130f,637a9d608707f51e30226e121fad16019659a2bb..78fbff646f99a6b9387c5762c5c040fa56beea63
--- 1/drivers/misc/habanalabs/gaudi/gaudi.c
--- 2/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@@ -1766,7 -1823,9 +1772,7 @@@ static void gaudi_init_golden_registers
   
         gaudi_init_hbm_cred(hdev);
   
-       gaudi_disable_clock_gating(hdev);
- -      gaudi_init_rate_limiter(hdev);
- -
+       hdev->asic_funcs->disable_clock_gating(hdev);
   
         for (tpc_id = 0, tpc_offset = 0;
                                 tpc_id < TPC_NUMBER_OF_ENGINES;
@@@ -5166,50 -5275,41 +5211,51 @@@ static int gaudi_extract_ecc_info(struc
                 hdev->asic_funcs->disable_clock_gating(hdev);
         }
   
- -      switch (num_mem_regs) {
- -      case 1:
- -              dev_err(hdev->dev,
- -                      "%s ECC indication: 0x%08x\n",
- -                      block_name, RREG32(block_address));
- -              break;
- -      case 2:
- -              dev_err(hdev->dev,
- -                      "%s ECC indication: 0x%08x 0x%08x\n",
- -                      block_name,
- -                      RREG32(block_address), RREG32(block_address + 4));
- -              break;
- -      case 3:
- -              dev_err(hdev->dev,
- -                      "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
- -                      block_name,
- -                      RREG32(block_address), RREG32(block_address + 4),
- -                      RREG32(block_address + 8));
- -              break;
- -      case 4:
- -              dev_err(hdev->dev,
- -                      "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
- -                      block_name,
- -                      RREG32(block_address), RREG32(block_address + 4),
- -                      RREG32(block_address + 8), RREG32(block_address + 0xc));
- -              break;
- -      default:
- -              break;
+ +      /* Set invalid wrapper index */
+ +      *memory_wrapper_idx = 0xFF;
+ +
+ +      /*
+ +       * Iterate through memory wrappers, a single bit must be set.
+ +       * The scan has to cover indices 0 .. num_mem_regs - 1; with an
+ +       * inverted condition the body never runs and the wrapper index
+ +       * stays at the invalid 0xFF marker.
+ +       *
+ +       * NOTE(review): err_addr accumulates i * 4 on every pass, so the
+ +       * offsets read are +0, +4, +12, +24 rather than +0, +4, +8, +12 -
+ +       * confirm this matches the register layout.
+ +       */
+ +      for (i = 0 ; i < num_mem_regs ; i++) {
+ +              err_addr += i * 4;
+ +              err_word = RREG32(err_addr);
+ +              if (err_word) {
+ +                      err_bit = __ffs(err_word);
+ +                      *memory_wrapper_idx = err_bit + (32 * i);
+ +                      break;
+ +              }
+ +      }
   
+ +      if (*memory_wrapper_idx == 0xFF) {
+ +              dev_err(hdev->dev, "ECC error information cannot be found\n");
+ +              rc = -EINVAL;
+ +              goto enable_clk_gate;
         }
   
- -      if (disable_clock_gating) {
+ +      WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
+ +                      *memory_wrapper_idx);
+ +
+ +      *ecc_address =
+ +              RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
+ +      *ecc_syndrom =
+ +              RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
+ +
+ +      /* Clear error indication */
+ +      reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
+ +      if (params->derr)
+ +              reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
+ +      else
+ +              reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
+ +
+ +      WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
+ +
+ +enable_clk_gate:
+ +      if (params->disable_clock_gating) {
-               hdev->asic_funcs->enable_clock_gating(hdev);
+               hdev->asic_funcs->set_clock_gating(hdev);
++
                 mutex_unlock(&gaudi->clk_gate_mutex);
         }
+ +
+ +      return rc;
   }
   
   static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
diff --cc drivers/misc/habanalabs/goya/goya.c
Simple merge
diff --cc include/asm-generic/vmlinux.lds.h
Simple merge
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
		1	2
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/fpga/dfl-afu-main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/fpga/dfl-pci.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/interconnect/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/misc/habanalabs/common/debugfs.c	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/common/device.c	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/common/firmware_if.c	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/common/habanalabs.h	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/common/habanalabs_drv.c	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/common/hwmon.c	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/common/sysfs.c	patch \|	diff1 \|	\|	blob \| history
drivers/misc/habanalabs/gaudi/gaudi.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/misc/habanalabs/goya/goya.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/asm-generic/vmlinux.lds.h	patch \|	diff1 \|	diff2 \|	blob \| history