Merge 5.8-rc7 into char-misc-next
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Mon, 27 Jul 2020 09:49:37 +0000 (11:49 +0200)
This should resolve the merge/build issues reported when trying to
create linux-next.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
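
The non-trivial conflicts are in the habanalabs driver, where char-misc-next
reworked clock gating from a boolean toggle (clock_gating with separate
enable_clock_gating()/disable_clock_gating() callbacks) into a 64-bit mask
(clock_gating_mask applied through a single set_clock_gating() callback), and
where send_cpu_message() callers now pass a timeout of 0 (fall back to the
driver default) instead of an explicit HL_DEVICE_TIMEOUT_USEC. A minimal
kernel-side sketch of the resolved interface, using the field and callback
visible in the debugfs.c hunk below; the wrapper itself is illustrative and
not part of the commit:

    /* Sketch only: apply a clock-gating mask through the post-merge
     * interface. clock_gating_mask and set_clock_gating() are taken from
     * the resolved code below; this helper is hypothetical and assumes
     * the habanalabs.h definitions are in scope.
     */
    static void hl_example_apply_clock_gating(struct hl_device *hdev, u64 mask)
    {
            /* store the requested mask, then let the ASIC code apply it */
            hdev->clock_gating_mask = mask;
            hdev->asic_funcs->set_clock_gating(hdev);
    }
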
14 files changed:
MAINTAINERS
drivers/fpga/dfl-afu-main.c
drivers/fpga/dfl-pci.c
drivers/interconnect/core.c
drivers/misc/habanalabs/common/debugfs.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/hwmon.c
drivers/misc/habanalabs/common/sysfs.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c
include/asm-generic/vmlinux.lds.h

diff --cc MAINTAINERS
Simple merge
diff --cc drivers/fpga/dfl-afu-main.c
Simple merge
diff --cc drivers/fpga/dfl-pci.c
Simple merge
diff --cc drivers/interconnect/core.c
Simple merge
diff --cc drivers/misc/habanalabs/common/debugfs.c
index fc4372c18ce20798317e4022bd119001201f5a55,0000000000000000000000000000000000000000..0bc036e01ee8df1bfbfa8fccd8f41ba1890a4b3d
mode 100644,000000..100644
--- /dev/null
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@@ -1,1411 -1,0 +1,1404 @@@
 +// SPDX-License-Identifier: GPL-2.0
 +
 +/*
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + */
 +
 +#include "habanalabs.h"
 +#include "include/hw_ip/mmu/mmu_general.h"
 +
 +#include <linux/pci.h>
 +#include <linux/debugfs.h>
 +#include <linux/uaccess.h>
 +
 +#define MMU_ADDR_BUF_SIZE     40
 +#define MMU_ASID_BUF_SIZE     10
 +#define MMU_KBUF_SIZE         (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
 +
 +static struct dentry *hl_debug_root;
 +
 +static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 +                              u8 i2c_reg, u32 *val)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return -EBUSY;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.i2c_bus = i2c_bus;
 +      pkt.i2c_addr = i2c_addr;
 +      pkt.i2c_reg = i2c_reg;
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       HL_DEVICE_TIMEOUT_USEC, (long *) val);
++                                              0, (long *) val);
 +
 +      if (rc)
 +              dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
 +
 +      return rc;
 +}
 +
 +static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
 +                              u8 i2c_reg, u32 val)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return -EBUSY;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.i2c_bus = i2c_bus;
 +      pkt.i2c_addr = i2c_addr;
 +      pkt.i2c_reg = i2c_reg;
 +      pkt.value = cpu_to_le64(val);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               HL_DEVICE_TIMEOUT_USEC, NULL);
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
 +
 +      return rc;
 +}
 +
 +static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.led_index = cpu_to_le32(led);
 +      pkt.value = cpu_to_le64(state);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               HL_DEVICE_TIMEOUT_USEC, NULL);
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
 +}
 +
 +static int command_buffers_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_cb *cb;
 +      bool first = true;
 +
 +      spin_lock(&dev_entry->cb_spinlock);
 +
 +      list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
 +              if (first) {
 +                      first = false;
 +                      seq_puts(s, "\n");
 +                      seq_puts(s, " CB ID   CTX ID   CB size    CB RefCnt    mmap?   CS counter\n");
 +                      seq_puts(s, "---------------------------------------------------------------\n");
 +              }
 +              seq_printf(s,
 +                      "   %03d        %d    0x%08x      %d          %d          %d\n",
 +                      cb->id, cb->ctx_id, cb->size,
 +                      kref_read(&cb->refcount),
 +                      cb->mmap, cb->cs_cnt);
 +      }
 +
 +      spin_unlock(&dev_entry->cb_spinlock);
 +
 +      if (!first)
 +              seq_puts(s, "\n");
 +
 +      return 0;
 +}
 +
 +static int command_submission_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_cs *cs;
 +      bool first = true;
 +
 +      spin_lock(&dev_entry->cs_spinlock);
 +
 +      list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
 +              if (first) {
 +                      first = false;
 +                      seq_puts(s, "\n");
 +                      seq_puts(s, " CS ID   CTX ASID   CS RefCnt   Submitted    Completed\n");
 +                      seq_puts(s, "------------------------------------------------------\n");
 +              }
 +              seq_printf(s,
 +                      "   %llu       %d          %d           %d            %d\n",
 +                      cs->sequence, cs->ctx->asid,
 +                      kref_read(&cs->refcount),
 +                      cs->submitted, cs->completed);
 +      }
 +
 +      spin_unlock(&dev_entry->cs_spinlock);
 +
 +      if (!first)
 +              seq_puts(s, "\n");
 +
 +      return 0;
 +}
 +
 +static int command_submission_jobs_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_cs_job *job;
 +      bool first = true;
 +
 +      spin_lock(&dev_entry->cs_job_spinlock);
 +
 +      list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
 +              if (first) {
 +                      first = false;
 +                      seq_puts(s, "\n");
 +                      seq_puts(s, " JOB ID   CS ID    CTX ASID   H/W Queue\n");
 +                      seq_puts(s, "---------------------------------------\n");
 +              }
 +              if (job->cs)
 +                      seq_printf(s,
 +                              "    %02d       %llu         %d         %d\n",
 +                              job->id, job->cs->sequence, job->cs->ctx->asid,
 +                              job->hw_queue_id);
 +              else
 +                      seq_printf(s,
 +                              "    %02d       0         %d         %d\n",
 +                              job->id, HL_KERNEL_ASID_ID, job->hw_queue_id);
 +      }
 +
 +      spin_unlock(&dev_entry->cs_job_spinlock);
 +
 +      if (!first)
 +              seq_puts(s, "\n");
 +
 +      return 0;
 +}
 +
 +static int userptr_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_userptr *userptr;
 +      char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
 +                              "DMA_FROM_DEVICE", "DMA_NONE"};
 +      bool first = true;
 +
 +      spin_lock(&dev_entry->userptr_spinlock);
 +
 +      list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
 +              if (first) {
 +                      first = false;
 +                      seq_puts(s, "\n");
 +                      seq_puts(s, " user virtual address     size             dma dir\n");
 +                      seq_puts(s, "----------------------------------------------------------\n");
 +              }
 +              seq_printf(s,
 +                      "    0x%-14llx      %-10u    %-30s\n",
 +                      userptr->addr, userptr->size, dma_dir[userptr->dir]);
 +      }
 +
 +      spin_unlock(&dev_entry->userptr_spinlock);
 +
 +      if (!first)
 +              seq_puts(s, "\n");
 +
 +      return 0;
 +}
 +
 +static int vm_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_ctx *ctx;
 +      struct hl_vm *vm;
 +      struct hl_vm_hash_node *hnode;
 +      struct hl_userptr *userptr;
 +      struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
 +      enum vm_type_t *vm_type;
 +      bool once = true;
 +      u64 j;
 +      int i;
 +
 +      if (!dev_entry->hdev->mmu_enable)
 +              return 0;
 +
 +      spin_lock(&dev_entry->ctx_mem_hash_spinlock);
 +
 +      list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
 +              once = false;
 +              seq_puts(s, "\n\n----------------------------------------------------");
 +              seq_puts(s, "\n----------------------------------------------------\n\n");
 +              seq_printf(s, "ctx asid: %u\n", ctx->asid);
 +
 +              seq_puts(s, "\nmappings:\n\n");
 +              seq_puts(s, "    virtual address        size          handle\n");
 +              seq_puts(s, "----------------------------------------------------\n");
 +              mutex_lock(&ctx->mem_hash_lock);
 +              hash_for_each(ctx->mem_hash, i, hnode, node) {
 +                      vm_type = hnode->ptr;
 +
 +                      if (*vm_type == VM_TYPE_USERPTR) {
 +                              userptr = hnode->ptr;
 +                              seq_printf(s,
 +                                      "    0x%-14llx      %-10u\n",
 +                                      hnode->vaddr, userptr->size);
 +                      } else {
 +                              phys_pg_pack = hnode->ptr;
 +                              seq_printf(s,
 +                                      "    0x%-14llx      %-10llu       %-4u\n",
 +                                      hnode->vaddr, phys_pg_pack->total_size,
 +                                      phys_pg_pack->handle);
 +                      }
 +              }
 +              mutex_unlock(&ctx->mem_hash_lock);
 +
 +              vm = &ctx->hdev->vm;
 +              spin_lock(&vm->idr_lock);
 +
 +              if (!idr_is_empty(&vm->phys_pg_pack_handles))
 +                      seq_puts(s, "\n\nallocations:\n");
 +
 +              idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
 +                      if (phys_pg_pack->asid != ctx->asid)
 +                              continue;
 +
 +                      seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
 +                      seq_printf(s, "page size: %u\n\n",
 +                                              phys_pg_pack->page_size);
 +                      seq_puts(s, "   physical address\n");
 +                      seq_puts(s, "---------------------\n");
 +                      for (j = 0 ; j < phys_pg_pack->npages ; j++) {
 +                              seq_printf(s, "    0x%-14llx\n",
 +                                              phys_pg_pack->pages[j]);
 +                      }
 +              }
 +              spin_unlock(&vm->idr_lock);
 +
 +      }
 +
 +      spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 +
 +      if (!once)
 +              seq_puts(s, "\n");
 +
 +      return 0;
 +}
 +
 +/* these inline functions are copied from mmu.c */
 +static inline u64 get_hop0_addr(struct hl_ctx *ctx)
 +{
 +      return ctx->hdev->asic_prop.mmu_pgt_addr +
 +                      (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
 +}
 +
 +static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
 +                                      u64 virt_addr, u64 mask, u64 shift)
 +{
 +      return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
 +                      ((virt_addr & mask) >> shift);
 +}
 +
 +static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
 +                                      struct hl_mmu_properties *mmu_specs,
 +                                      u64 hop_addr, u64 vaddr)
 +{
 +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
 +                                      mmu_specs->hop0_shift);
 +}
 +
 +static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
 +                                      struct hl_mmu_properties *mmu_specs,
 +                                      u64 hop_addr, u64 vaddr)
 +{
 +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
 +                                      mmu_specs->hop1_shift);
 +}
 +
 +static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
 +                                      struct hl_mmu_properties *mmu_specs,
 +                                      u64 hop_addr, u64 vaddr)
 +{
 +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
 +                                      mmu_specs->hop2_shift);
 +}
 +
 +static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
 +                                      struct hl_mmu_properties *mmu_specs,
 +                                      u64 hop_addr, u64 vaddr)
 +{
 +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
 +                                      mmu_specs->hop3_shift);
 +}
 +
 +static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
 +                                      struct hl_mmu_properties *mmu_specs,
 +                                      u64 hop_addr, u64 vaddr)
 +{
 +      return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
 +                                      mmu_specs->hop4_shift);
 +}
 +
 +static inline u64 get_next_hop_addr(u64 curr_pte)
 +{
 +      if (curr_pte & PAGE_PRESENT_MASK)
 +              return curr_pte & HOP_PHYS_ADDR_MASK;
 +      else
 +              return ULLONG_MAX;
 +}
 +
 +static int mmu_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_device *hdev = dev_entry->hdev;
 +      struct asic_fixed_properties *prop = &hdev->asic_prop;
 +      struct hl_mmu_properties *mmu_prop;
 +      struct hl_ctx *ctx;
 +      bool is_dram_addr;
 +
 +      u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
 +              hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
 +              hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0,
 +              hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0,
 +              hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0,
 +              virt_addr = dev_entry->mmu_addr;
 +
 +      if (!hdev->mmu_enable)
 +              return 0;
 +
 +      if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
 +              ctx = hdev->kernel_ctx;
 +      else
 +              ctx = hdev->compute_ctx;
 +
 +      if (!ctx) {
 +              dev_err(hdev->dev, "no ctx available\n");
 +              return 0;
 +      }
 +
 +      is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
 +                                              prop->dmmu.start_addr,
 +                                              prop->dmmu.end_addr);
 +
 +      /* shifts and masks are the same in PMMU and HPMMU, use one of them */
 +      mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 +
 +      mutex_lock(&ctx->mmu_lock);
 +
 +      /* the following lookup is copied from unmap() in mmu.c */
 +
 +      hop0_addr = get_hop0_addr(ctx);
 +      hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
 +      hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
 +      hop1_addr = get_next_hop_addr(hop0_pte);
 +
 +      if (hop1_addr == ULLONG_MAX)
 +              goto not_mapped;
 +
 +      hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
 +      hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
 +      hop2_addr = get_next_hop_addr(hop1_pte);
 +
 +      if (hop2_addr == ULLONG_MAX)
 +              goto not_mapped;
 +
 +      hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
 +      hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
 +      hop3_addr = get_next_hop_addr(hop2_pte);
 +
 +      if (hop3_addr == ULLONG_MAX)
 +              goto not_mapped;
 +
 +      hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
 +      hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
 +
 +      if (!(hop3_pte & LAST_MASK)) {
 +              hop4_addr = get_next_hop_addr(hop3_pte);
 +
 +              if (hop4_addr == ULLONG_MAX)
 +                      goto not_mapped;
 +
 +              hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
 +                                                      virt_addr);
 +              hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
 +              if (!(hop4_pte & PAGE_PRESENT_MASK))
 +                      goto not_mapped;
 +      } else {
 +              if (!(hop3_pte & PAGE_PRESENT_MASK))
 +                      goto not_mapped;
 +      }
 +
 +      seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
 +                      dev_entry->mmu_asid, dev_entry->mmu_addr);
 +
 +      seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr);
 +      seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr);
 +      seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte);
 +
 +      seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr);
 +      seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr);
 +      seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte);
 +
 +      seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr);
 +      seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr);
 +      seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte);
 +
 +      seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr);
 +      seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr);
 +      seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte);
 +
 +      if (!(hop3_pte & LAST_MASK)) {
 +              seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr);
 +              seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr);
 +              seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte);
 +      }
 +
 +      goto out;
 +
 +not_mapped:
 +      dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
 +                      virt_addr);
 +out:
 +      mutex_unlock(&ctx->mmu_lock);
 +
 +      return 0;
 +}
 +
 +static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
 +              size_t count, loff_t *f_pos)
 +{
 +      struct seq_file *s = file->private_data;
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_device *hdev = dev_entry->hdev;
 +      char kbuf[MMU_KBUF_SIZE];
 +      char *c;
 +      ssize_t rc;
 +
 +      if (!hdev->mmu_enable)
 +              return count;
 +
 +      if (count > sizeof(kbuf) - 1)
 +              goto err;
 +      if (copy_from_user(kbuf, buf, count))
 +              goto err;
 +      kbuf[count] = 0;
 +
 +      c = strchr(kbuf, ' ');
 +      if (!c)
 +              goto err;
 +      *c = '\0';
 +
 +      rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
 +      if (rc)
 +              goto err;
 +
 +      if (strncmp(c+1, "0x", 2))
 +              goto err;
 +      rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
 +      if (rc)
 +              goto err;
 +
 +      return count;
 +
 +err:
 +      dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
 +
 +      return -EINVAL;
 +}
 +
 +static int engines_show(struct seq_file *s, void *data)
 +{
 +      struct hl_debugfs_entry *entry = s->private;
 +      struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 +      struct hl_device *hdev = dev_entry->hdev;
 +
 +      if (atomic_read(&hdev->in_reset)) {
 +              dev_warn_ratelimited(hdev->dev,
 +                              "Can't check device idle during reset\n");
 +              return 0;
 +      }
 +
 +      hdev->asic_funcs->is_device_idle(hdev, NULL, s);
 +
 +      return 0;
 +}
 +
 +static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
 +{
 +      struct asic_fixed_properties *prop = &hdev->asic_prop;
 +
 +      if (!hdev->mmu_enable)
 +              goto out;
 +
 +      if (hdev->dram_supports_virtual_memory &&
 +              (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
 +              return true;
 +
 +      if (addr >= prop->pmmu.start_addr &&
 +              addr < prop->pmmu.end_addr)
 +              return true;
 +
 +      if (addr >= prop->pmmu_huge.start_addr &&
 +              addr < prop->pmmu_huge.end_addr)
 +              return true;
 +out:
 +      return false;
 +}
 +
 +static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 +                              u64 *phys_addr)
 +{
 +      struct hl_ctx *ctx = hdev->compute_ctx;
 +      struct asic_fixed_properties *prop = &hdev->asic_prop;
 +      struct hl_mmu_properties *mmu_prop;
 +      u64 hop_addr, hop_pte_addr, hop_pte;
 +      u64 offset_mask = HOP4_MASK | FLAGS_MASK;
 +      int rc = 0;
 +      bool is_dram_addr;
 +
 +      if (!ctx) {
 +              dev_err(hdev->dev, "no ctx available\n");
 +              return -EINVAL;
 +      }
 +
 +      is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
 +                                              prop->dmmu.start_addr,
 +                                              prop->dmmu.end_addr);
 +
 +      /* shifts and masks are the same in PMMU and HPMMU, use one of them */
 +      mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 +
 +      mutex_lock(&ctx->mmu_lock);
 +
 +      /* hop 0 */
 +      hop_addr = get_hop0_addr(ctx);
 +      hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 +      hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 +
 +      /* hop 1 */
 +      hop_addr = get_next_hop_addr(hop_pte);
 +      if (hop_addr == ULLONG_MAX)
 +              goto not_mapped;
 +      hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 +      hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 +
 +      /* hop 2 */
 +      hop_addr = get_next_hop_addr(hop_pte);
 +      if (hop_addr == ULLONG_MAX)
 +              goto not_mapped;
 +      hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 +      hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 +
 +      /* hop 3 */
 +      hop_addr = get_next_hop_addr(hop_pte);
 +      if (hop_addr == ULLONG_MAX)
 +              goto not_mapped;
 +      hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 +      hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 +
 +      if (!(hop_pte & LAST_MASK)) {
 +              /* hop 4 */
 +              hop_addr = get_next_hop_addr(hop_pte);
 +              if (hop_addr == ULLONG_MAX)
 +                      goto not_mapped;
 +              hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
 +                                                      virt_addr);
 +              hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 +
 +              offset_mask = FLAGS_MASK;
 +      }
 +
 +      if (!(hop_pte & PAGE_PRESENT_MASK))
 +              goto not_mapped;
 +
 +      *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask);
 +
 +      goto out;
 +
 +not_mapped:
 +      dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
 +                      virt_addr);
 +      rc = -EINVAL;
 +out:
 +      mutex_unlock(&ctx->mmu_lock);
 +      return rc;
 +}
 +
 +static ssize_t hl_data_read32(struct file *f, char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char tmp_buf[32];
 +      u64 addr = entry->addr;
 +      u32 val;
 +      ssize_t rc;
 +
 +      if (atomic_read(&hdev->in_reset)) {
 +              dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
 +              return 0;
 +      }
 +
 +      if (*ppos)
 +              return 0;
 +
 +      if (hl_is_device_va(hdev, addr)) {
 +              rc = device_va_to_pa(hdev, addr, &addr);
 +              if (rc)
 +                      return rc;
 +      }
 +
 +      rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
 +              return rc;
 +      }
 +
 +      sprintf(tmp_buf, "0x%08x\n", val);
 +      return simple_read_from_buffer(buf, count, ppos, tmp_buf,
 +                      strlen(tmp_buf));
 +}
 +
 +static ssize_t hl_data_write32(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u64 addr = entry->addr;
 +      u32 value;
 +      ssize_t rc;
 +
 +      if (atomic_read(&hdev->in_reset)) {
 +              dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
 +              return 0;
 +      }
 +
 +      rc = kstrtouint_from_user(buf, count, 16, &value);
 +      if (rc)
 +              return rc;
 +
 +      if (hl_is_device_va(hdev, addr)) {
 +              rc = device_va_to_pa(hdev, addr, &addr);
 +              if (rc)
 +                      return rc;
 +      }
 +
 +      rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
 +                      value, addr);
 +              return rc;
 +      }
 +
 +      return count;
 +}
 +
 +static ssize_t hl_data_read64(struct file *f, char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char tmp_buf[32];
 +      u64 addr = entry->addr;
 +      u64 val;
 +      ssize_t rc;
 +
 +      if (*ppos)
 +              return 0;
 +
 +      if (hl_is_device_va(hdev, addr)) {
 +              rc = device_va_to_pa(hdev, addr, &addr);
 +              if (rc)
 +                      return rc;
 +      }
 +
 +      rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
 +              return rc;
 +      }
 +
 +      sprintf(tmp_buf, "0x%016llx\n", val);
 +      return simple_read_from_buffer(buf, count, ppos, tmp_buf,
 +                      strlen(tmp_buf));
 +}
 +
 +static ssize_t hl_data_write64(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u64 addr = entry->addr;
 +      u64 value;
 +      ssize_t rc;
 +
 +      rc = kstrtoull_from_user(buf, count, 16, &value);
 +      if (rc)
 +              return rc;
 +
 +      if (hl_is_device_va(hdev, addr)) {
 +              rc = device_va_to_pa(hdev, addr, &addr);
 +              if (rc)
 +                      return rc;
 +      }
 +
 +      rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
 +                      value, addr);
 +              return rc;
 +      }
 +
 +      return count;
 +}
 +
 +static ssize_t hl_get_power_state(struct file *f, char __user *buf,
 +              size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char tmp_buf[200];
 +      int i;
 +
 +      if (*ppos)
 +              return 0;
 +
 +      if (hdev->pdev->current_state == PCI_D0)
 +              i = 1;
 +      else if (hdev->pdev->current_state == PCI_D3hot)
 +              i = 2;
 +      else
 +              i = 3;
 +
 +      sprintf(tmp_buf,
 +              "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
 +      return simple_read_from_buffer(buf, count, ppos, tmp_buf,
 +                      strlen(tmp_buf));
 +}
 +
 +static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u32 value;
 +      ssize_t rc;
 +
 +      rc = kstrtouint_from_user(buf, count, 10, &value);
 +      if (rc)
 +              return rc;
 +
 +      if (value == 1) {
 +              pci_set_power_state(hdev->pdev, PCI_D0);
 +              pci_restore_state(hdev->pdev);
 +              rc = pci_enable_device(hdev->pdev);
 +      } else if (value == 2) {
 +              pci_save_state(hdev->pdev);
 +              pci_disable_device(hdev->pdev);
 +              pci_set_power_state(hdev->pdev, PCI_D3hot);
 +      } else {
 +              dev_dbg(hdev->dev, "invalid power state value %u\n", value);
 +              return -EINVAL;
 +      }
 +
 +      return count;
 +}
 +
 +static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char tmp_buf[32];
 +      u32 val;
 +      ssize_t rc;
 +
 +      if (*ppos)
 +              return 0;
 +
 +      rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
 +                      entry->i2c_reg, &val);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to read from I2C bus %d, addr %d, reg %d\n",
 +                      entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
 +              return rc;
 +      }
 +
 +      sprintf(tmp_buf, "0x%02x\n", val);
 +      rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
 +                      strlen(tmp_buf));
 +
 +      return rc;
 +}
 +
 +static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u32 value;
 +      ssize_t rc;
 +
 +      rc = kstrtouint_from_user(buf, count, 16, &value);
 +      if (rc)
 +              return rc;
 +
 +      rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
 +                      entry->i2c_reg, value);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to write 0x%02x to I2C bus %d, addr %d, reg %d\n",
 +                      value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg);
 +              return rc;
 +      }
 +
 +      return count;
 +}
 +
 +static ssize_t hl_led0_write(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u32 value;
 +      ssize_t rc;
 +
 +      rc = kstrtouint_from_user(buf, count, 10, &value);
 +      if (rc)
 +              return rc;
 +
 +      value = value ? 1 : 0;
 +
 +      hl_debugfs_led_set(hdev, 0, value);
 +
 +      return count;
 +}
 +
 +static ssize_t hl_led1_write(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u32 value;
 +      ssize_t rc;
 +
 +      rc = kstrtouint_from_user(buf, count, 10, &value);
 +      if (rc)
 +              return rc;
 +
 +      value = value ? 1 : 0;
 +
 +      hl_debugfs_led_set(hdev, 1, value);
 +
 +      return count;
 +}
 +
 +static ssize_t hl_led2_write(struct file *f, const char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u32 value;
 +      ssize_t rc;
 +
 +      rc = kstrtouint_from_user(buf, count, 10, &value);
 +      if (rc)
 +              return rc;
 +
 +      value = value ? 1 : 0;
 +
 +      hl_debugfs_led_set(hdev, 2, value);
 +
 +      return count;
 +}
 +
 +static ssize_t hl_device_read(struct file *f, char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      static const char *help =
 +              "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
 +      return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
 +}
 +
 +static ssize_t hl_device_write(struct file *f, const char __user *buf,
 +                                   size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char data[30] = {0};
 +
 +      /* don't allow partial writes */
 +      if (*ppos != 0)
 +              return 0;
 +
 +      simple_write_to_buffer(data, 29, ppos, buf, count);
 +
 +      if (strncmp("disable", data, strlen("disable")) == 0) {
 +              hdev->disabled = true;
 +      } else if (strncmp("enable", data, strlen("enable")) == 0) {
 +              hdev->disabled = false;
 +      } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
 +              hdev->asic_funcs->suspend(hdev);
 +      } else if (strncmp("resume", data, strlen("resume")) == 0) {
 +              hdev->asic_funcs->resume(hdev);
 +      } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
 +              hdev->device_cpu_disabled = true;
 +      } else {
 +              dev_err(hdev->dev,
 +                      "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
 +              count = -EINVAL;
 +      }
 +
 +      return count;
 +}
 +
 +static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char tmp_buf[200];
 +      ssize_t rc;
 +
 +      if (*ppos)
 +              return 0;
 +
-       sprintf(tmp_buf, "%d\n", hdev->clock_gating);
++      sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask);
 +      rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
 +                      strlen(tmp_buf) + 1);
 +
 +      return rc;
 +}
 +
 +static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
 +                                   size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
-       u32 value;
++      u64 value;
 +      ssize_t rc;
 +
 +      if (atomic_read(&hdev->in_reset)) {
 +              dev_warn_ratelimited(hdev->dev,
 +                              "Can't change clock gating during reset\n");
 +              return 0;
 +      }
 +
-       rc = kstrtouint_from_user(buf, count, 10, &value);
++      rc = kstrtoull_from_user(buf, count, 16, &value);
 +      if (rc)
 +              return rc;
 +
-       if (value) {
-               hdev->clock_gating = 1;
-               if (hdev->asic_funcs->enable_clock_gating)
-                       hdev->asic_funcs->enable_clock_gating(hdev);
-       } else {
-               if (hdev->asic_funcs->disable_clock_gating)
-                       hdev->asic_funcs->disable_clock_gating(hdev);
-               hdev->clock_gating = 0;
-       }
++      hdev->clock_gating_mask = value;
++      hdev->asic_funcs->set_clock_gating(hdev);
 +
 +      return count;
 +}
 +
 +static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
 +                                      size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      char tmp_buf[200];
 +      ssize_t rc;
 +
 +      if (*ppos)
 +              return 0;
 +
 +      sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
 +      rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
 +                      strlen(tmp_buf) + 1);
 +
 +      return rc;
 +}
 +
 +static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
 +                                   size_t count, loff_t *ppos)
 +{
 +      struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
 +      struct hl_device *hdev = entry->hdev;
 +      u32 value;
 +      ssize_t rc;
 +
 +      if (atomic_read(&hdev->in_reset)) {
 +              dev_warn_ratelimited(hdev->dev,
 +                              "Can't change stop on error during reset\n");
 +              return 0;
 +      }
 +
 +      rc = kstrtouint_from_user(buf, count, 10, &value);
 +      if (rc)
 +              return rc;
 +
 +      hdev->stop_on_err = value ? 1 : 0;
 +
 +      hl_device_reset(hdev, false, false);
 +
 +      return count;
 +}
 +
 +static const struct file_operations hl_data32b_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_data_read32,
 +      .write = hl_data_write32
 +};
 +
 +static const struct file_operations hl_data64b_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_data_read64,
 +      .write = hl_data_write64
 +};
 +
 +static const struct file_operations hl_i2c_data_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_i2c_data_read,
 +      .write = hl_i2c_data_write
 +};
 +
 +static const struct file_operations hl_power_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_get_power_state,
 +      .write = hl_set_power_state
 +};
 +
 +static const struct file_operations hl_led0_fops = {
 +      .owner = THIS_MODULE,
 +      .write = hl_led0_write
 +};
 +
 +static const struct file_operations hl_led1_fops = {
 +      .owner = THIS_MODULE,
 +      .write = hl_led1_write
 +};
 +
 +static const struct file_operations hl_led2_fops = {
 +      .owner = THIS_MODULE,
 +      .write = hl_led2_write
 +};
 +
 +static const struct file_operations hl_device_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_device_read,
 +      .write = hl_device_write
 +};
 +
 +static const struct file_operations hl_clk_gate_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_clk_gate_read,
 +      .write = hl_clk_gate_write
 +};
 +
 +static const struct file_operations hl_stop_on_err_fops = {
 +      .owner = THIS_MODULE,
 +      .read = hl_stop_on_err_read,
 +      .write = hl_stop_on_err_write
 +};
 +
 +static const struct hl_info_list hl_debugfs_list[] = {
 +      {"command_buffers", command_buffers_show, NULL},
 +      {"command_submission", command_submission_show, NULL},
 +      {"command_submission_jobs", command_submission_jobs_show, NULL},
 +      {"userptr", userptr_show, NULL},
 +      {"vm", vm_show, NULL},
 +      {"mmu", mmu_show, mmu_asid_va_write},
 +      {"engines", engines_show, NULL}
 +};
 +
 +static int hl_debugfs_open(struct inode *inode, struct file *file)
 +{
 +      struct hl_debugfs_entry *node = inode->i_private;
 +
 +      return single_open(file, node->info_ent->show, node);
 +}
 +
 +static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
 +              size_t count, loff_t *f_pos)
 +{
 +      struct hl_debugfs_entry *node = file->f_inode->i_private;
 +
 +      if (node->info_ent->write)
 +              return node->info_ent->write(file, buf, count, f_pos);
 +      else
 +              return -EINVAL;
 +
 +}
 +
 +static const struct file_operations hl_debugfs_fops = {
 +      .owner = THIS_MODULE,
 +      .open = hl_debugfs_open,
 +      .read = seq_read,
 +      .write = hl_debugfs_write,
 +      .llseek = seq_lseek,
 +      .release = single_release,
 +};
 +
 +void hl_debugfs_add_device(struct hl_device *hdev)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +      int count = ARRAY_SIZE(hl_debugfs_list);
 +      struct hl_debugfs_entry *entry;
 +      struct dentry *ent;
 +      int i;
 +
 +      dev_entry->hdev = hdev;
 +      dev_entry->entry_arr = kmalloc_array(count,
 +                                      sizeof(struct hl_debugfs_entry),
 +                                      GFP_KERNEL);
 +      if (!dev_entry->entry_arr)
 +              return;
 +
 +      INIT_LIST_HEAD(&dev_entry->file_list);
 +      INIT_LIST_HEAD(&dev_entry->cb_list);
 +      INIT_LIST_HEAD(&dev_entry->cs_list);
 +      INIT_LIST_HEAD(&dev_entry->cs_job_list);
 +      INIT_LIST_HEAD(&dev_entry->userptr_list);
 +      INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
 +      mutex_init(&dev_entry->file_mutex);
 +      spin_lock_init(&dev_entry->cb_spinlock);
 +      spin_lock_init(&dev_entry->cs_spinlock);
 +      spin_lock_init(&dev_entry->cs_job_spinlock);
 +      spin_lock_init(&dev_entry->userptr_spinlock);
 +      spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);
 +
 +      dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
 +                                              hl_debug_root);
 +
 +      debugfs_create_x64("addr",
 +                              0644,
 +                              dev_entry->root,
 +                              &dev_entry->addr);
 +
 +      debugfs_create_file("data32",
 +                              0644,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_data32b_fops);
 +
 +      debugfs_create_file("data64",
 +                              0644,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_data64b_fops);
 +
 +      debugfs_create_file("set_power_state",
 +                              0200,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_power_fops);
 +
 +      debugfs_create_u8("i2c_bus",
 +                              0644,
 +                              dev_entry->root,
 +                              &dev_entry->i2c_bus);
 +
 +      debugfs_create_u8("i2c_addr",
 +                              0644,
 +                              dev_entry->root,
 +                              &dev_entry->i2c_addr);
 +
 +      debugfs_create_u8("i2c_reg",
 +                              0644,
 +                              dev_entry->root,
 +                              &dev_entry->i2c_reg);
 +
 +      debugfs_create_file("i2c_data",
 +                              0644,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_i2c_data_fops);
 +
 +      debugfs_create_file("led0",
 +                              0200,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_led0_fops);
 +
 +      debugfs_create_file("led1",
 +                              0200,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_led1_fops);
 +
 +      debugfs_create_file("led2",
 +                              0200,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_led2_fops);
 +
 +      debugfs_create_file("device",
 +                              0200,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_device_fops);
 +
 +      debugfs_create_file("clk_gate",
 +                              0200,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_clk_gate_fops);
 +
 +      debugfs_create_file("stop_on_err",
 +                              0644,
 +                              dev_entry->root,
 +                              dev_entry,
 +                              &hl_stop_on_err_fops);
 +
 +      for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 +
 +              ent = debugfs_create_file(hl_debugfs_list[i].name,
 +                                      0444,
 +                                      dev_entry->root,
 +                                      entry,
 +                                      &hl_debugfs_fops);
 +              entry->dent = ent;
 +              entry->info_ent = &hl_debugfs_list[i];
 +              entry->dev_entry = dev_entry;
 +      }
 +}
 +
 +void hl_debugfs_remove_device(struct hl_device *hdev)
 +{
 +      struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
 +
 +      debugfs_remove_recursive(entry->root);
 +
 +      mutex_destroy(&entry->file_mutex);
 +      kfree(entry->entry_arr);
 +}
 +
 +void hl_debugfs_add_file(struct hl_fpriv *hpriv)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
 +
 +      mutex_lock(&dev_entry->file_mutex);
 +      list_add(&hpriv->debugfs_list, &dev_entry->file_list);
 +      mutex_unlock(&dev_entry->file_mutex);
 +}
 +
 +void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
 +
 +      mutex_lock(&dev_entry->file_mutex);
 +      list_del(&hpriv->debugfs_list);
 +      mutex_unlock(&dev_entry->file_mutex);
 +}
 +
 +void hl_debugfs_add_cb(struct hl_cb *cb)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->cb_spinlock);
 +      list_add(&cb->debugfs_list, &dev_entry->cb_list);
 +      spin_unlock(&dev_entry->cb_spinlock);
 +}
 +
 +void hl_debugfs_remove_cb(struct hl_cb *cb)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->cb_spinlock);
 +      list_del(&cb->debugfs_list);
 +      spin_unlock(&dev_entry->cb_spinlock);
 +}
 +
 +void hl_debugfs_add_cs(struct hl_cs *cs)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->cs_spinlock);
 +      list_add(&cs->debugfs_list, &dev_entry->cs_list);
 +      spin_unlock(&dev_entry->cs_spinlock);
 +}
 +
 +void hl_debugfs_remove_cs(struct hl_cs *cs)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->cs_spinlock);
 +      list_del(&cs->debugfs_list);
 +      spin_unlock(&dev_entry->cs_spinlock);
 +}
 +
 +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->cs_job_spinlock);
 +      list_add(&job->debugfs_list, &dev_entry->cs_job_list);
 +      spin_unlock(&dev_entry->cs_job_spinlock);
 +}
 +
 +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->cs_job_spinlock);
 +      list_del(&job->debugfs_list);
 +      spin_unlock(&dev_entry->cs_job_spinlock);
 +}
 +
 +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->userptr_spinlock);
 +      list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
 +      spin_unlock(&dev_entry->userptr_spinlock);
 +}
 +
 +void hl_debugfs_remove_userptr(struct hl_device *hdev,
 +                              struct hl_userptr *userptr)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->userptr_spinlock);
 +      list_del(&userptr->debugfs_list);
 +      spin_unlock(&dev_entry->userptr_spinlock);
 +}
 +
 +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->ctx_mem_hash_spinlock);
 +      list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
 +      spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 +}
 +
 +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
 +{
 +      struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
 +
 +      spin_lock(&dev_entry->ctx_mem_hash_spinlock);
 +      list_del(&ctx->debugfs_list);
 +      spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 +}
 +
 +void __init hl_debugfs_init(void)
 +{
 +      hl_debug_root = debugfs_create_dir("habanalabs", NULL);
 +}
 +
 +void hl_debugfs_fini(void)
 +{
 +      debugfs_remove_recursive(hl_debug_root);
 +}
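
For reference, the reworked clk_gate node now takes a 64-bit hexadecimal mask
(parsed above via kstrtoull_from_user(buf, count, 16, &value)) rather than the
old 0/1 flag. A hypothetical user-space sketch, assuming debugfs is mounted at
/sys/kernel/debug and using the "habanalabs/<device>" directory created by
hl_debugfs_init() and hl_debugfs_add_device() above; the device name "hl0" in
the usage comment is an assumption:

    /* Hypothetical helper: write a clock-gating mask string, e.g. "0x0" to
     * disable gating on all engines, to the clk_gate debugfs file.
     */
    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    static int hl_write_clk_gate(const char *path, const char *hex_mask)
    {
            int fd = open(path, O_WRONLY);
            ssize_t n;

            if (fd < 0)
                    return -1;
            n = write(fd, hex_mask, strlen(hex_mask));
            close(fd);
            return (n < 0) ? -1 : 0;
    }

    /* usage: hl_write_clk_gate("/sys/kernel/debug/habanalabs/hl0/clk_gate", "0x0"); */
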
diff --cc drivers/misc/habanalabs/common/device.c
index 9919ff121067188ab08d2c74ba5cadf491624e3a,0000000000000000000000000000000000000000..be16b75bdfdb5bbd0ebb54b550a2a4fcfe36e76e
mode 100644,000000..100644
--- /dev/null
+++ b/drivers/misc/habanalabs/common/device.c
@@@ -1,1514 -1,0 +1,1514 @@@
 +// SPDX-License-Identifier: GPL-2.0
 +
 +/*
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + */
 +
 +#define pr_fmt(fmt)                   "habanalabs: " fmt
 +
 +#include "habanalabs.h"
 +
 +#include <linux/pci.h>
 +#include <linux/sched/signal.h>
 +#include <linux/hwmon.h>
 +#include <uapi/misc/habanalabs.h>
 +
 +#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
 +
 +bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
 +{
 +      if ((hdev->disabled) || (atomic_read(&hdev->in_reset)))
 +              return true;
 +      else
 +              return false;
 +}
 +
 +enum hl_device_status hl_device_status(struct hl_device *hdev)
 +{
 +      enum hl_device_status status;
 +
 +      if (hdev->disabled)
 +              status = HL_DEVICE_STATUS_MALFUNCTION;
 +      else if (atomic_read(&hdev->in_reset))
 +              status = HL_DEVICE_STATUS_IN_RESET;
 +      else
 +              status = HL_DEVICE_STATUS_OPERATIONAL;
 +
 +      return status;
 +}
 +
 +static void hpriv_release(struct kref *ref)
 +{
 +      struct hl_fpriv *hpriv;
 +      struct hl_device *hdev;
 +
 +      hpriv = container_of(ref, struct hl_fpriv, refcount);
 +
 +      hdev = hpriv->hdev;
 +
 +      put_pid(hpriv->taskpid);
 +
 +      hl_debugfs_remove_file(hpriv);
 +
 +      mutex_destroy(&hpriv->restore_phase_mutex);
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +      list_del(&hpriv->dev_node);
 +      hdev->compute_ctx = NULL;
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      kfree(hpriv);
 +}
 +
 +void hl_hpriv_get(struct hl_fpriv *hpriv)
 +{
 +      kref_get(&hpriv->refcount);
 +}
 +
 +void hl_hpriv_put(struct hl_fpriv *hpriv)
 +{
 +      kref_put(&hpriv->refcount, hpriv_release);
 +}
 +
 +/*
 + * hl_device_release - release function for habanalabs device
 + *
 + * @inode: pointer to inode structure
 + * @filp: pointer to file structure
 + *
 + * Called when a process closes a habanalabs device
 + */
 +static int hl_device_release(struct inode *inode, struct file *filp)
 +{
 +      struct hl_fpriv *hpriv = filp->private_data;
 +
 +      hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 +      hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 +
 +      filp->private_data = NULL;
 +
 +      hl_hpriv_put(hpriv);
 +
 +      return 0;
 +}
 +
 +static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 +{
 +      struct hl_fpriv *hpriv = filp->private_data;
 +      struct hl_device *hdev;
 +
 +      filp->private_data = NULL;
 +
 +      hdev = hpriv->hdev;
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +      list_del(&hpriv->dev_node);
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      kfree(hpriv);
 +
 +      return 0;
 +}
 +
 +/*
 + * hl_mmap - mmap function for habanalabs device
 + *
 + * @*filp: pointer to file structure
 + * @*vma: pointer to vm_area_struct of the process
 + *
 + * Called when a process does an mmap on a habanalabs device. Call the device's mmap
 + * function at the end of the common code.
 + */
 +static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 +{
 +      struct hl_fpriv *hpriv = filp->private_data;
 +
 +      if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
 +              vma->vm_pgoff ^= HL_MMAP_CB_MASK;
 +              return hl_cb_mmap(hpriv, vma);
 +      }
 +
 +      return -EINVAL;
 +}
 +
 +static const struct file_operations hl_ops = {
 +      .owner = THIS_MODULE,
 +      .open = hl_device_open,
 +      .release = hl_device_release,
 +      .mmap = hl_mmap,
 +      .unlocked_ioctl = hl_ioctl,
 +      .compat_ioctl = hl_ioctl
 +};
 +
 +static const struct file_operations hl_ctrl_ops = {
 +      .owner = THIS_MODULE,
 +      .open = hl_device_open_ctrl,
 +      .release = hl_device_release_ctrl,
 +      .unlocked_ioctl = hl_ioctl_control,
 +      .compat_ioctl = hl_ioctl_control
 +};
 +
 +static void device_release_func(struct device *dev)
 +{
 +      kfree(dev);
 +}
 +
 +/*
 + * device_init_cdev - Initialize cdev and device for habanalabs device
 + *
 + * @hdev: pointer to habanalabs device structure
 + * @hclass: pointer to the class object of the device
 + * @minor: minor number of the specific device
 + * @fops: file operations to install for this device
 + * @name: name of the device as it will appear in the filesystem
 + * @cdev: pointer to the char device object that will be initialized
 + * @dev: pointer to the device object that will be initialized
 + *
 + * Initialize a cdev and a Linux device for the habanalabs device.
 + */
 +static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
 +                              int minor, const struct file_operations *fops,
 +                              char *name, struct cdev *cdev,
 +                              struct device **dev)
 +{
 +      cdev_init(cdev, fops);
 +      cdev->owner = THIS_MODULE;
 +
 +      *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
 +      if (!*dev)
 +              return -ENOMEM;
 +
 +      device_initialize(*dev);
 +      (*dev)->devt = MKDEV(hdev->major, minor);
 +      (*dev)->class = hclass;
 +      (*dev)->release = device_release_func;
 +      dev_set_drvdata(*dev, hdev);
 +      dev_set_name(*dev, "%s", name);
 +
 +      return 0;
 +}
 +
 +static int device_cdev_sysfs_add(struct hl_device *hdev)
 +{
 +      int rc;
 +
 +      rc = cdev_device_add(&hdev->cdev, hdev->dev);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "failed to add a char device to the system\n");
 +              return rc;
 +      }
 +
 +      rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "failed to add a control char device to the system\n");
 +              goto delete_cdev_device;
 +      }
 +
 +      /* hl_sysfs_init() must be done after adding the device to the system */
 +      rc = hl_sysfs_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "failed to initialize sysfs\n");
 +              goto delete_ctrl_cdev_device;
 +      }
 +
 +      hdev->cdev_sysfs_created = true;
 +
 +      return 0;
 +
 +delete_ctrl_cdev_device:
 +      cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
 +delete_cdev_device:
 +      cdev_device_del(&hdev->cdev, hdev->dev);
 +      return rc;
 +}
 +
 +static void device_cdev_sysfs_del(struct hl_device *hdev)
 +{
 +      /* device_release() won't be called so must free devices explicitly */
 +      if (!hdev->cdev_sysfs_created) {
 +              kfree(hdev->dev_ctrl);
 +              kfree(hdev->dev);
 +              return;
 +      }
 +
 +      hl_sysfs_fini(hdev);
 +      cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
 +      cdev_device_del(&hdev->cdev, hdev->dev);
 +}
 +
 +/*
 + * device_early_init - do some early initialization for the habanalabs device
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + * Install the relevant function pointers and call the early_init function,
 + * if such a function exists
 + */
 +static int device_early_init(struct hl_device *hdev)
 +{
 +      int i, rc;
 +      char workq_name[32];
 +
 +      switch (hdev->asic_type) {
 +      case ASIC_GOYA:
 +              goya_set_asic_funcs(hdev);
 +              strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
 +              break;
 +      case ASIC_GAUDI:
 +              gaudi_set_asic_funcs(hdev);
 +              strlcpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
 +              break;
 +      default:
 +              dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 +                      hdev->asic_type);
 +              return -EINVAL;
 +      }
 +
 +      rc = hdev->asic_funcs->early_init(hdev);
 +      if (rc)
 +              return rc;
 +
 +      rc = hl_asid_init(hdev);
 +      if (rc)
 +              goto early_fini;
 +
 +      if (hdev->asic_prop.completion_queues_count) {
 +              hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
 +                              sizeof(*hdev->cq_wq),
 +                              GFP_ATOMIC);
 +              if (!hdev->cq_wq) {
 +                      rc = -ENOMEM;
 +                      goto asid_fini;
 +              }
 +      }
 +
 +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
 +              snprintf(workq_name, sizeof(workq_name), "hl-free-jobs-%u", i);
 +              hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
 +              if (hdev->cq_wq[i] == NULL) {
 +                      dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
 +                      rc = -ENOMEM;
 +                      goto free_cq_wq;
 +              }
 +      }
 +
 +      hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
 +      if (hdev->eq_wq == NULL) {
 +              dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
 +              rc = -ENOMEM;
 +              goto free_cq_wq;
 +      }
 +
 +      hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
 +                                      GFP_KERNEL);
 +      if (!hdev->hl_chip_info) {
 +              rc = -ENOMEM;
 +              goto free_eq_wq;
 +      }
 +
 +      hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
 +                                      sizeof(struct hl_device_idle_busy_ts),
 +                                      (GFP_KERNEL | __GFP_ZERO));
 +      if (!hdev->idle_busy_ts_arr) {
 +              rc = -ENOMEM;
 +              goto free_chip_info;
 +      }
 +
 +      hl_cb_mgr_init(&hdev->kernel_cb_mgr);
 +
 +      mutex_init(&hdev->send_cpu_message_lock);
 +      mutex_init(&hdev->debug_lock);
 +      mutex_init(&hdev->mmu_cache_lock);
 +      INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
 +      spin_lock_init(&hdev->hw_queues_mirror_lock);
 +      INIT_LIST_HEAD(&hdev->fpriv_list);
 +      mutex_init(&hdev->fpriv_list_lock);
 +      atomic_set(&hdev->in_reset, 0);
 +
 +      return 0;
 +
 +free_chip_info:
 +      kfree(hdev->hl_chip_info);
 +free_eq_wq:
 +      destroy_workqueue(hdev->eq_wq);
 +free_cq_wq:
 +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 +              if (hdev->cq_wq[i])
 +                      destroy_workqueue(hdev->cq_wq[i]);
 +      kfree(hdev->cq_wq);
 +asid_fini:
 +      hl_asid_fini(hdev);
 +early_fini:
 +      if (hdev->asic_funcs->early_fini)
 +              hdev->asic_funcs->early_fini(hdev);
 +
 +      return rc;
 +}
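 +
 +/*
 + * Sketch of the goto-unwind idiom used above (illustrative, hypothetical
 + * name): every acquired resource gets a label, and a failure at step N
 + * jumps to the label that releases steps N-1..1 in reverse order.
 + */
 +static int example_init_unwind(struct hl_device *hdev)
 +{
 +      int rc;
 +
 +      rc = hl_asid_init(hdev);                /* step 1 */
 +      if (rc)
 +              return rc;
 +
 +      hdev->eq_wq = alloc_workqueue("example-eq", WQ_UNBOUND, 0);
 +      if (hdev->eq_wq == NULL) {              /* step 2 failed */
 +              rc = -ENOMEM;
 +              goto asid_fini;
 +      }
 +
 +      return 0;
 +
 +asid_fini:
 +      hl_asid_fini(hdev);                     /* undo step 1 */
 +      return rc;
 +}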
 +
 +/*
 + * device_early_fini - finalize all that was done in device_early_init
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + */
 +static void device_early_fini(struct hl_device *hdev)
 +{
 +      int i;
 +
 +      mutex_destroy(&hdev->mmu_cache_lock);
 +      mutex_destroy(&hdev->debug_lock);
 +      mutex_destroy(&hdev->send_cpu_message_lock);
 +
 +      mutex_destroy(&hdev->fpriv_list_lock);
 +
 +      hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 +
 +      kfree(hdev->idle_busy_ts_arr);
 +      kfree(hdev->hl_chip_info);
 +
 +      destroy_workqueue(hdev->eq_wq);
 +
 +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 +              destroy_workqueue(hdev->cq_wq[i]);
 +      kfree(hdev->cq_wq);
 +
 +      hl_asid_fini(hdev);
 +
 +      if (hdev->asic_funcs->early_fini)
 +              hdev->asic_funcs->early_fini(hdev);
 +}
 +
 +static void set_freq_to_low_job(struct work_struct *work)
 +{
 +      struct hl_device *hdev = container_of(work, struct hl_device,
 +                                              work_freq.work);
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +
 +      if (!hdev->compute_ctx)
 +              hl_device_set_frequency(hdev, PLL_LOW);
 +
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      schedule_delayed_work(&hdev->work_freq,
 +                      usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
 +}
 +
 +static void hl_device_heartbeat(struct work_struct *work)
 +{
 +      struct hl_device *hdev = container_of(work, struct hl_device,
 +                                              work_heartbeat.work);
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              goto reschedule;
 +
 +      if (!hdev->asic_funcs->send_heartbeat(hdev))
 +              goto reschedule;
 +
 +      dev_err(hdev->dev, "Device heartbeat failed!\n");
 +      hl_device_reset(hdev, true, false);
 +
 +      return;
 +
 +reschedule:
 +      schedule_delayed_work(&hdev->work_heartbeat,
 +                      usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 +}
 +
 +/*
 + * device_late_init - do late initialization for the habanalabs device
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + * Perform the steps that either need the device H/W queues to be active or
 + * must happen after all the rest of the initialization is finished
 + */
 +static int device_late_init(struct hl_device *hdev)
 +{
 +      int rc;
 +
 +      if (hdev->asic_funcs->late_init) {
 +              rc = hdev->asic_funcs->late_init(hdev);
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "failed late initialization for the H/W\n");
 +                      return rc;
 +              }
 +      }
 +
 +      hdev->high_pll = hdev->asic_prop.high_pll;
 +
 +      /* force setting to low frequency */
 +      hdev->curr_pll_profile = PLL_LOW;
 +
 +      if (hdev->pm_mng_profile == PM_AUTO)
 +              hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
 +      else
 +              hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);
 +
 +      INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
 +      schedule_delayed_work(&hdev->work_freq,
 +                      usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
 +
 +      if (hdev->heartbeat) {
 +              INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
 +              schedule_delayed_work(&hdev->work_heartbeat,
 +                              usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 +      }
 +
 +      hdev->late_init_done = true;
 +
 +      return 0;
 +}
 +
 +/*
 + * device_late_fini - finalize all that was done in device_late_init
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + */
 +static void device_late_fini(struct hl_device *hdev)
 +{
 +      if (!hdev->late_init_done)
 +              return;
 +
 +      cancel_delayed_work_sync(&hdev->work_freq);
 +      if (hdev->heartbeat)
 +              cancel_delayed_work_sync(&hdev->work_heartbeat);
 +
 +      if (hdev->asic_funcs->late_fini)
 +              hdev->asic_funcs->late_fini(hdev);
 +
 +      hdev->late_init_done = false;
 +}
 +
 +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
 +{
 +      struct hl_device_idle_busy_ts *ts;
 +      ktime_t zero_ktime, curr = ktime_get();
 +      u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
 +      s64 period_us, last_start_us, last_end_us, last_busy_time_us,
 +              total_busy_time_us = 0, total_busy_time_ms;
 +
 +      zero_ktime = ktime_set(0, 0);
 +      period_us = period_ms * USEC_PER_MSEC;
 +      ts = &hdev->idle_busy_ts_arr[last_index];
 +
 +      /* Check the case where the device is currently idle */
 +      if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
 +                      !ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
 +
 +              last_index--;
 +              /* Handle the case where idle_busy_ts_idx was 0 (unsigned wrap) */
 +              if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
 +                      last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
 +
 +              ts = &hdev->idle_busy_ts_arr[last_index];
 +      }
 +
 +      while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
 +              /* Check if we are in the last sample case, i.e. if the
 +               * sample began before the sampling period. This could be a
 +               * real sample or 0, so both cases need to be handled
 +               */
 +              last_start_us = ktime_to_us(
 +                              ktime_sub(curr, ts->idle_to_busy_ts));
 +
 +              if (last_start_us > period_us) {
 +
 +                      /* First check two cases:
 +                       * 1. If the device is currently busy
 +                       * 2. If the device was idle during the whole sampling
 +                       *    period
 +                       */
 +
 +                      if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
 +                              /* Check if the device is currently busy */
 +                              if (ktime_compare(ts->idle_to_busy_ts,
 +                                              zero_ktime))
 +                                      return 100;
 +
 +                              /* We either didn't have any activity or we
 +                               * reached an entry which is 0. Either way,
 +                               * exit and return what was accumulated so far
 +                               */
 +                              break;
 +                      }
 +
 +                      /* If sample has finished, check it is relevant */
 +                      last_end_us = ktime_to_us(
 +                                      ktime_sub(curr, ts->busy_to_idle_ts));
 +
 +                      if (last_end_us > period_us)
 +                              break;
 +
 +                      /* It is relevant so add it but with adjustment */
 +                      last_busy_time_us = ktime_to_us(
 +                                              ktime_sub(ts->busy_to_idle_ts,
 +                                              ts->idle_to_busy_ts));
 +                      total_busy_time_us += last_busy_time_us -
 +                                      (last_start_us - period_us);
 +                      break;
 +              }
 +
 +              /* Check if the sample is finished or still open */
 +              if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
 +                      last_busy_time_us = ktime_to_us(
 +                                              ktime_sub(ts->busy_to_idle_ts,
 +                                              ts->idle_to_busy_ts));
 +              else
 +                      last_busy_time_us = ktime_to_us(
 +                                      ktime_sub(curr, ts->idle_to_busy_ts));
 +
 +              total_busy_time_us += last_busy_time_us;
 +
 +              last_index--;
 +              /* Handle the case where idle_busy_ts_idx was 0 (unsigned wrap) */
 +              if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
 +                      last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
 +
 +              ts = &hdev->idle_busy_ts_arr[last_index];
 +
 +              overlap_cnt++;
 +      }
 +
 +      total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
 +                                              USEC_PER_MSEC);
 +
 +      return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
 +}
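 +
 +/*
 + * Worked example for the math above (illustrative): with period_ms = 1000
 + * and a single busy interval that started 400ms ago and ended 150ms ago
 + * (all older entries zero), last_busy_time_us = 250000, so
 + * total_busy_time_ms = 250 and the function returns
 + * DIV_ROUND_UP(250 * 100, 1000) = 25, i.e. 25% utilization.
 + */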
 +
 +/*
 + * hl_device_set_frequency - set the frequency of the device
 + *
 + * @hdev: pointer to habanalabs device structure
 + * @freq: the new frequency value
 + *
 + * Change the frequency if needed. This function has no protection against
 + * concurrency, therefore it is assumed that the calling function has protected
 + * itself against the case of calling this function from multiple threads with
 + * different values
 + *
 + * Returns 0 if no change was done, otherwise returns 1
 + */
 +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
 +{
 +      if ((hdev->pm_mng_profile == PM_MANUAL) ||
 +                      (hdev->curr_pll_profile == freq))
 +              return 0;
 +
 +      dev_dbg(hdev->dev, "Changing device frequency to %s\n",
 +              freq == PLL_HIGH ? "high" : "low");
 +
 +      hdev->asic_funcs->set_pll_profile(hdev, freq);
 +
 +      hdev->curr_pll_profile = freq;
 +
 +      return 1;
 +}
 +
 +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
 +{
 +      int rc = 0;
 +
 +      mutex_lock(&hdev->debug_lock);
 +
 +      if (!enable) {
 +              if (!hdev->in_debug) {
 +                      dev_err(hdev->dev,
 +                              "Failed to disable debug mode because device was not in debug mode\n");
 +                      rc = -EFAULT;
 +                      goto out;
 +              }
 +
 +              if (!hdev->hard_reset_pending)
 +                      hdev->asic_funcs->halt_coresight(hdev);
 +
 +              hdev->in_debug = 0;
 +
 +              if (!hdev->hard_reset_pending)
++                      hdev->asic_funcs->set_clock_gating(hdev);
 +
 +              goto out;
 +      }
 +
 +      if (hdev->in_debug) {
 +              dev_err(hdev->dev,
 +                      "Failed to enable debug mode because device is already in debug mode\n");
 +              rc = -EFAULT;
 +              goto out;
 +      }
 +
 +      hdev->asic_funcs->disable_clock_gating(hdev);
 +      hdev->in_debug = 1;
 +
 +out:
 +      mutex_unlock(&hdev->debug_lock);
 +
 +      return rc;
 +}
 +
 +/*
 + * hl_device_suspend - initiate device suspend
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + * Puts the H/W in the suspend state (all ASICs).
 + * Returns 0 for success or an error on failure.
 + * Called at driver suspend.
 + */
 +int hl_device_suspend(struct hl_device *hdev)
 +{
 +      int rc;
 +
 +      pci_save_state(hdev->pdev);
 +
 +      /* Block future CS/VM/JOB completion operations */
 +      rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 +      if (rc) {
 +              dev_err(hdev->dev, "Can't suspend while in reset\n");
 +              return -EIO;
 +      }
 +
 +      /* This blocks all other stuff that is not blocked by in_reset */
 +      hdev->disabled = true;
 +
 +      /*
 +       * Flush anyone that is inside the critical section of enqueue
 +       * jobs to the H/W
 +       */
 +      hdev->asic_funcs->hw_queues_lock(hdev);
 +      hdev->asic_funcs->hw_queues_unlock(hdev);
 +
 +      /* Flush processes that are sending message to CPU */
 +      mutex_lock(&hdev->send_cpu_message_lock);
 +      mutex_unlock(&hdev->send_cpu_message_lock);
 +
 +      rc = hdev->asic_funcs->suspend(hdev);
 +      if (rc)
 +              dev_err(hdev->dev,
 +                      "Failed to disable PCI access of device CPU\n");
 +
 +      /* Shut down the device */
 +      pci_disable_device(hdev->pdev);
 +      pci_set_power_state(hdev->pdev, PCI_D3hot);
 +
 +      return 0;
 +}
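 +
 +/*
 + * Sketch of the lock/unlock "flush" idiom used above (illustrative,
 + * hypothetical name): once hdev->disabled is set, acquiring and then
 + * immediately releasing a lock guarantees that any thread which saw the
 + * old state and already entered the critical section has since left it.
 + */
 +static void example_flush_critical_section(struct mutex *lock)
 +{
 +      mutex_lock(lock);       /* blocks until the current holder is done */
 +      mutex_unlock(lock);     /* nothing to do - the wait was the point */
 +}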
 +
 +/*
 + * hl_device_resume - initiate device resume
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + * Brings the H/W back to an operating state (all ASICs).
 + * Returns 0 for success or an error on failure.
 + * Called at driver resume.
 + */
 +int hl_device_resume(struct hl_device *hdev)
 +{
 +      int rc;
 +
 +      pci_set_power_state(hdev->pdev, PCI_D0);
 +      pci_restore_state(hdev->pdev);
 +      rc = pci_enable_device_mem(hdev->pdev);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to enable PCI device in resume\n");
 +              return rc;
 +      }
 +
 +      pci_set_master(hdev->pdev);
 +
 +      rc = hdev->asic_funcs->resume(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to resume device after suspend\n");
 +              goto disable_device;
 +      }
 +
 +      hdev->disabled = false;
 +      atomic_set(&hdev->in_reset, 0);
 +
 +      rc = hl_device_reset(hdev, true, false);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to reset device during resume\n");
 +              goto disable_device;
 +      }
 +
 +      return 0;
 +
 +disable_device:
 +      pci_clear_master(hdev->pdev);
 +      pci_disable_device(hdev->pdev);
 +
 +      return rc;
 +}
 +
 +static int device_kill_open_processes(struct hl_device *hdev)
 +{
 +      u16 pending_total, pending_cnt;
 +      struct hl_fpriv *hpriv;
 +      struct task_struct *task = NULL;
 +
 +      if (hdev->pldm)
 +              pending_total = HL_PLDM_PENDING_RESET_PER_SEC;
 +      else
 +              pending_total = HL_PENDING_RESET_PER_SEC;
 +
 +      /* Give the user time to close the FD, and give processes that are
 +       * inside hl_device_open time to finish
 +       */
 +      if (!list_empty(&hdev->fpriv_list))
 +              ssleep(1);
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +
 +      /* This section must be protected because we are dereferencing
 +       * pointers that are freed if the process exits
 +       */
 +      list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
 +              task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
 +              if (task) {
 +                      dev_info(hdev->dev, "Killing user process pid=%d\n",
 +                              task_pid_nr(task));
 +                      send_sig(SIGKILL, task, 1);
 +                      usleep_range(1000, 10000);
 +
 +                      put_task_struct(task);
 +              }
 +      }
 +
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      /* We killed the open users, but because the driver cleans up after the
 +       * user contexts are closed (e.g. mmu mappings), we need to wait again
 +       * to make sure the cleaning phase is finished before continuing with
 +       * the reset
 +       */
 +
 +      pending_cnt = pending_total;
 +
 +      while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
 +              dev_info(hdev->dev,
 +                      "Waiting for all unmap operations to finish before hard reset\n");
 +
 +              pending_cnt--;
 +
 +              ssleep(1);
 +      }
 +
 +      return list_empty(&hdev->fpriv_list) ? 0 : -EBUSY;
 +}
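 +
 +/*
 + * Timing note for the loop above (illustrative): pending_total bounds the
 + * wait in whole seconds, since each pass sleeps for one second. If
 + * HL_PENDING_RESET_PER_SEC were 5, for example, user-space cleanup would
 + * get at most ~5 extra seconds before the function returns -EBUSY.
 + */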
 +
 +static void device_hard_reset_pending(struct work_struct *work)
 +{
 +      struct hl_device_reset_work *device_reset_work =
 +              container_of(work, struct hl_device_reset_work, reset_work);
 +      struct hl_device *hdev = device_reset_work->hdev;
 +
 +      hl_device_reset(hdev, true, true);
 +
 +      kfree(device_reset_work);
 +}
 +
 +/*
 + * hl_device_reset - reset the device
 + *
 + * @hdev: pointer to habanalabs device structure
 + * @hard_reset: should we do hard reset to all engines or just reset the
 + *              compute/dma engines
 + * @from_hard_reset_thread: is the caller the hard-reset thread
 + *
 + * Block future CS and wait for pending CS to be enqueued
 + * Call ASIC H/W fini
 + * Flush all completions
 + * Re-initialize all internal data structures
 + * Call ASIC H/W init, late_init
 + * Test queues
 + * Enable device
 + *
 + * Returns 0 for success or an error on failure.
 + */
 +int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 +                      bool from_hard_reset_thread)
 +{
 +      int i, rc;
 +
 +      if (!hdev->init_done) {
 +              dev_err(hdev->dev,
 +                      "Can't reset before initialization is done\n");
 +              return 0;
 +      }
 +
 +      if ((!hard_reset) && (!hdev->supports_soft_reset)) {
 +              dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
 +              hard_reset = true;
 +      }
 +
 +      /*
 +       * Prevent concurrency in this function - only one reset should be
 +       * done at any given time. Only need to perform this if we didn't
 +       * get from the dedicated hard reset thread
 +       */
 +      if (!from_hard_reset_thread) {
 +              /* Block future CS/VM/JOB completion operations */
 +              rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 +              if (rc)
 +                      return 0;
 +
 +              if (hard_reset) {
 +                      /* Disable PCI access from the device F/W so it won't
 +                       * send us additional interrupts. We disable MSI/MSI-X
 +                       * at the halt_engines function and we can't have the
 +                       * F/W sending us interrupts after that. We need to
 +                       * disable the access here because if the device is
 +                       * marked as disabled, the message won't be sent.
 +                       * Also, in case of heartbeat, the device CPU is
 +                       * marked as disabled so this message won't be sent
 +                       */
 +                      if (hl_fw_send_pci_access_msg(hdev,
 +                                      ARMCP_PACKET_DISABLE_PCI_ACCESS))
 +                              dev_warn(hdev->dev,
 +                                      "Failed to disable PCI access by F/W\n");
 +              }
 +
 +              /* This also blocks future CS/VM/JOB completion operations */
 +              hdev->disabled = true;
 +
 +              /* Flush anyone that is inside the critical section of enqueue
 +               * jobs to the H/W
 +               */
 +              hdev->asic_funcs->hw_queues_lock(hdev);
 +              hdev->asic_funcs->hw_queues_unlock(hdev);
 +
 +              /* Flush anyone that is inside device open */
 +              mutex_lock(&hdev->fpriv_list_lock);
 +              mutex_unlock(&hdev->fpriv_list_lock);
 +
 +              dev_err(hdev->dev, "Going to RESET device!\n");
 +      }
 +
 +again:
 +      if ((hard_reset) && (!from_hard_reset_thread)) {
 +              struct hl_device_reset_work *device_reset_work;
 +
 +              hdev->hard_reset_pending = true;
 +
 +              device_reset_work = kzalloc(sizeof(*device_reset_work),
 +                                              GFP_ATOMIC);
 +              if (!device_reset_work) {
 +                      rc = -ENOMEM;
 +                      goto out_err;
 +              }
 +
 +              /*
 +               * Because the reset function can't run from interrupt or
 +               * from heartbeat work, we need to call the reset function
 +               * from a dedicated work
 +               */
 +              INIT_WORK(&device_reset_work->reset_work,
 +                              device_hard_reset_pending);
 +              device_reset_work->hdev = hdev;
 +              schedule_work(&device_reset_work->reset_work);
 +
 +              return 0;
 +      }
 +
 +      if (hard_reset) {
 +              device_late_fini(hdev);
 +
 +              /*
 +               * Now that the heartbeat thread is closed, flush processes
 +               * which are sending messages to CPU
 +               */
 +              mutex_lock(&hdev->send_cpu_message_lock);
 +              mutex_unlock(&hdev->send_cpu_message_lock);
 +      }
 +
 +      /*
 +       * Halt the engines and disable interrupts so we won't get any more
 +       * completions from H/W and we won't have any accesses from the
 +       * H/W to the host machine
 +       */
 +      hdev->asic_funcs->halt_engines(hdev, hard_reset);
 +
 +      /* Go over all the queues, release all CS and their jobs */
 +      hl_cs_rollback_all(hdev);
 +
 +      if (hard_reset) {
 +              /* Kill processes here after CS rollback. This is because the
 +               * process can't really exit until all its CSs are done, which
 +               * is what we do in CS rollback
 +               */
 +              rc = device_kill_open_processes(hdev);
 +              if (rc) {
 +                      dev_crit(hdev->dev,
 +                              "Failed to kill all open processes, stopping hard reset\n");
 +                      goto out_err;
 +              }
 +
 +              /* Flush the Event queue workers to make sure no other thread is
 +               * reading or writing to registers during the reset
 +               */
 +              flush_workqueue(hdev->eq_wq);
 +      }
 +
 +      /* Release kernel context */
 +      if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
 +              hdev->kernel_ctx = NULL;
 +
 +      /* Reset the H/W. It will be in idle state after this returns */
 +      hdev->asic_funcs->hw_fini(hdev, hard_reset);
 +
 +      if (hard_reset) {
 +              hl_vm_fini(hdev);
 +              hl_mmu_fini(hdev);
 +              hl_eq_reset(hdev, &hdev->event_queue);
 +      }
 +
 +      /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
 +      hl_hw_queue_reset(hdev, hard_reset);
 +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 +              hl_cq_reset(hdev, &hdev->completion_queue[i]);
 +
 +      hdev->idle_busy_ts_idx = 0;
 +      hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
 +      hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
 +
 +      if (hdev->cs_active_cnt)
 +              dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
 +                      hdev->cs_active_cnt);
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +
 +      /* Make sure the context switch phase will run again */
 +      if (hdev->compute_ctx) {
 +              atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
 +              hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
 +      }
 +
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      /* Finished tear-down, starting to re-initialize */
 +
 +      if (hard_reset) {
 +              hdev->device_cpu_disabled = false;
 +              hdev->hard_reset_pending = false;
 +
 +              if (hdev->kernel_ctx) {
 +                      dev_crit(hdev->dev,
 +                              "kernel ctx was alive during hard reset, something is terribly wrong\n");
 +                      rc = -EBUSY;
 +                      goto out_err;
 +              }
 +
 +              rc = hl_mmu_init(hdev);
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "Failed to initialize MMU S/W after hard reset\n");
 +                      goto out_err;
 +              }
 +
 +              /* Allocate the kernel context */
 +              hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
 +                                              GFP_KERNEL);
 +              if (!hdev->kernel_ctx) {
 +                      rc = -ENOMEM;
 +                      goto out_err;
 +              }
 +
 +              hdev->compute_ctx = NULL;
 +
 +              rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "failed to init kernel ctx in hard reset\n");
 +                      kfree(hdev->kernel_ctx);
 +                      hdev->kernel_ctx = NULL;
 +                      goto out_err;
 +              }
 +      }
 +
 +      /* Enable the device now, because part of the initialization requires
 +       * communication with the device firmware to get information that
 +       * is required for the initialization itself
 +       */
 +      hdev->disabled = false;
 +
 +      rc = hdev->asic_funcs->hw_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "failed to initialize the H/W after reset\n");
 +              goto out_err;
 +      }
 +
 +      /* Check that the communication with the device is working */
 +      rc = hdev->asic_funcs->test_queues(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to detect if device is alive after reset\n");
 +              goto out_err;
 +      }
 +
 +      if (hard_reset) {
 +              rc = device_late_init(hdev);
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "Failed late init after hard reset\n");
 +                      goto out_err;
 +              }
 +
 +              rc = hl_vm_init(hdev);
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "Failed to init memory module after hard reset\n");
 +                      goto out_err;
 +              }
 +
 +              hl_set_max_power(hdev, hdev->max_power);
 +      } else {
 +              rc = hdev->asic_funcs->soft_reset_late_init(hdev);
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "Failed late init after soft reset\n");
 +                      goto out_err;
 +              }
 +      }
 +
 +      atomic_set(&hdev->in_reset, 0);
 +
 +      if (hard_reset)
 +              hdev->hard_reset_cnt++;
 +      else
 +              hdev->soft_reset_cnt++;
 +
 +      dev_warn(hdev->dev, "Successfully finished resetting the device\n");
 +
 +      return 0;
 +
 +out_err:
 +      hdev->disabled = true;
 +
 +      if (hard_reset) {
 +              dev_err(hdev->dev,
 +                      "Failed to reset! Device is NOT usable\n");
 +              hdev->hard_reset_cnt++;
 +      } else {
 +              dev_err(hdev->dev,
 +                      "Failed to do soft-reset, trying hard reset\n");
 +              hdev->soft_reset_cnt++;
 +              hard_reset = true;
 +              goto again;
 +      }
 +
 +      atomic_set(&hdev->in_reset, 0);
 +
 +      return rc;
 +}
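 +
 +/*
 + * Sketch of the in_reset gate used above (illustrative, hypothetical
 + * name): atomic_cmpxchg() returns the previous value, so exactly one
 + * caller observes 0, installs 1 and proceeds; concurrent callers see 1
 + * and back off.
 + */
 +static bool example_try_enter_reset(struct hl_device *hdev)
 +{
 +      return atomic_cmpxchg(&hdev->in_reset, 0, 1) == 0;
 +}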
 +
 +/*
 + * hl_device_init - main initialization function for habanalabs device
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + * Allocate an id for the device, do early initialization and then call the
 + * ASIC specific initialization functions. Finally, create the cdev and the
 + * Linux device to expose it to the user
 + */
 +int hl_device_init(struct hl_device *hdev, struct class *hclass)
 +{
 +      int i, rc, cq_cnt, cq_ready_cnt;
 +      char *name;
 +      bool add_cdev_sysfs_on_err = false;
 +
 +      name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
 +      if (!name) {
 +              rc = -ENOMEM;
 +              goto out_disabled;
 +      }
 +
 +      /* Initialize cdev and device structures */
 +      rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
 +                              &hdev->cdev, &hdev->dev);
 +
 +      kfree(name);
 +
 +      if (rc)
 +              goto out_disabled;
 +
 +      name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
 +      if (!name) {
 +              rc = -ENOMEM;
 +              goto free_dev;
 +      }
 +
 +      /* Initialize cdev and device structures for control device */
 +      rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
 +                              name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
 +
 +      kfree(name);
 +
 +      if (rc)
 +              goto free_dev;
 +
 +      /* Initialize ASIC function pointers and perform early init */
 +      rc = device_early_init(hdev);
 +      if (rc)
 +              goto free_dev_ctrl;
 +
 +      /*
 +       * Start calling ASIC initialization. First S/W then H/W and finally
 +       * late init
 +       */
 +      rc = hdev->asic_funcs->sw_init(hdev);
 +      if (rc)
 +              goto early_fini;
 +
 +      /*
 +       * Initialize the H/W queues. Must be done before hw_init, because
 +       * there the addresses of the kernel queue are being written to the
 +       * registers of the device
 +       */
 +      rc = hl_hw_queues_create(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "failed to initialize kernel queues\n");
 +              goto sw_fini;
 +      }
 +
 +      cq_cnt = hdev->asic_prop.completion_queues_count;
 +
 +      /*
 +       * Initialize the completion queues. Must be done before hw_init,
 +       * because there the addresses of the completion queues are being
 +       * passed as arguments to request_irq
 +       */
 +      if (cq_cnt) {
 +              hdev->completion_queue = kcalloc(cq_cnt,
 +                              sizeof(*hdev->completion_queue),
 +                              GFP_KERNEL);
 +
 +              if (!hdev->completion_queue) {
 +                      dev_err(hdev->dev,
 +                              "failed to allocate completion queues\n");
 +                      rc = -ENOMEM;
 +                      goto hw_queues_destroy;
 +              }
 +      }
 +
 +      for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
 +              rc = hl_cq_init(hdev, &hdev->completion_queue[i],
 +                              hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "failed to initialize completion queue\n");
 +                      goto cq_fini;
 +              }
 +              hdev->completion_queue[i].cq_idx = i;
 +      }
 +
 +      /*
 +       * Initialize the event queue. Must be done before hw_init,
 +       * because there the address of the event queue is being
 +       * passed as argument to request_irq
 +       */
 +      rc = hl_eq_init(hdev, &hdev->event_queue);
 +      if (rc) {
 +              dev_err(hdev->dev, "failed to initialize event queue\n");
 +              goto cq_fini;
 +      }
 +
 +      /* MMU S/W must be initialized before kernel context is created */
 +      rc = hl_mmu_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
 +              goto eq_fini;
 +      }
 +
 +      /* Allocate the kernel context */
 +      hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
 +      if (!hdev->kernel_ctx) {
 +              rc = -ENOMEM;
 +              goto mmu_fini;
 +      }
 +
 +      hdev->compute_ctx = NULL;
 +
 +      rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 +      if (rc) {
 +              dev_err(hdev->dev, "failed to initialize kernel context\n");
 +              kfree(hdev->kernel_ctx);
 +              goto mmu_fini;
 +      }
 +
 +      rc = hl_cb_pool_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "failed to initialize CB pool\n");
 +              goto release_ctx;
 +      }
 +
 +      hl_debugfs_add_device(hdev);
 +
 +      if (hdev->asic_funcs->get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
 +              dev_info(hdev->dev,
 +                      "H/W state is dirty, must reset before initializing\n");
 +              hdev->asic_funcs->halt_engines(hdev, true);
 +              hdev->asic_funcs->hw_fini(hdev, true);
 +      }
 +
 +      /*
 +       * From this point, in case of an error, add char devices and create
 +       * sysfs nodes as part of the error flow, to allow debugging.
 +       */
 +      add_cdev_sysfs_on_err = true;
 +
 +      /* Enable the device now, because part of the initialization requires
 +       * communication with the device firmware to get information that
 +       * is required for the initialization itself
 +       */
 +      hdev->disabled = false;
 +
 +      rc = hdev->asic_funcs->hw_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "failed to initialize the H/W\n");
 +              rc = 0;
 +              goto out_disabled;
 +      }
 +
 +      /* Check that the communication with the device is working */
 +      rc = hdev->asic_funcs->test_queues(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to detect if device is alive\n");
 +              rc = 0;
 +              goto out_disabled;
 +      }
 +
 +      rc = device_late_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed late initialization\n");
 +              rc = 0;
 +              goto out_disabled;
 +      }
 +
 +      dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
 +              hdev->asic_name,
 +              hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
 +
 +      rc = hl_vm_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to initialize memory module\n");
 +              rc = 0;
 +              goto out_disabled;
 +      }
 +
 +      /*
 +       * Expose devices and sysfs nodes to user.
 +       * From here there is no need to add char devices and create sysfs nodes
 +       * in case of an error.
 +       */
 +      add_cdev_sysfs_on_err = false;
 +      rc = device_cdev_sysfs_add(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to add char devices and sysfs nodes\n");
 +              rc = 0;
 +              goto out_disabled;
 +      }
 +
 +      /*
 +       * hl_hwmon_init() must be called after device_late_init(), because only
 +       * there we get the information from the device about which
 +       * hwmon-related sensors the device supports.
 +       * Furthermore, it must be done after adding the device to the system.
 +       */
 +      rc = hl_hwmon_init(hdev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to initialize hwmon\n");
 +              rc = 0;
 +              goto out_disabled;
 +      }
 +
 +      dev_notice(hdev->dev,
 +              "Successfully added device to habanalabs driver\n");
 +
 +      hdev->init_done = true;
 +
 +      return 0;
 +
 +release_ctx:
 +      if (hl_ctx_put(hdev->kernel_ctx) != 1)
 +              dev_err(hdev->dev,
 +                      "kernel ctx is still alive on initialization failure\n");
 +mmu_fini:
 +      hl_mmu_fini(hdev);
 +eq_fini:
 +      hl_eq_fini(hdev, &hdev->event_queue);
 +cq_fini:
 +      for (i = 0 ; i < cq_ready_cnt ; i++)
 +              hl_cq_fini(hdev, &hdev->completion_queue[i]);
 +      kfree(hdev->completion_queue);
 +hw_queues_destroy:
 +      hl_hw_queues_destroy(hdev);
 +sw_fini:
 +      hdev->asic_funcs->sw_fini(hdev);
 +early_fini:
 +      device_early_fini(hdev);
 +free_dev_ctrl:
 +      kfree(hdev->dev_ctrl);
 +free_dev:
 +      kfree(hdev->dev);
 +out_disabled:
 +      hdev->disabled = true;
 +      if (add_cdev_sysfs_on_err)
 +              device_cdev_sysfs_add(hdev);
 +      if (hdev->pdev)
 +              dev_err(&hdev->pdev->dev,
 +                      "Failed to initialize hl%d. Device is NOT usable!\n",
 +                      hdev->id / 2);
 +      else
 +              pr_err("Failed to initialize hl%d. Device is NOT usable!\n",
 +                      hdev->id / 2);
 +
 +      return rc;
 +}
 +
 +/*
 + * hl_device_fini - main tear-down function for habanalabs device
 + *
 + * @hdev: pointer to habanalabs device structure
 + *
 + * Destroy the device, call ASIC fini functions and release the id
 + */
 +void hl_device_fini(struct hl_device *hdev)
 +{
 +      int i, rc;
 +      ktime_t timeout;
 +
 +      dev_info(hdev->dev, "Removing device\n");
 +
 +      /*
 +       * This function competes with the reset function, so try to take the
 +       * reset atomic and, if we are already in the middle of a reset, wait
 +       * until the reset function is finished. The reset function is designed
 +       * to always finish. However, in Gaudi, because of all the network
 +       * ports, the hard reset could take between 10-30 seconds
 +       */
 +
 +      timeout = ktime_add_us(ktime_get(),
 +                              HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
 +      rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 +      while (rc) {
 +              usleep_range(50, 200);
 +              rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
 +              if (ktime_compare(ktime_get(), timeout) > 0) {
 +                      WARN(1, "Failed to remove device because reset function did not finish\n");
 +                      return;
 +              }
 +      }
 +
 +      /* Mark device as disabled */
 +      hdev->disabled = true;
 +
 +      /* Flush anyone that is inside the critical section of enqueue
 +       * jobs to the H/W
 +       */
 +      hdev->asic_funcs->hw_queues_lock(hdev);
 +      hdev->asic_funcs->hw_queues_unlock(hdev);
 +
 +      /* Flush anyone that is inside device open */
 +      mutex_lock(&hdev->fpriv_list_lock);
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      hdev->hard_reset_pending = true;
 +
 +      hl_hwmon_fini(hdev);
 +
 +      device_late_fini(hdev);
 +
 +      hl_debugfs_remove_device(hdev);
 +
 +      /*
 +       * Halt the engines and disable interrupts so we won't get any more
 +       * completions from H/W and we won't have any accesses from the
 +       * H/W to the host machine
 +       */
 +      hdev->asic_funcs->halt_engines(hdev, true);
 +
 +      /* Go over all the queues, release all CS and their jobs */
 +      hl_cs_rollback_all(hdev);
 +
 +      /* Kill processes here after CS rollback. This is because the process
 +       * can't really exit until all its CSs are done, which is what we
 +       * do in CS rollback
 +       */
 +      rc = device_kill_open_processes(hdev);
 +      if (rc)
 +              dev_crit(hdev->dev, "Failed to kill all open processes\n");
 +
 +      hl_cb_pool_fini(hdev);
 +
 +      /* Release kernel context */
 +      if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
 +              dev_err(hdev->dev, "kernel ctx is still alive\n");
 +
 +      /* Reset the H/W. It will be in idle state after this returns */
 +      hdev->asic_funcs->hw_fini(hdev, true);
 +
 +      hl_vm_fini(hdev);
 +
 +      hl_mmu_fini(hdev);
 +
 +      hl_eq_fini(hdev, &hdev->event_queue);
 +
 +      for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 +              hl_cq_fini(hdev, &hdev->completion_queue[i]);
 +      kfree(hdev->completion_queue);
 +
 +      hl_hw_queues_destroy(hdev);
 +
 +      /* Call ASIC S/W finalize function */
 +      hdev->asic_funcs->sw_fini(hdev);
 +
 +      device_early_fini(hdev);
 +
 +      /* Hide devices and sysfs nodes from user */
 +      device_cdev_sysfs_del(hdev);
 +
 +      pr_info("removed device successfully\n");
 +}
 +
 +/*
 + * MMIO register access helper functions.
 + */
 +
 +/*
 + * hl_rreg - Read an MMIO register
 + *
 + * @hdev: pointer to habanalabs device structure
 + * @reg: MMIO register offset (in bytes)
 + *
 + * Returns the value of the MMIO register we are asked to read
 + *
 + */
 +inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
 +{
 +      return readl(hdev->rmmio + reg);
 +}
 +
 +/*
 + * hl_wreg - Write to an MMIO register
 + *
 + * @hdev: pointer to habanalabs device structure
 + * @reg: MMIO register offset (in bytes)
 + * @val: 32-bit value
 + *
 + * Writes the 32-bit value into the MMIO register
 + *
 + */
 +inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
 +{
 +      writel(val, hdev->rmmio + reg);
 +}
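 +
 +/*
 + * Illustrative usage of the helpers above (hypothetical name): a
 + * read-modify-write of an MMIO register.
 + */
 +static inline void example_set_reg_bits(struct hl_device *hdev, u32 reg,
 +                                      u32 bits)
 +{
 +      hl_wreg(hdev, reg, hl_rreg(hdev, reg) | bits);
 +}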
index b2b84510b932f0d00b01a735ca8486a474f183b5,0000000000000000000000000000000000000000..5981dbd8c6dffaee482a113ca3db9a35a9d09edc
mode 100644,000000..100644
--- /dev/null
@@@ -1,589 -1,0 +1,589 @@@
-                               sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
 +// SPDX-License-Identifier: GPL-2.0
 +
 +/*
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + */
 +
 +#include "habanalabs.h"
 +#include "include/common/hl_boot_if.h"
 +
 +#include <linux/firmware.h>
 +#include <linux/genalloc.h>
 +#include <linux/io-64-nonatomic-lo-hi.h>
 +#include <linux/slab.h>
 +
 +/**
 + * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
 + *
 + * @hdev: pointer to hl_device structure.
 + * @fw_name: the firmware image name.
 + * @dst: IO memory mapped address space to copy firmware to.
 + *
 + * Copy fw code from firmware file to device memory.
 + *
 + * Return: 0 on success, non-zero for failure.
 + */
 +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 +                              void __iomem *dst)
 +{
 +      const struct firmware *fw;
 +      const u64 *fw_data;
 +      size_t fw_size;
 +      int rc;
 +
 +      rc = request_firmware(&fw, fw_name, hdev->dev);
 +      if (rc) {
 +              dev_err(hdev->dev, "Firmware file %s was not found!\n", fw_name);
 +              goto out;
 +      }
 +
 +      fw_size = fw->size;
 +      if ((fw_size % 4) != 0) {
 +              dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
 +                      fw_name, fw_size);
 +              rc = -EINVAL;
 +              goto out;
 +      }
 +
 +      dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
 +
 +      fw_data = (const u64 *) fw->data;
 +
 +      memcpy_toio(dst, fw_data, fw_size);
 +
 +out:
 +      release_firmware(fw);
 +      return rc;
 +}
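 +
 +/*
 + * Illustrative usage (the firmware name and destination below are
 + * assumptions, not taken from this driver):
 + *
 + *    rc = hl_fw_load_fw_to_device(hdev, "habanalabs/example_fw.itb",
 + *                                    sram_base);
 + *
 + * request_firmware() resolves the name against the firmware search path
 + * (typically /lib/firmware), and the size check above rejects images
 + * whose length is not a multiple of 4 bytes.
 + */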
 +
 +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
 +{
 +      struct armcp_packet pkt = {};
 +
 +      pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
 +
 +      return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
-                       HL_DEVICE_TIMEOUT_USEC, &result);
++                                              sizeof(pkt), 0, NULL);
 +}
 +
 +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 +                              u16 len, u32 timeout, long *result)
 +{
 +      struct armcp_packet *pkt;
 +      dma_addr_t pkt_dma_addr;
 +      u32 tmp;
 +      int rc = 0;
 +
 +      pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
 +                                                              &pkt_dma_addr);
 +      if (!pkt) {
 +              dev_err(hdev->dev,
 +                      "Failed to allocate DMA memory for packet to CPU\n");
 +              return -ENOMEM;
 +      }
 +
 +      memcpy(pkt, msg, len);
 +
 +      mutex_lock(&hdev->send_cpu_message_lock);
 +
 +      if (hdev->disabled)
 +              goto out;
 +
 +      if (hdev->device_cpu_disabled) {
 +              rc = -EIO;
 +              goto out;
 +      }
 +
 +      rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
 +              goto out;
 +      }
 +
 +      rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
 +                              (tmp == ARMCP_PACKET_FENCE_VAL), 1000,
 +                              timeout, true);
 +
 +      hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
 +
 +      if (rc == -ETIMEDOUT) {
 +              dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
 +              hdev->device_cpu_disabled = true;
 +              goto out;
 +      }
 +
 +      tmp = le32_to_cpu(pkt->ctl);
 +
 +      rc = (tmp & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
 +      if (rc) {
 +              dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
 +                      rc,
 +                      (tmp & ARMCP_PKT_CTL_OPCODE_MASK)
 +                                              >> ARMCP_PKT_CTL_OPCODE_SHIFT);
 +              rc = -EIO;
 +      } else if (result) {
 +              *result = (long) le64_to_cpu(pkt->result);
 +      }
 +
 +out:
 +      mutex_unlock(&hdev->send_cpu_message_lock);
 +
 +      hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
 +
 +      return rc;
 +}
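 +
 +/*
 + * Protocol sketch for the function above (illustrative): the host copies
 + * the packet into CPU-accessible DMA memory, rings the CPU queue, then
 + * polls pkt->fence until the device CPU writes ARMCP_PACKET_FENCE_VAL
 + * into it. The F/W return code travels back in the ctl word and the
 + * payload, if any, in pkt->result.
 + */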
 +
 +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
 +{
 +      struct armcp_packet pkt;
 +      long result;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.value = cpu_to_le64(event_type);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                       total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
++                                              0, &result);
 +
 +      if (rc)
 +              dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
 +
 +      return rc;
 +}
 +
 +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 +              size_t irq_arr_size)
 +{
 +      struct armcp_unmask_irq_arr_packet *pkt;
 +      size_t total_pkt_size;
 +      long result;
 +      int rc;
 +
 +      total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
 +                      irq_arr_size;
 +
 +      /* data should be aligned to 8 bytes in order for ArmCP to copy it */
 +      total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
 +
 +      /* total_pkt_size is cast to u16 later on */
 +      if (total_pkt_size > USHRT_MAX) {
 +              dev_err(hdev->dev, "too many elements in IRQ array\n");
 +              return -EINVAL;
 +      }
 +
 +      pkt = kzalloc(total_pkt_size, GFP_KERNEL);
 +      if (!pkt)
 +              return -ENOMEM;
 +
 +      pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
 +      memcpy(&pkt->irqs, irq_arr, irq_arr_size);
 +
 +      pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
 +                                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
-                       sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
++                                              total_pkt_size, 0, &result);
 +
 +      if (rc)
 +              dev_err(hdev->dev, "failed to unmask IRQ array\n");
 +
 +      kfree(pkt);
 +
 +      return rc;
 +}
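 +
 +/*
 + * Worked example of the alignment round-up above (illustrative):
 + * (total_pkt_size + 0x7) & ~0x7 rounds up to the next multiple of 8,
 + * e.g. 42 -> (42 + 7) & ~7 = 48, while 48 stays 48.
 + */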
 +
 +int hl_fw_test_cpu_queue(struct hl_device *hdev)
 +{
 +      struct armcp_packet test_pkt = {};
 +      long result;
 +      int rc;
 +
 +      test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
 +                                      ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
-                       sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
++                                              sizeof(test_pkt), 0, &result);
 +
 +      if (!rc) {
 +              if (result != ARMCP_PACKET_FENCE_VAL)
 +                      dev_err(hdev->dev,
 +                              "CPU queue test failed (0x%08lX)\n", result);
 +      } else {
 +              dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
 +      }
 +
 +      return rc;
 +}
 +
 +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 +                                              dma_addr_t *dma_handle)
 +{
 +      u64 kernel_addr;
 +
 +      kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
 +
 +      *dma_handle = hdev->cpu_accessible_dma_address +
 +              (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
 +
 +      return (void *) (uintptr_t) kernel_addr;
 +}
 +
 +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 +                                      void *vaddr)
 +{
 +      gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
 +                      size);
 +}
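 +
 +/*
 + * Address math sketch for the pool helpers above (illustrative): the
 + * gen_pool hands out kernel virtual addresses inside
 + * cpu_accessible_dma_mem, and the matching bus address is recovered by
 + * rebasing the offset:
 + *
 + *    offset      = kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem;
 + *    *dma_handle = hdev->cpu_accessible_dma_address + offset;
 + */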
 +
 +int hl_fw_send_heartbeat(struct hl_device *hdev)
 +{
 +      struct armcp_packet hb_pkt = {};
 +      long result;
 +      int rc;
 +
 +      hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
 +                                      ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
++                                              sizeof(hb_pkt), 0, &result);
 +
 +      if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
 +              rc = -EIO;
 +
 +      return rc;
 +}
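 +
 +/*
 + * Note (illustrative): the heartbeat deliberately reuses the
 + * ARMCP_PACKET_TEST opcode; a healthy device CPU echoes
 + * ARMCP_PACKET_FENCE_VAL back in pkt->result, so both the send status and
 + * the echoed value are checked above.
 + */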
 +
 +int hl_fw_armcp_info_get(struct hl_device *hdev)
 +{
 +      struct asic_fixed_properties *prop = &hdev->asic_prop;
 +      struct armcp_packet pkt = {};
 +      void *armcp_info_cpu_addr;
 +      dma_addr_t armcp_info_dma_addr;
 +      long result;
 +      int rc;
 +
 +      armcp_info_cpu_addr =
 +                      hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
 +                                      sizeof(struct armcp_info),
 +                                      &armcp_info_dma_addr);
 +      if (!armcp_info_cpu_addr) {
 +              dev_err(hdev->dev,
 +                      "Failed to allocate DMA memory for ArmCP info packet\n");
 +              return -ENOMEM;
 +      }
 +
 +      memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.addr = cpu_to_le64(armcp_info_dma_addr);
 +      pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 +                                      HL_ARMCP_INFO_TIMEOUT_USEC, &result);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to handle ArmCP info pkt, error %d\n", rc);
 +              goto out;
 +      }
 +
 +      memcpy(&prop->armcp_info, armcp_info_cpu_addr,
 +                      sizeof(prop->armcp_info));
 +
 +      rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to build hwmon channel info, error %d\n", rc);
 +              rc = -EFAULT;
 +              goto out;
 +      }
 +
 +out:
 +      hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
 +                      sizeof(struct armcp_info), armcp_info_cpu_addr);
 +
 +      return rc;
 +}
 +
 +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 +{
 +      struct armcp_packet pkt = {};
 +      void *eeprom_info_cpu_addr;
 +      dma_addr_t eeprom_info_dma_addr;
 +      long result;
 +      int rc;
 +
 +      eeprom_info_cpu_addr =
 +                      hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
 +                                      max_size, &eeprom_info_dma_addr);
 +      if (!eeprom_info_cpu_addr) {
 +              dev_err(hdev->dev,
 +                      "Failed to allocate DMA memory for ArmCP EEPROM packet\n");
 +              return -ENOMEM;
 +      }
 +
 +      memset(eeprom_info_cpu_addr, 0, max_size);
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
 +      pkt.data_max_size = cpu_to_le32(max_size);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
 +                      HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to handle ArmCP EEPROM packet, error %d\n", rc);
 +              goto out;
 +      }
 +
 +      /* result contains the actual size */
 +      memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
 +
 +out:
 +      hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
 +                      eeprom_info_cpu_addr);
 +
 +      return rc;
 +}
 +
 +static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg)
 +{
 +      u32 err_val;
 +
 +      /* Some of the firmware status codes are deprecated in newer f/w
 +       * versions. In those versions, the errors are reported
 +       * in different registers. Therefore, we need to check those
 +       * registers and print the exact errors. Moreover, there
 +       * may be multiple errors, so we need to report on each error
 +       * separately. Some of the error codes might indicate a state
 +       * that is not an error per se, but it is an error in a production
 +       * environment
 +       */
 +      err_val = RREG32(boot_err0_reg);
 +      if (!(err_val & CPU_BOOT_ERR0_ENABLED))
 +              return;
 +
 +      if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
 +              dev_err(hdev->dev,
 +                      "Device boot error - DRAM initialization failed\n");
 +      if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
 +              dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
 +      if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
 +              dev_err(hdev->dev,
 +                      "Device boot error - Thermal Sensor initialization failed\n");
 +      if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
 +              dev_warn(hdev->dev,
 +                      "Device boot warning - Skipped DRAM initialization\n");
 +      if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
 +              dev_warn(hdev->dev,
 +                      "Device boot warning - Skipped waiting for BMC\n");
 +      if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
 +              dev_err(hdev->dev,
 +                      "Device boot error - Serdes data from BMC not available\n");
 +      if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
 +              dev_err(hdev->dev,
 +                      "Device boot error - NIC F/W initialization failed\n");
 +}
 +
 +static void hl_detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 +{
 +      switch (status) {
 +      case CPU_BOOT_STATUS_NA:
 +              dev_err(hdev->dev,
 +                      "Device boot error - BTL did NOT run\n");
 +              break;
 +      case CPU_BOOT_STATUS_IN_WFE:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Stuck inside WFE loop\n");
 +              break;
 +      case CPU_BOOT_STATUS_IN_BTL:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Stuck in BTL\n");
 +              break;
 +      case CPU_BOOT_STATUS_IN_PREBOOT:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Stuck in Preboot\n");
 +              break;
 +      case CPU_BOOT_STATUS_IN_SPL:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Stuck in SPL\n");
 +              break;
 +      case CPU_BOOT_STATUS_IN_UBOOT:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Stuck in u-boot\n");
 +              break;
 +      case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
 +              dev_err(hdev->dev,
 +                      "Device boot error - DRAM initialization failed\n");
 +              break;
 +      case CPU_BOOT_STATUS_UBOOT_NOT_READY:
 +              dev_err(hdev->dev,
 +                      "Device boot error - u-boot stopped by user\n");
 +              break;
 +      case CPU_BOOT_STATUS_TS_INIT_FAIL:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Thermal Sensor initialization failed\n");
 +              break;
 +      default:
 +              dev_err(hdev->dev,
 +                      "Device boot error - Invalid status code %d\n",
 +                      status);
 +              break;
 +      }
 +}
 +
 +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 +                      u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 +                      u32 boot_err0_reg, bool skip_bmc,
 +                      u32 cpu_timeout, u32 boot_fit_timeout)
 +{
 +      u32 status;
 +      int rc;
 +
 +      dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
 +              cpu_timeout / USEC_PER_SEC);
 +
 +      /* Wait for boot FIT request */
 +      rc = hl_poll_timeout(
 +              hdev,
 +              cpu_boot_status_reg,
 +              status,
 +              status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
 +              10000,
 +              boot_fit_timeout);
 +
 +      if (rc) {
 +              dev_dbg(hdev->dev,
 +                      "No boot fit request received, resuming boot\n");
 +      } else {
 +              rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
 +              if (rc)
 +                      goto out;
 +
 +              /* Clear device CPU message status */
 +              WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
 +
 +              /* Signal device CPU that boot loader is ready */
 +              WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
 +
 +              /* Poll for CPU device ack */
 +              rc = hl_poll_timeout(
 +                      hdev,
 +                      cpu_msg_status_reg,
 +                      status,
 +                      status == CPU_MSG_OK,
 +                      10000,
 +                      boot_fit_timeout);
 +
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "Timeout waiting for boot fit load ack\n");
 +                      goto out;
 +              }
 +
 +              /* Clear message */
 +              WREG32(msg_to_cpu_reg, KMD_MSG_NA);
 +      }
 +
 +      /* Make sure CPU boot-loader is running */
 +      rc = hl_poll_timeout(
 +              hdev,
 +              cpu_boot_status_reg,
 +              status,
 +              (status == CPU_BOOT_STATUS_DRAM_RDY) ||
 +              (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
 +              (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
 +              (status == CPU_BOOT_STATUS_SRAM_AVAIL),
 +              10000,
 +              cpu_timeout);
 +
 +      /* Read U-Boot and preboot versions now, in case we fail later */
 +      hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
 +      hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
 +
 +      /* Some of the status codes below are deprecated in newer f/w
 +       * versions but we keep them here for backward compatibility
 +       */
 +      if (rc) {
 +              hl_detect_cpu_boot_status(hdev, status);
 +              rc = -EIO;
 +              goto out;
 +      }
 +
 +      if (!hdev->fw_loading) {
 +              dev_info(hdev->dev, "Skip loading FW\n");
 +              goto out;
 +      }
 +
 +      if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
 +              goto out;
 +
 +      dev_info(hdev->dev,
 +              "Loading firmware to device, may take some time...\n");
 +
 +      rc = hdev->asic_funcs->load_firmware_to_device(hdev);
 +      if (rc)
 +              goto out;
 +
 +      if (skip_bmc) {
 +              WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
 +
 +              rc = hl_poll_timeout(
 +                      hdev,
 +                      cpu_boot_status_reg,
 +                      status,
 +                      (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
 +                      10000,
 +                      cpu_timeout);
 +
 +              if (rc) {
 +                      dev_err(hdev->dev,
 +                              "Failed to get ACK on skipping BMC, %d\n",
 +                              status);
 +                      WREG32(msg_to_cpu_reg, KMD_MSG_NA);
 +                      rc = -EIO;
 +                      goto out;
 +              }
 +      }
 +
 +      WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
 +
 +      rc = hl_poll_timeout(
 +              hdev,
 +              cpu_boot_status_reg,
 +              status,
 +              (status == CPU_BOOT_STATUS_SRAM_AVAIL),
 +              10000,
 +              cpu_timeout);
 +
 +      /* Clear message */
 +      WREG32(msg_to_cpu_reg, KMD_MSG_NA);
 +
 +      if (rc) {
 +              if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
 +                      dev_err(hdev->dev,
 +                              "Device reports FIT image is corrupted\n");
 +              else
 +                      dev_err(hdev->dev,
 +                              "Failed to load firmware to device, %d\n",
 +                              status);
 +
 +              rc = -EIO;
 +              goto out;
 +      }
 +
 +      dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 +
 +out:
 +      fw_read_errors(hdev, boot_err0_reg);
 +
 +      return rc;
 +}
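 +
 +/*
 + * Call-site sketch for hl_fw_init_cpu(), assuming Goya-style register and
 + * timeout macro names; the exact mm* register names below are illustrative
 + * assumptions, not taken from this commit.
 + */
 +static int example_init_cpu(struct hl_device *hdev)
 +{
 +	/* skip_bmc = false: wait for the BMC handshake before loading F/W */
 +	return hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
 +			mmPSOC_GLOBAL_CONF_UBOOT_MAGIC,
 +			mmCPU_CMD_STATUS_TO_HOST, mmCPU_BOOT_ERR0,
 +			false, GOYA_CPU_TIMEOUT_USEC,
 +			GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
 +}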
index bf9abfa47b7a33e3ddad722f359d2894505cf3c3,0000000000000000000000000000000000000000..eb42aa5476a9a8d201e16d3869203c43843b46fc
mode 100644,000000..100644
--- /dev/null
@@@ -1,1962 -1,0 +1,1969 @@@
-  * @enable_clock_gating: enable clock gating for reducing power consumption.
-  * @disable_clock_gating: disable clock for accessing registers on HBW.
 +/* SPDX-License-Identifier: GPL-2.0
 + *
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + *
 + */
 +
 +#ifndef HABANALABSP_H_
 +#define HABANALABSP_H_
 +
 +#include "include/common/armcp_if.h"
 +#include "include/common/qman_if.h"
 +#include <uapi/misc/habanalabs.h>
 +
 +#include <linux/cdev.h>
 +#include <linux/iopoll.h>
 +#include <linux/irqreturn.h>
 +#include <linux/dma-fence.h>
 +#include <linux/dma-direction.h>
 +#include <linux/scatterlist.h>
 +#include <linux/hashtable.h>
 +
 +#define HL_NAME                               "habanalabs"
 +
 +#define HL_MMAP_CB_MASK                       (0x8000000000000000ull >> PAGE_SHIFT)
 +
 +#define HL_PENDING_RESET_PER_SEC      30
 +
 +#define HL_HARD_RESET_MAX_TIMEOUT     120
 +
 +#define HL_DEVICE_TIMEOUT_USEC                1000000 /* 1 s */
 +
 +#define HL_HEARTBEAT_PER_USEC         5000000 /* 5 s */
 +
 +#define HL_PLL_LOW_JOB_FREQ_USEC      5000000 /* 5 s */
 +
 +#define HL_ARMCP_INFO_TIMEOUT_USEC    10000000 /* 10s */
 +#define HL_ARMCP_EEPROM_TIMEOUT_USEC  10000000 /* 10s */
 +
 +#define HL_PCI_ELBI_TIMEOUT_MSEC      10 /* 10ms */
 +
 +#define HL_SIM_MAX_TIMEOUT_US         10000000 /* 10s */
 +
 +#define HL_IDLE_BUSY_TS_ARR_SIZE      4096
 +
 +/* Memory */
 +#define MEM_HASH_TABLE_BITS           7 /* 1 << 7 buckets */
 +
 +/* MMU */
 +#define MMU_HASH_TABLE_BITS           7 /* 1 << 7 buckets */
 +
 +/*
 + * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
 + * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
 + */
 +#define HL_RSVD_SOBS                  4
 +#define HL_RSVD_MONS                  2
 +
 +#define HL_RSVD_SOBS_IN_USE           2
 +#define HL_RSVD_MONS_IN_USE           1
 +
 +#define HL_MAX_SOB_VAL                        (1 << 15)
 +
 +#define IS_POWER_OF_2(n)              ((n) != 0 && (((n) & ((n) - 1)) == 0))
 +#define IS_MAX_PENDING_CS_VALID(n)    (IS_POWER_OF_2(n) && ((n) > 1))
 +
 +#define HL_PCI_NUM_BARS                       6
 +
 +/**
 + * struct pgt_info - MMU hop page info.
 + * @node: hash linked-list node for the pgts shadow hash of pgts.
 + * @phys_addr: physical address of the pgt.
 + * @shadow_addr: shadow hop in the host.
 + * @ctx: pointer to the owner ctx.
 + * @num_of_ptes: indicates how many ptes are used in the pgt.
 + *
 + * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
 + * is needed during mapping, a new page is allocated and this structure holds
 + * its essential information. During unmapping, if no valid PTEs remain in
 + * the page, it is freed along with its pgt_info structure.
 + */
 +struct pgt_info {
 +      struct hlist_node       node;
 +      u64                     phys_addr;
 +      u64                     shadow_addr;
 +      struct hl_ctx           *ctx;
 +      int                     num_of_ptes;
 +};
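 +
 +/*
 + * Unmap-side sketch of the lifecycle described above: once the last valid
 + * PTE in a hop is cleared, the hop page's pgt_info is dropped. The helper
 + * name is illustrative and the freeing of the physical page itself is
 + * elided; assumes <linux/slab.h> for kfree().
 + */
 +static inline void example_put_pte(struct pgt_info *pgt_info)
 +{
 +	if (--pgt_info->num_of_ptes == 0) {
 +		hash_del(&pgt_info->node); /* drop from the shadow hash */
 +		kfree(pgt_info);
 +	}
 +}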
 +
 +struct hl_device;
 +struct hl_fpriv;
 +
 +/**
 + * enum hl_pci_match_mode - pci match mode per region
 + * @PCI_ADDRESS_MATCH_MODE: address match mode
 + * @PCI_BAR_MATCH_MODE: bar match mode
 + */
 +enum hl_pci_match_mode {
 +      PCI_ADDRESS_MATCH_MODE,
 +      PCI_BAR_MATCH_MODE
 +};
 +
 +/**
 + * enum hl_fw_component - F/W components to read version through registers.
 + * @FW_COMP_UBOOT: u-boot.
 + * @FW_COMP_PREBOOT: preboot.
 + */
 +enum hl_fw_component {
 +      FW_COMP_UBOOT,
 +      FW_COMP_PREBOOT
 +};
 +
 +/**
 + * enum hl_queue_type - Supported QUEUE types.
 + * @QUEUE_TYPE_NA: queue is not available.
 + * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
 + *                  host.
 + * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
 + *                    memories and/or operates the compute engines.
 + * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
 + * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
 + *                 notifications are sent by H/W.
 + */
 +enum hl_queue_type {
 +      QUEUE_TYPE_NA,
 +      QUEUE_TYPE_EXT,
 +      QUEUE_TYPE_INT,
 +      QUEUE_TYPE_CPU,
 +      QUEUE_TYPE_HW
 +};
 +
 +enum hl_cs_type {
 +      CS_TYPE_DEFAULT,
 +      CS_TYPE_SIGNAL,
 +      CS_TYPE_WAIT
 +};
 +
 +/*
 + * struct hl_inbound_pci_region - inbound region descriptor
 + * @mode: pci match mode for this region
 + * @addr: region target address
 + * @size: region size in bytes
 + * @offset_in_bar: offset within bar (address match mode)
 + * @bar: bar id
 + */
 +struct hl_inbound_pci_region {
 +      enum hl_pci_match_mode  mode;
 +      u64                     addr;
 +      u64                     size;
 +      u64                     offset_in_bar;
 +      u8                      bar;
 +};
 +
 +/*
 + * struct hl_outbound_pci_region - outbound region descriptor
 + * @addr: region target address
 + * @size: region size in bytes
 + */
 +struct hl_outbound_pci_region {
 +      u64     addr;
 +      u64     size;
 +};
 +
 +/*
 + * struct hl_hw_sob - H/W SOB info.
 + * @hdev: habanalabs device structure.
 + * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
 + * @sob_id: id of this SOB.
 + * @q_idx: the H/W queue that uses this SOB.
 + */
 +struct hl_hw_sob {
 +      struct hl_device        *hdev;
 +      struct kref             kref;
 +      u32                     sob_id;
 +      u32                     q_idx;
 +};
 +
 +/**
 + * struct hw_queue_properties - queue information.
 + * @type: queue type.
 + * @driver_only: true if only the driver is allowed to send a job to this queue,
 + *               false otherwise.
 + * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
 + *                      queue, false otherwise (a CB address must be provided).
 + * @supports_sync_stream: True if queue supports sync stream
 + */
 +struct hw_queue_properties {
 +      enum hl_queue_type      type;
 +      u8                      driver_only;
 +      u8                      requires_kernel_cb;
 +      u8                      supports_sync_stream;
 +};
 +
 +/**
 + * enum vm_type_t - virtual memory mapping request information.
 + * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
 + * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
 + */
 +enum vm_type_t {
 +      VM_TYPE_USERPTR = 0x1,
 +      VM_TYPE_PHYS_PACK = 0x2
 +};
 +
 +/**
 + * enum hl_device_hw_state - H/W device state. Use this to understand whether
 + *                           a reset is needed before hw_init or not.
 + * @HL_DEVICE_HW_STATE_CLEAN: H/W state is clean. i.e. after hard reset
 + * @HL_DEVICE_HW_STATE_DIRTY: H/W state is dirty. i.e. we started to execute
 + *                            hw_init
 + */
 +enum hl_device_hw_state {
 +      HL_DEVICE_HW_STATE_CLEAN = 0,
 +      HL_DEVICE_HW_STATE_DIRTY
 +};
 +
 +/**
 + * struct hl_mmu_properties - ASIC specific MMU address translation properties.
 + * @start_addr: virtual start address of the memory region.
 + * @end_addr: virtual end address of the memory region.
 + * @hop0_shift: shift of hop 0 mask.
 + * @hop1_shift: shift of hop 1 mask.
 + * @hop2_shift: shift of hop 2 mask.
 + * @hop3_shift: shift of hop 3 mask.
 + * @hop4_shift: shift of hop 4 mask.
 + * @hop0_mask: mask to get the PTE address in hop 0.
 + * @hop1_mask: mask to get the PTE address in hop 1.
 + * @hop2_mask: mask to get the PTE address in hop 2.
 + * @hop3_mask: mask to get the PTE address in hop 3.
 + * @hop4_mask: mask to get the PTE address in hop 4.
 + * @page_size: default page size used to allocate memory.
 + */
 +struct hl_mmu_properties {
 +      u64     start_addr;
 +      u64     end_addr;
 +      u64     hop0_shift;
 +      u64     hop1_shift;
 +      u64     hop2_shift;
 +      u64     hop3_shift;
 +      u64     hop4_shift;
 +      u64     hop0_mask;
 +      u64     hop1_mask;
 +      u64     hop2_mask;
 +      u64     hop3_mask;
 +      u64     hop4_mask;
 +      u32     page_size;
 +};
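 +
 +/*
 + * Sketch of how the per-hop shift/mask pairs above are typically used to
 + * locate a PTE inside a hop; shown for hop 0, with the PTE size passed in
 + * explicitly as an illustrative assumption.
 + */
 +static inline u64 example_hop0_pte_addr(struct hl_mmu_properties *mmu_prop,
 +					u64 hop0_addr, u64 vaddr, u32 pte_size)
 +{
 +	/* PTE address = hop base + (masked VA bits, shifted down) * PTE size */
 +	return hop0_addr + pte_size *
 +			((vaddr & mmu_prop->hop0_mask) >> mmu_prop->hop0_shift);
 +}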
 +
 +/**
 + * struct asic_fixed_properties - ASIC specific immutable properties.
 + * @hw_queues_props: H/W queues properties.
 + * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
 + *            available sensors.
 + * @uboot_ver: F/W U-boot version.
 + * @preboot_ver: F/W Preboot version.
 + * @dmmu: DRAM MMU address translation properties.
 + * @pmmu: PCI (host) MMU address translation properties.
 + * @pmmu_huge: PCI (host) MMU address translation properties for memory
 + *              allocated with huge pages.
 + * @sram_base_address: SRAM physical start address.
 + * @sram_end_address: SRAM physical end address.
 + * @sram_user_base_address: SRAM physical start address for user access.
 + * @dram_base_address: DRAM physical start address.
 + * @dram_end_address: DRAM physical end address.
 + * @dram_user_base_address: DRAM physical start address for user access.
 + * @dram_size: DRAM total size.
 + * @dram_pci_bar_size: size of PCI bar towards DRAM.
 + * @max_power_default: max power of the device after reset
 + * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
 + *                                      fault.
 + * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
 + * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
 + * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
 + * @mmu_dram_default_page_addr: DRAM default page physical address.
 + * @mmu_pgt_size: MMU page tables total size.
 + * @mmu_pte_size: PTE size in MMU page tables.
 + * @mmu_hop_table_size: MMU hop table size.
 + * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
 + * @dram_page_size: page size for MMU DRAM allocation.
 + * @cfg_size: configuration space size on SRAM.
 + * @sram_size: total size of SRAM.
 + * @max_asid: maximum number of open contexts (ASIDs).
 + * @num_of_events: number of possible internal H/W IRQs.
 + * @psoc_pci_pll_nr: PCI PLL NR value.
 + * @psoc_pci_pll_nf: PCI PLL NF value.
 + * @psoc_pci_pll_od: PCI PLL OD value.
 + * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
 + * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
 + * @high_pll: high PLL frequency used by the device.
 + * @cb_pool_cb_cnt: number of CBs in the CB pool.
 + * @cb_pool_cb_size: size of each CB in the CB pool.
 + * @max_pending_cs: maximum of concurrent pending command submissions
 + * @max_queues: maximum amount of queues in the system
 + * @sync_stream_first_sob: first sync object available for sync stream use
 + * @sync_stream_first_mon: first monitor available for sync stream use
 + * @tpc_enabled_mask: which TPCs are enabled.
 + * @completion_queues_count: number of completion queues.
 + */
 +struct asic_fixed_properties {
 +      struct hw_queue_properties      *hw_queues_props;
 +      struct armcp_info               armcp_info;
 +      char                            uboot_ver[VERSION_MAX_LEN];
 +      char                            preboot_ver[VERSION_MAX_LEN];
 +      struct hl_mmu_properties        dmmu;
 +      struct hl_mmu_properties        pmmu;
 +      struct hl_mmu_properties        pmmu_huge;
 +      u64                             sram_base_address;
 +      u64                             sram_end_address;
 +      u64                             sram_user_base_address;
 +      u64                             dram_base_address;
 +      u64                             dram_end_address;
 +      u64                             dram_user_base_address;
 +      u64                             dram_size;
 +      u64                             dram_pci_bar_size;
 +      u64                             max_power_default;
 +      u64                             dram_size_for_default_page_mapping;
 +      u64                             pcie_dbi_base_address;
 +      u64                             pcie_aux_dbi_reg_addr;
 +      u64                             mmu_pgt_addr;
 +      u64                             mmu_dram_default_page_addr;
 +      u32                             mmu_pgt_size;
 +      u32                             mmu_pte_size;
 +      u32                             mmu_hop_table_size;
 +      u32                             mmu_hop0_tables_total_size;
 +      u32                             dram_page_size;
 +      u32                             cfg_size;
 +      u32                             sram_size;
 +      u32                             max_asid;
 +      u32                             num_of_events;
 +      u32                             psoc_pci_pll_nr;
 +      u32                             psoc_pci_pll_nf;
 +      u32                             psoc_pci_pll_od;
 +      u32                             psoc_pci_pll_div_factor;
 +      u32                             psoc_timestamp_frequency;
 +      u32                             high_pll;
 +      u32                             cb_pool_cb_cnt;
 +      u32                             cb_pool_cb_size;
 +      u32                             max_pending_cs;
 +      u32                             max_queues;
 +      u16                             sync_stream_first_sob;
 +      u16                             sync_stream_first_mon;
 +      u8                              tpc_enabled_mask;
 +      u8                              completion_queues_count;
 +};
 +
 +/**
 + * struct hl_cs_compl - command submission completion object.
 + * @base_fence: kernel fence object.
 + * @lock: spinlock to protect fence.
 + * @hdev: habanalabs device structure.
 + * @hw_sob: the H/W SOB used in this signal/wait CS.
 + * @cs_seq: command submission sequence number.
 + * @type: type of the CS - signal/wait.
 + * @sob_val: the SOB value that is used in this signal/wait CS.
 + */
 +struct hl_cs_compl {
 +      struct dma_fence        base_fence;
 +      spinlock_t              lock;
 +      struct hl_device        *hdev;
 +      struct hl_hw_sob        *hw_sob;
 +      u64                     cs_seq;
 +      enum hl_cs_type         type;
 +      u16                     sob_val;
 +};
 +
 +/*
 + * Command Buffers
 + */
 +
 +/**
 + * struct hl_cb_mgr - describes a Command Buffer Manager.
 + * @cb_lock: protects cb_handles.
 + * @cb_handles: an idr to hold all command buffer handles.
 + */
 +struct hl_cb_mgr {
 +      spinlock_t              cb_lock;
 +      struct idr              cb_handles; /* protected by cb_lock */
 +};
 +
 +/**
 + * struct hl_cb - describes a Command Buffer.
 + * @refcount: reference counter for usage of the CB.
 + * @hdev: pointer to device this CB belongs to.
 + * @lock: spinlock to protect mmap/cs flows.
 + * @debugfs_list: node in debugfs list of command buffers.
 + * @pool_list: node in pool list of command buffers.
 + * @kernel_address: Holds the CB's kernel virtual address.
 + * @bus_address: Holds the CB's DMA address.
 + * @mmap_size: Holds the CB's size that was mmaped.
 + * @size: holds the CB's size.
 + * @id: the CB's ID.
 + * @cs_cnt: holds number of CS that this CB participates in.
 + * @ctx_id: holds the ID of the owner's context.
 + * @mmap: true if the CB is currently mmaped to user.
 + * @is_pool: true if CB was acquired from the pool, false otherwise.
 + * @is_internal: true if the CB was internally allocated, false otherwise.
 + */
 +struct hl_cb {
 +      struct kref             refcount;
 +      struct hl_device        *hdev;
 +      spinlock_t              lock;
 +      struct list_head        debugfs_list;
 +      struct list_head        pool_list;
 +      u64                     kernel_address;
 +      dma_addr_t              bus_address;
 +      u32                     mmap_size;
 +      u32                     size;
 +      u32                     id;
 +      u32                     cs_cnt;
 +      u32                     ctx_id;
 +      u8                      mmap;
 +      u8                      is_pool;
 +      u8                      is_internal;
 +};
 +
 +
 +/*
 + * QUEUES
 + */
 +
 +struct hl_cs_job;
 +
 +/* Queue length of external and HW queues */
 +#define HL_QUEUE_LENGTH                       4096
 +#define HL_QUEUE_SIZE_IN_BYTES                (HL_QUEUE_LENGTH * HL_BD_SIZE)
 +
 +#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
 +#error "HL_QUEUE_LENGTH must be at least HL_MAX_JOBS_PER_CS"
 +#endif
 +
 +/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
 +#define HL_CQ_LENGTH                  HL_QUEUE_LENGTH
 +#define HL_CQ_SIZE_IN_BYTES           (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
 +
 +/* Must be power of 2 */
 +#define HL_EQ_LENGTH                  64
 +#define HL_EQ_SIZE_IN_BYTES           (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
 +
 +/* Host <-> ArmCP shared memory size */
 +#define HL_CPU_ACCESSIBLE_MEM_SIZE    SZ_2M
 +
 +/**
 + * struct hl_hw_queue - describes a H/W transport queue.
 + * @hw_sob: array of the used H/W SOBs by this H/W queue.
 + * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
 + * @queue_type: type of queue.
 + * @kernel_address: holds the queue's kernel virtual address.
 + * @bus_address: holds the queue's DMA address.
 + * @pi: holds the queue's pi value.
 + * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
 + * @hw_queue_id: the id of the H/W queue.
 + * @cq_id: the id for the corresponding CQ for this H/W queue.
 + * @msi_vec: the IRQ number of the H/W queue.
 + * @int_queue_len: length of internal queue (number of entries).
 + * @next_sob_val: the next value to use for the currently used SOB.
 + * @base_sob_id: the base SOB id of the SOBs used by this queue.
 + * @base_mon_id: the base MON id of the MONs used by this queue.
 + * @valid: is the queue valid (we have an array of 32 queues, not all of them
 + *         exist).
 + * @curr_sob_offset: the id offset to the currently used SOB from the
 + *                   HL_RSVD_SOBS that are being used by this queue.
 + * @supports_sync_stream: True if queue supports sync stream
 + */
 +struct hl_hw_queue {
 +      struct hl_hw_sob        hw_sob[HL_RSVD_SOBS];
 +      struct hl_cs_job        **shadow_queue;
 +      enum hl_queue_type      queue_type;
 +      u64                     kernel_address;
 +      dma_addr_t              bus_address;
 +      u32                     pi;
 +      atomic_t                ci;
 +      u32                     hw_queue_id;
 +      u32                     cq_id;
 +      u32                     msi_vec;
 +      u16                     int_queue_len;
 +      u16                     next_sob_val;
 +      u16                     base_sob_id;
 +      u16                     base_mon_id;
 +      u8                      valid;
 +      u8                      curr_sob_offset;
 +      u8                      supports_sync_stream;
 +};
 +
 +/**
 + * struct hl_cq - describes a completion queue
 + * @hdev: pointer to the device structure
 + * @kernel_address: holds the queue's kernel virtual address
 + * @bus_address: holds the queue's DMA address
 + * @cq_idx: completion queue index in array
 + * @hw_queue_id: the id of the matching H/W queue
 + * @ci: ci inside the queue
 + * @pi: pi inside the queue
 + * @free_slots_cnt: counter of free slots in queue
 + */
 +struct hl_cq {
 +      struct hl_device        *hdev;
 +      u64                     kernel_address;
 +      dma_addr_t              bus_address;
 +      u32                     cq_idx;
 +      u32                     hw_queue_id;
 +      u32                     ci;
 +      u32                     pi;
 +      atomic_t                free_slots_cnt;
 +};
 +
 +/**
 + * struct hl_eq - describes the event queue (single one per device)
 + * @hdev: pointer to the device structure
 + * @kernel_address: holds the queue's kernel virtual address
 + * @bus_address: holds the queue's DMA address
 + * @ci: ci inside the queue
 + */
 +struct hl_eq {
 +      struct hl_device        *hdev;
 +      u64                     kernel_address;
 +      dma_addr_t              bus_address;
 +      u32                     ci;
 +};
 +
 +
 +/*
 + * ASICs
 + */
 +
 +/**
 + * enum hl_asic_type - supported ASIC types.
 + * @ASIC_INVALID: Invalid ASIC type.
 + * @ASIC_GOYA: Goya device.
 + * @ASIC_GAUDI: Gaudi device.
 + */
 +enum hl_asic_type {
 +      ASIC_INVALID,
 +      ASIC_GOYA,
 +      ASIC_GAUDI
 +};
 +
 +struct hl_cs_parser;
 +
 +/**
 + * enum hl_pm_mng_profile - power management profile.
 + * @PM_AUTO: internal clock is set by the Linux driver.
 + * @PM_MANUAL: internal clock is set by the user.
 + * @PM_LAST: last power management type.
 + */
 +enum hl_pm_mng_profile {
 +      PM_AUTO = 1,
 +      PM_MANUAL,
 +      PM_LAST
 +};
 +
 +/**
 + * enum hl_pll_frequency - PLL frequency.
 + * @PLL_HIGH: high frequency.
 + * @PLL_LOW: low frequency.
 + * @PLL_LAST: last frequency values that were configured by the user.
 + */
 +enum hl_pll_frequency {
 +      PLL_HIGH = 1,
 +      PLL_LOW,
 +      PLL_LAST
 +};
 +
 +#define PLL_REF_CLK 50
 +
 +enum div_select_defs {
 +      DIV_SEL_REF_CLK = 0,
 +      DIV_SEL_PLL_CLK = 1,
 +      DIV_SEL_DIVIDED_REF = 2,
 +      DIV_SEL_DIVIDED_PLL = 3,
 +};
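 +
 +/*
 + * Sketch of how the divider select values above can feed a clock-rate
 + * computation from PLL_REF_CLK, assuming Goya/Gaudi-style PLL fields
 + * (nr, nf, od, div_fctr); the exact formula is an illustrative assumption.
 + */
 +static inline u64 example_pll_freq_mhz(u32 div_sel, u64 nr, u64 nf, u64 od,
 +					u64 div_fctr)
 +{
 +	switch (div_sel) {
 +	case DIV_SEL_REF_CLK:
 +		return PLL_REF_CLK;
 +	case DIV_SEL_DIVIDED_REF:
 +		return PLL_REF_CLK / (div_fctr + 1);
 +	case DIV_SEL_PLL_CLK:
 +		return PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
 +	case DIV_SEL_DIVIDED_PLL:
 +		return PLL_REF_CLK * (nf + 1) /
 +				((nr + 1) * (od + 1) * (div_fctr + 1));
 +	default:
 +		return 0;
 +	}
 +}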
 +
 +/**
 + * struct hl_asic_funcs - ASIC specific functions that can be called from
 + *                        common code.
 + * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
 + * @early_fini: tears down what was done in early_init.
 + * @late_init: sets up late driver/hw state (post hw_init) - Optional.
 + * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
 + * @sw_init: sets up driver state, does not configure H/W.
 + * @sw_fini: tears down driver state, does not configure H/W.
 + * @hw_init: sets up the H/W state.
 + * @hw_fini: tears down the H/W state.
 + * @halt_engines: halt engines, needed for reset sequence. This also disables
 + *                interrupts from the device. Should be called before
 + *                hw_fini and before CS rollback.
 + * @suspend: handles IP specific H/W or SW changes for suspend.
 + * @resume: handles IP specific H/W or SW changes for resume.
 + * @cb_mmap: maps a CB.
 + * @ring_doorbell: increment PI on a given QMAN.
 + * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
 + *             function because the PQs are located in different memory areas
 + *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
 + *             writing the PQE must match the destination memory area
 + *             properties.
 + * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
 + *                           dma_alloc_coherent(). This is an ASIC-specific
 + *                           function because its implementation is not
 + *                           trivial when the driver is loaded in simulation
 + *                           mode (not upstreamed).
 + * @asic_dma_free_coherent:  Free coherent DMA memory by calling
 + *                           dma_free_coherent(). This is an ASIC-specific
 + *                           function because its implementation is not
 + *                           trivial when the driver is loaded in simulation
 + *                           mode (not upstreamed).
 + * @get_int_queue_base: get the internal queue base address.
 + * @test_queues: run simple test on all queues for sanity check.
 + * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
 + *                        size of allocation is HL_DMA_POOL_BLK_SIZE.
 + * @asic_dma_pool_free: free small DMA allocation from pool.
 + * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
 + * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
 + * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
 + * @cs_parser: parse Command Submission.
 + * @asic_dma_map_sg: DMA map scatter-gather list.
 + * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
 + * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
 + * @update_eq_ci: update event queue CI.
 + * @context_switch: called upon ASID context switch.
 + * @restore_phase_topology: clear all SOBs and MONs.
 + * @debugfs_read32: debug interface for reading u32 from DRAM/SRAM.
 + * @debugfs_write32: debug interface for writing u32 to DRAM/SRAM.
 + * @add_device_attr: add ASIC specific device attributes.
 + * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
 + * @set_pll_profile: change PLL profile (manual/automatic).
 + * @get_events_stat: retrieve event queue entries histogram.
 + * @read_pte: read MMU page table entry from DRAM.
 + * @write_pte: write MMU page table entry to DRAM.
 + * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
 + *                        (L1 only) or hard (L0 & L1) flush.
 + * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
 + *                              ASID-VA-size mask.
 + * @send_heartbeat: send is-alive packet to ArmCP and verify response.
-  * @send_cpu_message: send buffer to ArmCP.
++ * @set_clock_gating: enable/disable clock gating per engine according to
++ *                    the clock gating mask in hdev.
++ * @disable_clock_gating: disable clock gating completely.
 + * @debug_coresight: perform certain actions on Coresight for debugging.
 + * @is_device_idle: return true if device is idle, false otherwise.
 + * @soft_reset_late_init: perform certain actions needed after soft reset.
 + * @hw_queues_lock: acquire H/W queues lock.
 + * @hw_queues_unlock: release H/W queues lock.
 + * @get_pci_id: retrieve PCI ID.
 + * @get_eeprom_data: retrieve EEPROM data from F/W.
-       void (*enable_clock_gating)(struct hl_device *hdev);
++ * @send_cpu_message: send message to F/W. If the message times out, the
++ *                    driver will eventually reset the device. The timeout can
++ *                    be determined by the calling function, or it can be 0,
++ *                    in which case the default timeout for the specific ASIC
++ *                    is used.
 + * @get_hw_state: retrieve the H/W state
 + * @pci_bars_map: Map PCI BARs.
 + * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns
 + *                     old address the bar pointed to or U64_MAX for failure
 + * @init_iatu: Initialize the iATU unit inside the PCI controller.
 + * @rreg: Read a register. Needed for simulator support.
 + * @wreg: Write a register. Needed for simulator support.
 + * @halt_coresight: stop the ETF and ETR traces.
 + * @ctx_init: context dependent initialization.
 + * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
 + * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index.
 + * @read_device_fw_version: read the device's firmware versions that are
 + *                          contained in registers
 + * @load_firmware_to_device: load the firmware to the device's memory
 + * @load_boot_fit_to_device: load boot fit to device's memory
 + * @get_signal_cb_size: Get signal CB size.
 + * @get_wait_cb_size: Get wait CB size.
 + * @gen_signal_cb: Generate a signal CB.
 + * @gen_wait_cb: Generate a wait CB.
 + * @reset_sob: Reset a SOB.
 + * @set_dma_mask_from_fw: set the DMA mask in the driver according to the
 + *                        firmware configuration
 + * @get_device_time: Get the device time.
 + */
 +struct hl_asic_funcs {
 +      int (*early_init)(struct hl_device *hdev);
 +      int (*early_fini)(struct hl_device *hdev);
 +      int (*late_init)(struct hl_device *hdev);
 +      void (*late_fini)(struct hl_device *hdev);
 +      int (*sw_init)(struct hl_device *hdev);
 +      int (*sw_fini)(struct hl_device *hdev);
 +      int (*hw_init)(struct hl_device *hdev);
 +      void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
 +      void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
 +      int (*suspend)(struct hl_device *hdev);
 +      int (*resume)(struct hl_device *hdev);
 +      int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 +                      u64 kaddress, phys_addr_t paddress, u32 size);
 +      void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
 +      void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
 +                      struct hl_bd *bd);
 +      void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
 +                                      dma_addr_t *dma_handle, gfp_t flag);
 +      void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
 +                                      void *cpu_addr, dma_addr_t dma_handle);
 +      void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
 +                              dma_addr_t *dma_handle, u16 *queue_len);
 +      int (*test_queues)(struct hl_device *hdev);
 +      void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
 +                              gfp_t mem_flags, dma_addr_t *dma_handle);
 +      void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
 +                              dma_addr_t dma_addr);
 +      void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
 +                              size_t size, dma_addr_t *dma_handle);
 +      void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
 +                              size_t size, void *vaddr);
 +      void (*hl_dma_unmap_sg)(struct hl_device *hdev,
 +                              struct scatterlist *sgl, int nents,
 +                              enum dma_data_direction dir);
 +      int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
 +      int (*asic_dma_map_sg)(struct hl_device *hdev,
 +                              struct scatterlist *sgl, int nents,
 +                              enum dma_data_direction dir);
 +      u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
 +                                      struct sg_table *sgt);
 +      void (*add_end_of_cb_packets)(struct hl_device *hdev,
 +                                      u64 kernel_address, u32 len,
 +                                      u64 cq_addr, u32 cq_val, u32 msix_num,
 +                                      bool eb);
 +      void (*update_eq_ci)(struct hl_device *hdev, u32 val);
 +      int (*context_switch)(struct hl_device *hdev, u32 asid);
 +      void (*restore_phase_topology)(struct hl_device *hdev);
 +      int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val);
 +      int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val);
 +      int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val);
 +      int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val);
 +      void (*add_device_attr)(struct hl_device *hdev,
 +                              struct attribute_group *dev_attr_grp);
 +      void (*handle_eqe)(struct hl_device *hdev,
 +                              struct hl_eq_entry *eq_entry);
 +      void (*set_pll_profile)(struct hl_device *hdev,
 +                      enum hl_pll_frequency freq);
 +      void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
 +                              u32 *size);
 +      u64 (*read_pte)(struct hl_device *hdev, u64 addr);
 +      void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
 +      int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
 +                                      u32 flags);
 +      int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
 +                      u32 asid, u64 va, u64 size);
 +      int (*send_heartbeat)(struct hl_device *hdev);
-  * @clock_gating: is clock gating enabled.
++      void (*set_clock_gating)(struct hl_device *hdev);
 +      void (*disable_clock_gating)(struct hl_device *hdev);
 +      int (*debug_coresight)(struct hl_device *hdev, void *data);
 +      bool (*is_device_idle)(struct hl_device *hdev, u32 *mask,
 +                              struct seq_file *s);
 +      int (*soft_reset_late_init)(struct hl_device *hdev);
 +      void (*hw_queues_lock)(struct hl_device *hdev);
 +      void (*hw_queues_unlock)(struct hl_device *hdev);
 +      u32 (*get_pci_id)(struct hl_device *hdev);
 +      int (*get_eeprom_data)(struct hl_device *hdev, void *data,
 +                              size_t max_size);
 +      int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
 +                              u16 len, u32 timeout, long *result);
 +      enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
 +      int (*pci_bars_map)(struct hl_device *hdev);
 +      u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
 +      int (*init_iatu)(struct hl_device *hdev);
 +      u32 (*rreg)(struct hl_device *hdev, u32 reg);
 +      void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 +      void (*halt_coresight)(struct hl_device *hdev);
 +      int (*ctx_init)(struct hl_ctx *ctx);
 +      int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 +      u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx);
 +      void (*read_device_fw_version)(struct hl_device *hdev,
 +                                      enum hl_fw_component fwc);
 +      int (*load_firmware_to_device)(struct hl_device *hdev);
 +      int (*load_boot_fit_to_device)(struct hl_device *hdev);
 +      u32 (*get_signal_cb_size)(struct hl_device *hdev);
 +      u32 (*get_wait_cb_size)(struct hl_device *hdev);
 +      void (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id);
 +      void (*gen_wait_cb)(struct hl_device *hdev, void *data, u16 sob_id,
 +                              u16 sob_val, u16 mon_id, u32 q_idx);
 +      void (*reset_sob)(struct hl_device *hdev, void *data);
 +      void (*set_dma_mask_from_fw)(struct hl_device *hdev);
 +      u64 (*get_device_time)(struct hl_device *hdev);
 +};
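 +
 +/*
 + * Usage sketch for the send_cpu_message hook documented above: a timeout of
 + * 0 selects the ASIC's default. The packet layout mirrors the other ArmCP
 + * helpers in this series; the TEST opcode/fence pair is assumed from
 + * armcp_if.h and the function name is illustrative.
 + */
 +static inline int example_fw_ping(struct hl_device *hdev)
 +{
 +	struct armcp_packet pkt = {};
 +	long result;
 +	int rc;
 +
 +	pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST << ARMCP_PKT_CTL_OPCODE_SHIFT);
 +	pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
 +
 +	/* timeout 0 -> ASIC default; result carries the F/W's reply value */
 +	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
 +						sizeof(pkt), 0, &result);
 +	if (!rc && result != ARMCP_PACKET_FENCE_VAL)
 +		rc = -EIO;
 +
 +	return rc;
 +}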
 +
 +
 +/*
 + * CONTEXTS
 + */
 +
 +#define HL_KERNEL_ASID_ID     0
 +
 +/**
 + * struct hl_va_range - virtual addresses range.
 + * @lock: protects the virtual addresses list.
 + * @list: list of virtual addresses blocks available for mappings.
 + * @start_addr: range start address.
 + * @end_addr: range end address.
 + */
 +struct hl_va_range {
 +      struct mutex            lock;
 +      struct list_head        list;
 +      u64                     start_addr;
 +      u64                     end_addr;
 +};
 +
 +/**
 + * struct hl_ctx - user/kernel context.
 + * @mem_hash: holds mapping from virtual address to virtual memory area
 + *            descriptor (hl_vm_phys_pg_list or hl_userptr).
 + * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
 + * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
 + * @hdev: pointer to the device structure.
 + * @refcount: reference counter for the context. Context is released only when
 + *            this hits 0. It is incremented on CS and CS_WAIT.
 + * @cs_pending: array of DMA fence objects representing pending CS.
 + * @host_va_range: holds available virtual addresses for host mappings.
 + * @host_huge_va_range: holds available virtual addresses for host mappings
 + *                      with huge pages.
 + * @dram_va_range: holds available virtual addresses for DRAM mappings.
 + * @mem_hash_lock: protects the mem_hash.
 + * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
 + *            MMU hash or walking the PGT requires taking this lock.
 + * @debugfs_list: node in debugfs list of contexts.
 + * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
 + *                    to the user so the user can inquire about the CS. It is
 + *                    used as an index to the cs_pending array.
 + * @dram_default_hops: array that holds all hops addresses needed for default
 + *                     DRAM mapping.
 + * @cs_lock: spinlock to protect cs_sequence.
 + * @dram_phys_mem: amount of used physical DRAM memory by this context.
 + * @thread_ctx_switch_token: token to prevent multiple threads of the same
 + *                            context from running the context switch phase.
 + *                            Only a single thread should run it.
 + * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
 + *                            the context switch phase from moving to their
 + *                            execution phase before the context switch phase
 + *                            has finished.
 + * @asid: context's unique address space ID in the device's MMU.
 + * @handle: context's opaque handle for user
 + */
 +struct hl_ctx {
 +      DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
 +      DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
 +      struct hl_fpriv         *hpriv;
 +      struct hl_device        *hdev;
 +      struct kref             refcount;
 +      struct dma_fence        **cs_pending;
 +      struct hl_va_range      *host_va_range;
 +      struct hl_va_range      *host_huge_va_range;
 +      struct hl_va_range      *dram_va_range;
 +      struct mutex            mem_hash_lock;
 +      struct mutex            mmu_lock;
 +      struct list_head        debugfs_list;
 +      struct hl_cs_counters   cs_counters;
 +      u64                     cs_sequence;
 +      u64                     *dram_default_hops;
 +      spinlock_t              cs_lock;
 +      atomic64_t              dram_phys_mem;
 +      atomic_t                thread_ctx_switch_token;
 +      u32                     thread_ctx_switch_wait_token;
 +      u32                     asid;
 +      u32                     handle;
 +};
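 +
 +/*
 + * Sketch of the single-thread gating described for thread_ctx_switch_token
 + * above: only the thread that flips the token 1 -> 0 runs the context
 + * switch phase. The waiting side (thread_ctx_switch_wait_token) is elided
 + * and the helper name is illustrative.
 + */
 +static inline bool example_take_ctx_switch_token(struct hl_ctx *ctx)
 +{
 +	/* atomic_cmpxchg returns the old value; 1 means we won the race */
 +	return atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0) == 1;
 +}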
 +
 +/**
 + * struct hl_ctx_mgr - for handling multiple contexts.
 + * @ctx_lock: protects ctx_handles.
 + * @ctx_handles: idr to hold all ctx handles.
 + */
 +struct hl_ctx_mgr {
 +      struct mutex            ctx_lock;
 +      struct idr              ctx_handles;
 +};
 +
 +
 +
 +/*
 + * COMMAND SUBMISSIONS
 + */
 +
 +/**
 + * struct hl_userptr - memory mapping chunk information
 + * @vm_type: type of the VM.
 + * @job_node: linked-list node for hanging the object on the Job's list.
 + * @vec: pointer to the frame vector.
 + * @sgt: pointer to the scatter-gather table that holds the pages.
 + * @dir: for DMA unmapping, the direction must be supplied, so save it.
 + * @debugfs_list: node in debugfs list of command submissions.
 + * @addr: user-space virtual address of the start of the memory area.
 + * @size: size of the memory area to pin & map.
 + * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
 + */
 +struct hl_userptr {
 +      enum vm_type_t          vm_type; /* must be first */
 +      struct list_head        job_node;
 +      struct frame_vector     *vec;
 +      struct sg_table         *sgt;
 +      enum dma_data_direction dir;
 +      struct list_head        debugfs_list;
 +      u64                     addr;
 +      u32                     size;
 +      u8                      dma_mapped;
 +};
 +
 +/**
 + * struct hl_cs - command submission.
 + * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
 + * @ctx: the context this CS belongs to.
 + * @job_list: list of the CS's jobs in the various queues.
 + * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
 + * @refcount: reference counter for usage of the CS.
 + * @fence: pointer to the fence object of this CS.
 + * @signal_fence: pointer to the fence object of the signal CS (used by wait
 + *                CS only).
 + * @finish_work: workqueue object to run when CS is completed by H/W.
 + * @work_tdr: delayed work node for TDR.
 + * @mirror_node: node in device mirror list of command submissions.
 + * @debugfs_list: node in debugfs list of command submissions.
 + * @sequence: the sequence number of this CS.
 + * @type: CS_TYPE_*.
 + * @submitted: true if CS was submitted to H/W.
 + * @completed: true if CS was completed by device.
 + * @timedout: true if the CS timed out.
 + * @tdr_active: true if TDR was activated for this CS (to prevent
 + *            double TDR activation).
 + * @aborted: true if CS was aborted due to some device error.
 + */
 +struct hl_cs {
 +      u16                     *jobs_in_queue_cnt;
 +      struct hl_ctx           *ctx;
 +      struct list_head        job_list;
 +      spinlock_t              job_lock;
 +      struct kref             refcount;
 +      struct dma_fence        *fence;
 +      struct dma_fence        *signal_fence;
 +      struct work_struct      finish_work;
 +      struct delayed_work     work_tdr;
 +      struct list_head        mirror_node;
 +      struct list_head        debugfs_list;
 +      u64                     sequence;
 +      enum hl_cs_type         type;
 +      u8                      submitted;
 +      u8                      completed;
 +      u8                      timedout;
 +      u8                      tdr_active;
 +      u8                      aborted;
 +};
 +
 +/**
 + * struct hl_cs_job - command submission job.
 + * @cs_node: the node to hang on the CS jobs list.
 + * @cs: the CS this job belongs to.
 + * @user_cb: the CB we got from the user.
 + * @patched_cb: in case of patching, this is internal CB which is submitted on
 + *            the queue instead of the CB we got from the IOCTL.
 + * @finish_work: workqueue object to run when job is completed.
 + * @userptr_list: linked-list of userptr mappings that belong to this job and
 + *                    wait for completion.
 + * @debugfs_list: node in debugfs list of command submission jobs.
 + * @queue_type: the type of the H/W queue this job is submitted to.
 + * @id: the id of this job inside a CS.
 + * @hw_queue_id: the id of the H/W queue this job is submitted to.
 + * @user_cb_size: the actual size of the CB we got from the user.
 + * @job_cb_size: the actual size of the CB that we put on the queue.
 + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
 + *                          handle to a kernel-allocated CB object, false
 + *                          otherwise (SRAM/DRAM/host address).
 + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
 + *                    info is needed later, when adding the 2xMSG_PROT at the
 + *                    end of the JOB, to know which barriers to put in the
 + *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
 + *                    have streams so the engine can't be busy by another
 + *                    stream.
 + */
 +struct hl_cs_job {
 +      struct list_head        cs_node;
 +      struct hl_cs            *cs;
 +      struct hl_cb            *user_cb;
 +      struct hl_cb            *patched_cb;
 +      struct work_struct      finish_work;
 +      struct list_head        userptr_list;
 +      struct list_head        debugfs_list;
 +      enum hl_queue_type      queue_type;
 +      u32                     id;
 +      u32                     hw_queue_id;
 +      u32                     user_cb_size;
 +      u32                     job_cb_size;
 +      u8                      is_kernel_allocated_cb;
 +      u8                      contains_dma_pkt;
 +};
 +
 +/**
 + * struct hl_cs_parser - command submission parser properties.
 + * @user_cb: the CB we got from the user.
 + * @patched_cb: in case of patching, this is internal CB which is submitted on
 + *            the queue instead of the CB we got from the IOCTL.
 + * @job_userptr_list: linked-list of userptr mappings that belong to the related
 + *                    job and wait for completion.
 + * @cs_sequence: the sequence number of the related CS.
 + * @queue_type: the type of the H/W queue this job is submitted to.
 + * @ctx_id: the ID of the context the related CS belongs to.
 + * @hw_queue_id: the id of the H/W queue this job is submitted to.
 + * @user_cb_size: the actual size of the CB we got from the user.
 + * @patched_cb_size: the size of the CB after parsing.
 + * @job_id: the id of the related job inside the related CS.
 + * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
 + *                          handle to a kernel-allocated CB object, false
 + *                          otherwise (SRAM/DRAM/host address).
 + * @contains_dma_pkt: whether the JOB contains at least one DMA packet. This
 + *                    info is needed later, when adding the 2xMSG_PROT at the
 + *                    end of the JOB, to know which barriers to put in the
 + *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
 + *                    have streams so the engine can't be busy by another
 + *                    stream.
 + */
 +struct hl_cs_parser {
 +      struct hl_cb            *user_cb;
 +      struct hl_cb            *patched_cb;
 +      struct list_head        *job_userptr_list;
 +      u64                     cs_sequence;
 +      enum hl_queue_type      queue_type;
 +      u32                     ctx_id;
 +      u32                     hw_queue_id;
 +      u32                     user_cb_size;
 +      u32                     patched_cb_size;
 +      u8                      job_id;
 +      u8                      is_kernel_allocated_cb;
 +      u8                      contains_dma_pkt;
 +};
 +
 +
 +/*
 + * MEMORY STRUCTURE
 + */
 +
 +/**
 + * struct hl_vm_hash_node - hash element from virtual address to virtual
 + *                            memory area descriptor (hl_vm_phys_pg_list or
 + *                            hl_userptr).
 + * @node: node to hang on the hash table in context object.
 + * @vaddr: key virtual address.
 + * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr).
 + */
 +struct hl_vm_hash_node {
 +      struct hlist_node       node;
 +      u64                     vaddr;
 +      void                    *ptr;
 +};
 +
 +/**
 + * struct hl_vm_phys_pg_pack - physical page pack.
 + * @vm_type: describes the type of the virtual area descriptor.
 + * @pages: the physical page array.
 + * @npages: num physical pages in the pack.
 + * @total_size: total size of all the pages in this list.
 + * @mapping_cnt: number of shared mappings.
 + * @asid: the context related to this list.
 + * @page_size: size of each page in the pack.
 + * @flags: HL_MEM_* flags related to this list.
 + * @handle: the provided handle related to this list.
 + * @offset: offset from the first page.
 + * @contiguous: is contiguous physical memory.
 + * @created_from_userptr: true if created from a host virtual address.
 + */
 +struct hl_vm_phys_pg_pack {
 +      enum vm_type_t          vm_type; /* must be first */
 +      u64                     *pages;
 +      u64                     npages;
 +      u64                     total_size;
 +      atomic_t                mapping_cnt;
 +      u32                     asid;
 +      u32                     page_size;
 +      u32                     flags;
 +      u32                     handle;
 +      u32                     offset;
 +      u8                      contiguous;
 +      u8                      created_from_userptr;
 +};
 +
 +/**
 + * struct hl_vm_va_block - virtual range block information.
 + * @node: node to hang on the virtual range list in context object.
 + * @start: virtual range start address.
 + * @end: virtual range end address.
 + * @size: virtual range size.
 + */
 +struct hl_vm_va_block {
 +      struct list_head        node;
 +      u64                     start;
 +      u64                     end;
 +      u64                     size;
 +};
 +
 +/**
 + * struct hl_vm - virtual memory manager for MMU.
 + * @dram_pg_pool: pool for DRAM physical pages of 2MB.
 + * @dram_pg_pool_refcount: reference counter for the pool usage.
 + * @idr_lock: protects the phys_pg_list_handles.
 + * @phys_pg_pack_handles: idr to hold all device allocations handles.
 + * @init_done: whether initialization was done. We need this because VM
 + *            initialization might be skipped during device initialization.
 + */
 +struct hl_vm {
 +      struct gen_pool         *dram_pg_pool;
 +      struct kref             dram_pg_pool_refcount;
 +      spinlock_t              idr_lock;
 +      struct idr              phys_pg_pack_handles;
 +      u8                      init_done;
 +};
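 +
 +/*
 + * Sketch of carving device DRAM from the pool above, assuming
 + * <linux/genalloc.h>; page_size would normally come from the ASIC
 + * properties, and the helper name is illustrative.
 + */
 +static inline u64 example_alloc_dram_page(struct hl_vm *vm, u32 page_size)
 +{
 +	/* returns 0 on failure, otherwise a device physical address */
 +	return (u64) gen_pool_alloc(vm->dram_pg_pool, page_size);
 +}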
 +
 +
 +/*
 + * DEBUG, PROFILING STRUCTURE
 + */
 +
 +/**
 + * struct hl_debug_params - Coresight debug parameters.
 + * @input: pointer to component specific input parameters.
 + * @output: pointer to component specific output parameters.
 + * @output_size: size of output buffer.
 + * @reg_idx: relevant register ID.
 + * @op: component operation to execute.
 + * @enable: true if to enable component debugging, false otherwise.
 + */
 +struct hl_debug_params {
 +      void *input;
 +      void *output;
 +      u32 output_size;
 +      u32 reg_idx;
 +      u32 op;
 +      bool enable;
 +};
 +
 +/*
 + * FILE PRIVATE STRUCTURE
 + */
 +
 +/**
 + * struct hl_fpriv - process information stored in FD private data.
 + * @hdev: habanalabs device structure.
 + * @filp: pointer to the given file structure.
 + * @taskpid: current process ID.
 + * @ctx: current executing context. TODO: remove for multiple ctx per process
 + * @ctx_mgr: context manager to handle multiple context for this FD.
 + * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
 + * @debugfs_list: list of relevant ASIC debugfs.
 + * @dev_node: node in the device list of file private data
 + * @refcount: number of related contexts.
 + * @restore_phase_mutex: lock for context switch and restore phase.
 + * @is_control: true for control device, false otherwise
 + */
 +struct hl_fpriv {
 +      struct hl_device        *hdev;
 +      struct file             *filp;
 +      struct pid              *taskpid;
 +      struct hl_ctx           *ctx;
 +      struct hl_ctx_mgr       ctx_mgr;
 +      struct hl_cb_mgr        cb_mgr;
 +      struct list_head        debugfs_list;
 +      struct list_head        dev_node;
 +      struct kref             refcount;
 +      struct mutex            restore_phase_mutex;
 +      u8                      is_control;
 +};
 +
 +
 +/*
 + * DebugFS
 + */
 +
 +/**
 + * struct hl_info_list - debugfs file ops.
 + * @name: file name.
 + * @show: function to output information.
 + * @write: function to write to the file.
 + */
 +struct hl_info_list {
 +      const char      *name;
 +      int             (*show)(struct seq_file *s, void *data);
 +      ssize_t         (*write)(struct file *file, const char __user *buf,
 +                              size_t count, loff_t *f_pos);
 +};
 +
 +/**
 + * struct hl_debugfs_entry - debugfs dentry wrapper.
 + * @dent: base debugfs entry structure.
 + * @info_ent: dentry related ops.
 + * @dev_entry: ASIC specific debugfs manager.
 + */
 +struct hl_debugfs_entry {
 +      struct dentry                   *dent;
 +      const struct hl_info_list       *info_ent;
 +      struct hl_dbg_device_entry      *dev_entry;
 +};
 +
 +/**
 + * struct hl_dbg_device_entry - ASIC specific debugfs manager.
 + * @root: root dentry.
 + * @hdev: habanalabs device structure.
 + * @entry_arr: array of available hl_debugfs_entry.
 + * @file_list: list of available debugfs files.
 + * @file_mutex: protects file_list.
 + * @cb_list: list of available CBs.
 + * @cb_spinlock: protects cb_list.
 + * @cs_list: list of available CSs.
 + * @cs_spinlock: protects cs_list.
 + * @cs_job_list: list of available CB jobs.
 + * @cs_job_spinlock: protects cs_job_list.
 + * @userptr_list: list of available userptrs (virtual memory chunk descriptor).
 + * @userptr_spinlock: protects userptr_list.
 + * @ctx_mem_hash_list: list of available contexts with MMU mappings.
 + * @ctx_mem_hash_spinlock: protects ctx_mem_hash_list.
 + * @addr: next address to read/write from/to in read/write32.
 + * @mmu_addr: next virtual address to translate to physical address in mmu_show.
 + * @mmu_asid: ASID to use while translating in mmu_show.
 + * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
 + * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
 + * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
 + */
 +struct hl_dbg_device_entry {
 +      struct dentry                   *root;
 +      struct hl_device                *hdev;
 +      struct hl_debugfs_entry         *entry_arr;
 +      struct list_head                file_list;
 +      struct mutex                    file_mutex;
 +      struct list_head                cb_list;
 +      spinlock_t                      cb_spinlock;
 +      struct list_head                cs_list;
 +      spinlock_t                      cs_spinlock;
 +      struct list_head                cs_job_list;
 +      spinlock_t                      cs_job_spinlock;
 +      struct list_head                userptr_list;
 +      spinlock_t                      userptr_spinlock;
 +      struct list_head                ctx_mem_hash_list;
 +      spinlock_t                      ctx_mem_hash_spinlock;
 +      u64                             addr;
 +      u64                             mmu_addr;
 +      u32                             mmu_asid;
 +      u8                              i2c_bus;
 +      u8                              i2c_addr;
 +      u8                              i2c_reg;
 +};
 +
 +
 +/*
 + * DEVICES
 + */
 +
 +/* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
 + * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
 + */
 +#define HL_MAX_MINORS 256
 +
 +/*
 + * Registers read & write functions.
 + */
 +
 +u32 hl_rreg(struct hl_device *hdev, u32 reg);
 +void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 +
 +#define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
 +#define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
 +#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n",  \
 +                      hdev->asic_funcs->rreg(hdev, (reg)))
 +
 +#define WREG32_P(reg, val, mask)                              \
 +      do {                                                    \
 +              u32 tmp_ = RREG32(reg);                         \
 +              tmp_ &= (mask);                                 \
 +              tmp_ |= ((val) & ~(mask));                      \
 +              WREG32(reg, tmp_);                              \
 +      } while (0)
 +#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
 +#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
 +
 +#define RMWREG32(reg, val, mask)                              \
 +      do {                                                    \
 +              u32 tmp_ = RREG32(reg);                         \
 +              tmp_ &= ~(mask);                                \
 +              tmp_ |= ((val) << __ffs(mask));                 \
 +              WREG32(reg, tmp_);                              \
 +      } while (0)
 +
 +#define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
 +
 +#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
 +#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
 +#define WREG32_FIELD(reg, offset, field, val) \
 +      WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
 +                              ~REG_FIELD_MASK(reg, field)) | \
 +                              (val) << REG_FIELD_SHIFT(reg, field))
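Taken together, these macros layer plain reads/writes, masked read-modify-write, and field-level access on top of the ASIC's rreg/wreg hooks. A minimal usage sketch follows; mmEXAMPLE_CTRL is a made-up register offset for illustration, not a real ASIC register:

        #define mmEXAMPLE_CTRL  0x7FFC  /* hypothetical register offset */

        static void example_reg_access(struct hl_device *hdev)
        {
                u32 ctl;

                ctl = RREG32(mmEXAMPLE_CTRL);           /* plain 32-bit read */
                WREG32(mmEXAMPLE_CTRL, ctl | 0x1);      /* plain 32-bit write */

                /* Read-modify-write: clear the bits selected by the mask,
                 * then write 0x3 into them, shifted to the mask's lowest
                 * set bit by __ffs().
                 */
                RMWREG32(mmEXAMPLE_CTRL, 0x3, 0x0000FF00);
        }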
 +
 +/* Timeout should be longer when working with the simulator, but cap the
 + * increased timeout at some maximum
 + */
 +#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
 +({ \
 +      ktime_t __timeout; \
 +      if (hdev->pdev) \
 +              __timeout = ktime_add_us(ktime_get(), timeout_us); \
 +      else \
 +              __timeout = ktime_add_us(ktime_get(),\
 +                              min((u64)(timeout_us * 10), \
 +                                      (u64) HL_SIM_MAX_TIMEOUT_US)); \
 +      might_sleep_if(sleep_us); \
 +      for (;;) { \
 +              (val) = RREG32(addr); \
 +              if (cond) \
 +                      break; \
 +              if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
 +                      (val) = RREG32(addr); \
 +                      break; \
 +              } \
 +              if (sleep_us) \
 +                      usleep_range((sleep_us >> 2) + 1, sleep_us); \
 +      } \
 +      (cond) ? 0 : -ETIMEDOUT; \
 +})
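A hedged usage sketch, reusing the hypothetical mmEXAMPLE_CTRL offset from above: poll until a done bit is set, sampling every ~100us and giving up after 1s.

        static int example_wait_for_done(struct hl_device *hdev)
        {
                u32 status;

                /* Evaluates to 0 on success or -ETIMEDOUT if the bit never set */
                return hl_poll_timeout(hdev, mmEXAMPLE_CTRL, status,
                                        (status & 0x1), 100, 1000000);
        }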
 +
 +/*
 + * The address in this macro always points to a memory location in the
 + * host's (server's) memory. That location is updated asynchronously,
 + * either by direct access of the device or by another core.
 + *
 + * To work on both LE and BE architectures, we need to distinguish between
 + * the two cases (device or another core updating the memory location).
 + * Therefore, if mem_written_by_device is true, the host memory being polled
 + * is updated directly by the device; if false, it is updated by the host
 + * CPU. This is required so the host knows whether the value might need to
 + * be byte-swapped before being returned to the caller.
 + */
 +#define hl_poll_timeout_memory(hdev, addr, val, cond, sleep_us, timeout_us, \
 +                              mem_written_by_device) \
 +({ \
 +      ktime_t __timeout; \
 +      if (hdev->pdev) \
 +              __timeout = ktime_add_us(ktime_get(), timeout_us); \
 +      else \
 +              __timeout = ktime_add_us(ktime_get(),\
 +                              min((u64)(timeout_us * 10), \
 +                                      (u64) HL_SIM_MAX_TIMEOUT_US)); \
 +      might_sleep_if(sleep_us); \
 +      for (;;) { \
 +              /* Verify we read updates done by other cores or by device */ \
 +              mb(); \
 +              (val) = *((u32 *) (uintptr_t) (addr)); \
 +              if (mem_written_by_device) \
 +                      (val) = le32_to_cpu(*(__le32 *) &(val)); \
 +              if (cond) \
 +                      break; \
 +              if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
 +                      (val) = *((u32 *) (uintptr_t) (addr)); \
 +                      if (mem_written_by_device) \
 +                              (val) = le32_to_cpu(*(__le32 *) &(val)); \
 +                      break; \
 +              } \
 +              if (sleep_us) \
 +                      usleep_range((sleep_us >> 2) + 1, sleep_us); \
 +      } \
 +      (cond) ? 0 : -ETIMEDOUT; \
 +})
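A minimal sketch of the memory-polling variant, assuming a fence value that the device writes directly into host memory (hence mem_written_by_device is true, so the sampled value is byte-swapped on big-endian hosts):

        static int example_wait_fence(struct hl_device *hdev, u32 *fence_ptr,
                                        u32 target)
        {
                u32 val;

                /* Sample every ~1ms, time out after 1s */
                return hl_poll_timeout_memory(hdev, fence_ptr, val,
                                        (val == target), 1000, 1000000, true);
        }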
 +
 +#define hl_poll_timeout_device_memory(hdev, addr, val, cond, sleep_us, \
 +                                      timeout_us) \
 +({ \
 +      ktime_t __timeout; \
 +      if (hdev->pdev) \
 +              __timeout = ktime_add_us(ktime_get(), timeout_us); \
 +      else \
 +              __timeout = ktime_add_us(ktime_get(),\
 +                              min((u64)(timeout_us * 10), \
 +                                      (u64) HL_SIM_MAX_TIMEOUT_US)); \
 +      might_sleep_if(sleep_us); \
 +      for (;;) { \
 +              (val) = readl(addr); \
 +              if (cond) \
 +                      break; \
 +              if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \
 +                      (val) = readl(addr); \
 +                      break; \
 +              } \
 +              if (sleep_us) \
 +                      usleep_range((sleep_us >> 2) + 1, sleep_us); \
 +      } \
 +      (cond) ? 0 : -ETIMEDOUT; \
 +})
 +
 +struct hwmon_chip_info;
 +
 +/**
 + * struct hl_device_reset_work - reset workqueue task wrapper.
 + * @reset_work: reset work to be done.
 + * @hdev: habanalabs device structure.
 + */
 +struct hl_device_reset_work {
 +      struct work_struct              reset_work;
 +      struct hl_device                *hdev;
 +};
 +
 +/**
 + * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
 + * @idle_to_busy_ts: timestamp where device changed from idle to busy.
 + * @busy_to_idle_ts: timestamp where device changed from busy to idle.
 + */
 +struct hl_device_idle_busy_ts {
 +      ktime_t                         idle_to_busy_ts;
 +      ktime_t                         busy_to_idle_ts;
 +};
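The real computation lives in hl_device_utilization(); purely to illustrate the idea, busy intervals from an array of such timestamp pairs could be summed and divided by the observation period. A simplified sketch that ignores wrap-around and still-open intervals:

        static u32 example_utilization(struct hl_device_idle_busy_ts *arr, int n,
                                        ktime_t period_start, ktime_t period_end)
        {
                s64 busy_us = 0, period_us;
                int i;

                for (i = 0 ; i < n ; i++)
                        busy_us += ktime_us_delta(arr[i].busy_to_idle_ts,
                                                arr[i].idle_to_busy_ts);

                period_us = ktime_us_delta(period_end, period_start);

                /* Utilization as a percentage of the observed period */
                return period_us ? div64_s64(busy_us * 100, period_us) : 0;
        }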
 +
 +/**
 + * struct hl_device - habanalabs device structure.
 + * @pdev: pointer to PCI device, can be NULL in case of simulator device.
 + * @pcie_bar_phys: array of available PCIe bars physical addresses.
 + *               (required only for PCI address match mode)
 + * @pcie_bar: array of available PCIe bars virtual addresses.
 + * @rmmio: configuration area address on SRAM.
 + * @cdev: related char device.
 + * @cdev_ctrl: char device for control operations only (INFO IOCTL)
 + * @dev: related kernel basic device structure.
 + * @dev_ctrl: related kernel device structure for the control device
 + * @work_freq: delayed work to lower device frequency if possible.
 + * @work_heartbeat: delayed work for ArmCP is-alive check.
 + * @asic_name: ASIC specific name.
 + * @asic_type: ASIC specific type.
 + * @completion_queue: array of hl_cq.
 + * @cq_wq: work queues of completion queues for executing work in process
 + *         context.
 + * @eq_wq: work queue of event queue for executing work in process context.
 + * @kernel_ctx: Kernel driver context structure.
 + * @kernel_queues: array of hl_hw_queue.
 + * @hw_queues_mirror_list: CS mirror list for TDR.
 + * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
 + * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CBs.
 + * @event_queue: event queue for IRQ from ArmCP.
 + * @dma_pool: DMA pool for small allocations.
 + * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
 + * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
 + * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
 + * @asid_bitmap: holds used/available ASIDs.
 + * @asid_mutex: protects asid_bitmap.
 + * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
 + * @debug_lock: protects critical section of setting debug mode for device
 + * @asic_prop: ASIC specific immutable properties.
 + * @asic_funcs: ASIC specific functions.
 + * @asic_specific: ASIC specific information to use only from ASIC files.
 + * @mmu_pgt_pool: pool of available MMU hops.
 + * @vm: virtual memory manager for MMU.
 + * @mmu_cache_lock: protects MMU cache invalidation as it can serve only one
 + *                  context at a time.
 + * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
 + * @hwmon_dev: H/W monitor device.
 + * @pm_mng_profile: current power management profile.
 + * @hl_chip_info: ASIC's sensors information.
 + * @hl_debugfs: device's debugfs manager.
 + * @cb_pool: list of preallocated CBs.
 + * @cb_pool_lock: protects the CB pool.
 + * @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
 + * @internal_cb_pool_dma_addr: internal command buffer pool dma address.
 + * @internal_cb_pool: internal command buffer memory pool.
 + * @internal_cb_va_base: internal cb pool mmu virtual address base
 + * @fpriv_list: list of file private data structures. Each structure is created
 + *              when a user opens the device
 + * @fpriv_list_lock: protects the fpriv_list
 + * @compute_ctx: current compute context executing.
 + * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
 + *                    and vice-versa
 + * @aggregated_cs_counters: aggregated cs counters among all contexts
 + * @dram_used_mem: current DRAM memory consumption.
 + * @timeout_jiffies: device CS timeout value.
 + * @max_power: the max power of the device, as configured by the sysadmin. This
 + *             value is saved so in case of hard-reset, the driver will restore
 + *             this value and update the F/W after the re-initialization
++ * @clock_gating_mask: bitmask of engines for which clock gating is enabled.
++ *                     See debugfs-driver-habanalabs for details.
 + * @in_reset: is device in reset flow.
 + * @curr_pll_profile: current PLL profile.
 + * @cs_active_cnt: number of active command submissions on this device (active
 + *                 means already in H/W queues)
 + * @major: habanalabs kernel driver major.
 + * @high_pll: high PLL profile frequency.
 + * @soft_reset_cnt: number of soft resets since the driver was loaded.
 + * @hard_reset_cnt: number of hard resets since the driver was loaded.
 + * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
 + * @id: device minor.
 + * @id_control: minor of the control device
 + * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
 + *                    addresses.
 + * @disabled: is device disabled.
 + * @late_init_done: whether the late init stage was done during initialization.
 + * @hwmon_initialized: whether the H/W monitor sensors were initialized.
 + * @hard_reset_pending: is there a hard reset work pending.
 + * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
 + * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
 + *                   otherwise.
 + * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
 + * @dram_default_page_mapping: is DRAM default page mapping enabled.
 + * @pmmu_huge_range: is a different virtual addresses range used for PMMU with
 + *                   huge pages.
 + * @init_done: is the initialization of the device done.
 + * @mmu_enable: is MMU enabled.
 + * @mmu_huge_page_opt: is MMU huge pages optimization enabled.
-       u8                              clock_gating;
 + * @device_cpu_disabled: is the device CPU disabled (due to timeouts)
 + * @dma_mask: the dma mask that was set for this device
 + * @in_debug: is device under debug. This, together with fpriv_list, enforces
 + *            that only a single user is configuring the debug infrastructure.
 + * @power9_64bit_dma_enable: true to enable 64-bit DMA mask support. Relevant
 + *                           only to POWER9 machines.
 + * @cdev_sysfs_created: were char devices and sysfs nodes created.
 + * @stop_on_err: true if engines should stop on error.
 + * @supports_sync_stream: is sync stream supported.
 + * @sync_stream_queue_idx: helper index for sync stream queues initialization.
 + * @supports_coresight: is CoreSight supported.
 + * @supports_soft_reset: is soft reset supported.
 + */
 +struct hl_device {
 +      struct pci_dev                  *pdev;
 +      u64                             pcie_bar_phys[HL_PCI_NUM_BARS];
 +      void __iomem                    *pcie_bar[HL_PCI_NUM_BARS];
 +      void __iomem                    *rmmio;
 +      struct cdev                     cdev;
 +      struct cdev                     cdev_ctrl;
 +      struct device                   *dev;
 +      struct device                   *dev_ctrl;
 +      struct delayed_work             work_freq;
 +      struct delayed_work             work_heartbeat;
 +      char                            asic_name[16];
 +      enum hl_asic_type               asic_type;
 +      struct hl_cq                    *completion_queue;
 +      struct workqueue_struct         **cq_wq;
 +      struct workqueue_struct         *eq_wq;
 +      struct hl_ctx                   *kernel_ctx;
 +      struct hl_hw_queue              *kernel_queues;
 +      struct list_head                hw_queues_mirror_list;
 +      spinlock_t                      hw_queues_mirror_lock;
 +      struct hl_cb_mgr                kernel_cb_mgr;
 +      struct hl_eq                    event_queue;
 +      struct dma_pool                 *dma_pool;
 +      void                            *cpu_accessible_dma_mem;
 +      dma_addr_t                      cpu_accessible_dma_address;
 +      struct gen_pool                 *cpu_accessible_dma_pool;
 +      unsigned long                   *asid_bitmap;
 +      struct mutex                    asid_mutex;
 +      struct mutex                    send_cpu_message_lock;
 +      struct mutex                    debug_lock;
 +      struct asic_fixed_properties    asic_prop;
 +      const struct hl_asic_funcs      *asic_funcs;
 +      void                            *asic_specific;
 +      struct gen_pool                 *mmu_pgt_pool;
 +      struct hl_vm                    vm;
 +      struct mutex                    mmu_cache_lock;
 +      void                            *mmu_shadow_hop0;
 +      struct device                   *hwmon_dev;
 +      enum hl_pm_mng_profile          pm_mng_profile;
 +      struct hwmon_chip_info          *hl_chip_info;
 +
 +      struct hl_dbg_device_entry      hl_debugfs;
 +
 +      struct list_head                cb_pool;
 +      spinlock_t                      cb_pool_lock;
 +
 +      void                            *internal_cb_pool_virt_addr;
 +      dma_addr_t                      internal_cb_pool_dma_addr;
 +      struct gen_pool                 *internal_cb_pool;
 +      u64                             internal_cb_va_base;
 +
 +      struct list_head                fpriv_list;
 +      struct mutex                    fpriv_list_lock;
 +
 +      struct hl_ctx                   *compute_ctx;
 +
 +      struct hl_device_idle_busy_ts   *idle_busy_ts_arr;
 +
 +      struct hl_cs_counters           aggregated_cs_counters;
 +
 +      atomic64_t                      dram_used_mem;
 +      u64                             timeout_jiffies;
 +      u64                             max_power;
++      u64                             clock_gating_mask;
 +      atomic_t                        in_reset;
 +      enum hl_pll_frequency           curr_pll_profile;
 +      int                             cs_active_cnt;
 +      u32                             major;
 +      u32                             high_pll;
 +      u32                             soft_reset_cnt;
 +      u32                             hard_reset_cnt;
 +      u32                             idle_busy_ts_idx;
 +      u16                             id;
 +      u16                             id_control;
 +      u16                             cpu_pci_msb_addr;
 +      u8                              disabled;
 +      u8                              late_init_done;
 +      u8                              hwmon_initialized;
 +      u8                              hard_reset_pending;
 +      u8                              heartbeat;
 +      u8                              reset_on_lockup;
 +      u8                              dram_supports_virtual_memory;
 +      u8                              dram_default_page_mapping;
 +      u8                              pmmu_huge_range;
 +      u8                              init_done;
 +      u8                              device_cpu_disabled;
 +      u8                              dma_mask;
 +      u8                              in_debug;
 +      u8                              power9_64bit_dma_enable;
 +      u8                              cdev_sysfs_created;
 +      u8                              stop_on_err;
 +      u8                              supports_sync_stream;
 +      u8                              sync_stream_queue_idx;
 +      u8                              supports_coresight;
 +      u8                              supports_soft_reset;
 +
 +      /* Parameters for bring-up */
 +      u8                              mmu_enable;
 +      u8                              mmu_huge_page_opt;
 +      u8                              cpu_enable;
 +      u8                              reset_pcilink;
 +      u8                              cpu_queues_enable;
 +      u8                              fw_loading;
 +      u8                              pldm;
 +      u8                              axi_drain;
 +      u8                              sram_scrambler_enable;
 +      u8                              dram_scrambler_enable;
 +      u8                              hard_reset_on_fw_events;
 +      u8                              bmc_enable;
 +      u8                              rl_enable;
 +};
 +
 +
 +/*
 + * IOCTLs
 + */
 +
 +/**
 + * typedef hl_ioctl_t - typedef for ioctl function in the driver
 + * @hpriv: pointer to the FD's private data, which contains state of
 + *            user process
 + * @data: pointer to the input/output arguments structure of the IOCTL
 + *
 + * Return: 0 for success, negative value for error
 + */
 +typedef int hl_ioctl_t(struct hl_fpriv *hpriv, void *data);
 +
 +/**
 + * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
 + * @cmd: the IOCTL code as created by the kernel macros.
 + * @func: pointer to the driver's function that should be called for this IOCTL.
 + */
 +struct hl_ioctl_desc {
 +      unsigned int cmd;
 +      hl_ioctl_t *func;
 +};
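The driver dispatches by looking the command up in a table of such descriptors. A sketch of what an entry table looks like; it assumes the HL_IOCTL_* codes from the uapi header and the handlers declared near the end of this file (the real table lives in the driver's ioctl source, not here):

        /* Sketch only; the actual table is defined elsewhere in the driver. */
        static const struct hl_ioctl_desc example_hl_ioctls[] = {
                { HL_IOCTL_CB, hl_cb_ioctl },           /* command buffer create/destroy */
                { HL_IOCTL_CS, hl_cs_ioctl },           /* command submission */
                { HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl }, /* wait for a submission */
                { HL_IOCTL_MEMORY, hl_mem_ioctl },      /* memory map/unmap */
        };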
 +
 +
 +/*
 + * Kernel module functions that can be accessed by entire module
 + */
 +
 +/**
 + * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
 + * @address: The start address of the area we want to validate.
 + * @size: The size in bytes of the area we want to validate.
 + * @range_start_address: The start address of the valid range.
 + * @range_end_address: The end address of the valid range.
 + *
 + * Return: true if the area is inside the valid range, false otherwise.
 + */
 +static inline bool hl_mem_area_inside_range(u64 address, u32 size,
 +                              u64 range_start_address, u64 range_end_address)
 +{
 +      u64 end_address = address + size;
 +
 +      if ((address >= range_start_address) &&
 +                      (end_address <= range_end_address) &&
 +                      (end_address > address))
 +              return true;
 +
 +      return false;
 +}
 +
 +/**
 + * hl_mem_area_crosses_range() - Checks whether address+size crosses a range.
 + * @address: The start address of the area we want to validate.
 + * @size: The size in bytes of the area we want to validate.
 + * @range_start_address: The start address of the valid range.
 + * @range_end_address: The end address of the valid range.
 + *
 + * Return: true if the area overlaps part or all of the valid range,
 + *            false otherwise.
 + */
 +static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
 +                              u64 range_start_address, u64 range_end_address)
 +{
 +      u64 end_address = address + size;
 +
 +      if ((address >= range_start_address) &&
 +                      (address < range_end_address))
 +              return true;
 +
 +      if ((end_address >= range_start_address) &&
 +                      (end_address < range_end_address))
 +              return true;
 +
 +      if ((address < range_start_address) &&
 +                      (end_address >= range_end_address))
 +              return true;
 +
 +      return false;
 +}
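For example, validating that a user buffer sits entirely inside the device's DRAM window could look like the sketch below; the dram_user_base_address and dram_end_address fields of asic_prop are assumptions made for the illustration:

        static bool example_addr_in_dram(struct hl_device *hdev, u64 addr, u32 size)
        {
                /* Assumed asic_prop field names, for illustration only */
                struct asic_fixed_properties *prop = &hdev->asic_prop;

                return hl_mem_area_inside_range(addr, size,
                                                prop->dram_user_base_address,
                                                prop->dram_end_address);
        }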
 +
 +int hl_device_open(struct inode *inode, struct file *filp);
 +int hl_device_open_ctrl(struct inode *inode, struct file *filp);
 +bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
 +enum hl_device_status hl_device_status(struct hl_device *hdev);
 +int hl_device_set_debug_mode(struct hl_device *hdev, bool enable);
 +int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 +              enum hl_asic_type asic_type, int minor);
 +void destroy_hdev(struct hl_device *hdev);
 +int hl_hw_queues_create(struct hl_device *hdev);
 +void hl_hw_queues_destroy(struct hl_device *hdev);
 +int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 +                              u32 cb_size, u64 cb_ptr);
 +int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 +u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 +void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
 +void hl_int_hw_queue_update_ci(struct hl_cs *cs);
 +void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
 +
 +#define hl_queue_inc_ptr(p)           hl_hw_queue_add_ptr(p, 1)
 +#define hl_pi_2_offset(pi)            ((pi) & (HL_QUEUE_LENGTH - 1))
 +
 +int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
 +void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
 +int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
 +void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
 +void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
 +void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
 +irqreturn_t hl_irq_handler_cq(int irq, void *arg);
 +irqreturn_t hl_irq_handler_eq(int irq, void *arg);
 +u32 hl_cq_inc_ptr(u32 ptr);
 +
 +int hl_asid_init(struct hl_device *hdev);
 +void hl_asid_fini(struct hl_device *hdev);
 +unsigned long hl_asid_alloc(struct hl_device *hdev);
 +void hl_asid_free(struct hl_device *hdev, unsigned long asid);
 +
 +int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
 +void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
 +int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
 +void hl_ctx_do_release(struct kref *ref);
 +void hl_ctx_get(struct hl_device *hdev,       struct hl_ctx *ctx);
 +int hl_ctx_put(struct hl_ctx *ctx);
 +struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
 +void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
 +void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
 +
 +int hl_device_init(struct hl_device *hdev, struct class *hclass);
 +void hl_device_fini(struct hl_device *hdev);
 +int hl_device_suspend(struct hl_device *hdev);
 +int hl_device_resume(struct hl_device *hdev);
 +int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 +                      bool from_hard_reset_thread);
 +void hl_hpriv_get(struct hl_fpriv *hpriv);
 +void hl_hpriv_put(struct hl_fpriv *hpriv);
 +int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
 +uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
 +
 +int hl_build_hwmon_channel_info(struct hl_device *hdev,
 +              struct armcp_sensor *sensors_arr);
 +
 +int hl_sysfs_init(struct hl_device *hdev);
 +void hl_sysfs_fini(struct hl_device *hdev);
 +
 +int hl_hwmon_init(struct hl_device *hdev);
 +void hl_hwmon_fini(struct hl_device *hdev);
 +
 +int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
 +              u64 *handle, int ctx_id, bool internal_cb);
 +int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 +int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 +struct hl_cb *hl_cb_get(struct hl_device *hdev,       struct hl_cb_mgr *mgr,
 +                      u32 handle);
 +void hl_cb_put(struct hl_cb *cb);
 +void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
 +void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
 +struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
 +                                      bool internal_cb);
 +int hl_cb_pool_init(struct hl_device *hdev);
 +int hl_cb_pool_fini(struct hl_device *hdev);
 +
 +void hl_cs_rollback_all(struct hl_device *hdev);
 +struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 +              enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 +void hl_sob_reset_error(struct kref *ref);
 +
 +void goya_set_asic_funcs(struct hl_device *hdev);
 +void gaudi_set_asic_funcs(struct hl_device *hdev);
 +
 +int hl_vm_ctx_init(struct hl_ctx *ctx);
 +void hl_vm_ctx_fini(struct hl_ctx *ctx);
 +
 +int hl_vm_init(struct hl_device *hdev);
 +void hl_vm_fini(struct hl_device *hdev);
 +
 +int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 +                      struct hl_userptr *userptr);
 +void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
 +void hl_userptr_delete_list(struct hl_device *hdev,
 +                              struct list_head *userptr_list);
 +bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
 +                              struct list_head *userptr_list,
 +                              struct hl_userptr **userptr);
 +
 +int hl_mmu_init(struct hl_device *hdev);
 +void hl_mmu_fini(struct hl_device *hdev);
 +int hl_mmu_ctx_init(struct hl_ctx *ctx);
 +void hl_mmu_ctx_fini(struct hl_ctx *ctx);
 +int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 +              u32 page_size, bool flush_pte);
 +int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 +              bool flush_pte);
 +void hl_mmu_swap_out(struct hl_ctx *ctx);
 +void hl_mmu_swap_in(struct hl_ctx *ctx);
 +
 +int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 +                              void __iomem *dst);
 +int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
 +int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 +                              u16 len, u32 timeout, long *result);
 +int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type);
 +int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
 +              size_t irq_arr_size);
 +int hl_fw_test_cpu_queue(struct hl_device *hdev);
 +void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
 +                                              dma_addr_t *dma_handle);
 +void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 +                                      void *vaddr);
 +int hl_fw_send_heartbeat(struct hl_device *hdev);
 +int hl_fw_armcp_info_get(struct hl_device *hdev);
 +int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 +int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 +                      u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 +                      u32 boot_err0_reg, bool skip_bmc,
 +                      u32 cpu_timeout, u32 boot_fit_timeout);
 +
 +int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
 +                      bool is_wc[3]);
 +int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
 +int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
 +                              u64 addr);
 +int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
 +              struct hl_inbound_pci_region *pci_region);
 +int hl_pci_set_outbound_region(struct hl_device *hdev,
 +              struct hl_outbound_pci_region *pci_region);
 +int hl_pci_init(struct hl_device *hdev);
 +void hl_pci_fini(struct hl_device *hdev);
 +
 +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
 +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
 +int hl_get_temperature(struct hl_device *hdev,
 +                     int sensor_index, u32 attr, long *value);
 +int hl_set_temperature(struct hl_device *hdev,
 +                     int sensor_index, u32 attr, long value);
 +int hl_get_voltage(struct hl_device *hdev,
 +                 int sensor_index, u32 attr, long *value);
 +int hl_get_current(struct hl_device *hdev,
 +                 int sensor_index, u32 attr, long *value);
 +int hl_get_fan_speed(struct hl_device *hdev,
 +                   int sensor_index, u32 attr, long *value);
 +int hl_get_pwm_info(struct hl_device *hdev,
 +                  int sensor_index, u32 attr, long *value);
 +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 +                      long value);
 +u64 hl_get_max_power(struct hl_device *hdev);
 +void hl_set_max_power(struct hl_device *hdev, u64 value);
 +int hl_set_voltage(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long value);
 +int hl_set_current(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long value);
 +
 +#ifdef CONFIG_DEBUG_FS
 +
 +void hl_debugfs_init(void);
 +void hl_debugfs_fini(void);
 +void hl_debugfs_add_device(struct hl_device *hdev);
 +void hl_debugfs_remove_device(struct hl_device *hdev);
 +void hl_debugfs_add_file(struct hl_fpriv *hpriv);
 +void hl_debugfs_remove_file(struct hl_fpriv *hpriv);
 +void hl_debugfs_add_cb(struct hl_cb *cb);
 +void hl_debugfs_remove_cb(struct hl_cb *cb);
 +void hl_debugfs_add_cs(struct hl_cs *cs);
 +void hl_debugfs_remove_cs(struct hl_cs *cs);
 +void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job);
 +void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job);
 +void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr);
 +void hl_debugfs_remove_userptr(struct hl_device *hdev,
 +                              struct hl_userptr *userptr);
 +void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
 +void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
 +
 +#else
 +
 +static inline void __init hl_debugfs_init(void)
 +{
 +}
 +
 +static inline void hl_debugfs_fini(void)
 +{
 +}
 +
 +static inline void hl_debugfs_add_device(struct hl_device *hdev)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_device(struct hl_device *hdev)
 +{
 +}
 +
 +static inline void hl_debugfs_add_file(struct hl_fpriv *hpriv)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
 +{
 +}
 +
 +static inline void hl_debugfs_add_cb(struct hl_cb *cb)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_cb(struct hl_cb *cb)
 +{
 +}
 +
 +static inline void hl_debugfs_add_cs(struct hl_cs *cs)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_cs(struct hl_cs *cs)
 +{
 +}
 +
 +static inline void hl_debugfs_add_job(struct hl_device *hdev,
 +                                      struct hl_cs_job *job)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_job(struct hl_device *hdev,
 +                                      struct hl_cs_job *job)
 +{
 +}
 +
 +static inline void hl_debugfs_add_userptr(struct hl_device *hdev,
 +                                      struct hl_userptr *userptr)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_userptr(struct hl_device *hdev,
 +                                      struct hl_userptr *userptr)
 +{
 +}
 +
 +static inline void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev,
 +                                      struct hl_ctx *ctx)
 +{
 +}
 +
 +static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
 +                                      struct hl_ctx *ctx)
 +{
 +}
 +
 +#endif
 +
 +/* IOCTLs */
 +long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 +long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg);
 +int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
 +int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
 +int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
 +int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
 +
 +#endif /* HABANALABSP_H_ */
index f38664b03865bdaf3eca859bbdf2b46e34997932,0000000000000000000000000000000000000000..c6b31e93fb5eba0e2488656b69f377f1e68b1222
mode 100644,000000..100644
--- /dev/null
@@@ -1,529 -1,0 +1,529 @@@
-       hdev->clock_gating = 1;
 +// SPDX-License-Identifier: GPL-2.0
 +
 +/*
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + *
 + */
 +
 +#define pr_fmt(fmt)           "habanalabs: " fmt
 +
 +#include "habanalabs.h"
 +
 +#include <linux/pci.h>
 +#include <linux/module.h>
 +
 +#define HL_DRIVER_AUTHOR      "HabanaLabs Kernel Driver Team"
 +
 +#define HL_DRIVER_DESC                "Driver for HabanaLabs's AI Accelerators"
 +
 +MODULE_AUTHOR(HL_DRIVER_AUTHOR);
 +MODULE_DESCRIPTION(HL_DRIVER_DESC);
 +MODULE_LICENSE("GPL v2");
 +
 +static int hl_major;
 +static struct class *hl_class;
 +static DEFINE_IDR(hl_devs_idr);
 +static DEFINE_MUTEX(hl_devs_idr_lock);
 +
 +static int timeout_locked = 5;
 +static int reset_on_lockup = 1;
 +
 +module_param(timeout_locked, int, 0444);
 +MODULE_PARM_DESC(timeout_locked,
 +      "Device lockup timeout in seconds (0 = disabled, default 5s)");
 +
 +module_param(reset_on_lockup, int, 0444);
 +MODULE_PARM_DESC(reset_on_lockup,
 +      "Do device reset on lockup (0 = no, 1 = yes, default yes)");
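Both parameters are 0444, i.e. read-only once loaded, so they are set at module load time. Assuming the module keeps the habanalabs name, a load line could look like:

        modprobe habanalabs timeout_locked=30 reset_on_lockup=0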
 +
 +#define PCI_VENDOR_ID_HABANALABS      0x1da3
 +
 +#define PCI_IDS_GOYA                  0x0001
 +#define PCI_IDS_GAUDI                 0x1000
 +
 +static const struct pci_device_id ids[] = {
 +      { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
 +      { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
 +      { 0, }
 +};
 +MODULE_DEVICE_TABLE(pci, ids);
 +
 +/*
 + * get_asic_type - translate device id to asic type
 + *
 + * @device: id of the PCI device
 + *
 + * Translate device id to asic type.
 + * In case of an unidentified device, return ASIC_INVALID
 + */
 +static enum hl_asic_type get_asic_type(u16 device)
 +{
 +      enum hl_asic_type asic_type;
 +
 +      switch (device) {
 +      case PCI_IDS_GOYA:
 +              asic_type = ASIC_GOYA;
 +              break;
 +      case PCI_IDS_GAUDI:
 +              asic_type = ASIC_GAUDI;
 +              break;
 +      default:
 +              asic_type = ASIC_INVALID;
 +              break;
 +      }
 +
 +      return asic_type;
 +}
 +
 +/*
 + * hl_device_open - open function for habanalabs device
 + *
 + * @inode: pointer to inode structure
 + * @filp: pointer to file structure
 + *
 + * Called when a process opens a habanalabs device.
 + */
 +int hl_device_open(struct inode *inode, struct file *filp)
 +{
 +      struct hl_device *hdev;
 +      struct hl_fpriv *hpriv;
 +      int rc;
 +
 +      mutex_lock(&hl_devs_idr_lock);
 +      hdev = idr_find(&hl_devs_idr, iminor(inode));
 +      mutex_unlock(&hl_devs_idr_lock);
 +
 +      if (!hdev) {
 +              pr_err("Couldn't find device %d:%d\n",
 +                      imajor(inode), iminor(inode));
 +              return -ENXIO;
 +      }
 +
 +      hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 +      if (!hpriv)
 +              return -ENOMEM;
 +
 +      hpriv->hdev = hdev;
 +      filp->private_data = hpriv;
 +      hpriv->filp = filp;
 +      mutex_init(&hpriv->restore_phase_mutex);
 +      kref_init(&hpriv->refcount);
 +      nonseekable_open(inode, filp);
 +
 +      hl_cb_mgr_init(&hpriv->cb_mgr);
 +      hl_ctx_mgr_init(&hpriv->ctx_mgr);
 +
 +      hpriv->taskpid = find_get_pid(current->pid);
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +
 +      if (hl_device_disabled_or_in_reset(hdev)) {
 +              dev_err_ratelimited(hdev->dev,
 +                      "Can't open %s because it is disabled or in reset\n",
 +                      dev_name(hdev->dev));
 +              rc = -EPERM;
 +              goto out_err;
 +      }
 +
 +      if (hdev->in_debug) {
 +              dev_err_ratelimited(hdev->dev,
 +                      "Can't open %s because it is being debugged by another user\n",
 +                      dev_name(hdev->dev));
 +              rc = -EPERM;
 +              goto out_err;
 +      }
 +
 +      if (hdev->compute_ctx) {
 +              dev_dbg_ratelimited(hdev->dev,
 +                      "Can't open %s because another user is working on it\n",
 +                      dev_name(hdev->dev));
 +              rc = -EBUSY;
 +              goto out_err;
 +      }
 +
 +      rc = hl_ctx_create(hdev, hpriv);
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to create context %d\n", rc);
 +              goto out_err;
 +      }
 +
 +      /* Device is IDLE at this point so it is legal to change PLLs.
 +       * There is no need to check anything because if the PLL is
 +       * already HIGH, the set function will return without doing
 +       * anything
 +       */
 +      hl_device_set_frequency(hdev, PLL_HIGH);
 +
 +      list_add(&hpriv->dev_node, &hdev->fpriv_list);
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      hl_debugfs_add_file(hpriv);
 +
 +      return 0;
 +
 +out_err:
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 +      hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 +      filp->private_data = NULL;
 +      mutex_destroy(&hpriv->restore_phase_mutex);
 +      put_pid(hpriv->taskpid);
 +
 +      kfree(hpriv);
 +
 +      return rc;
 +}
 +
 +int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 +{
 +      struct hl_device *hdev;
 +      struct hl_fpriv *hpriv;
 +      int rc;
 +
 +      mutex_lock(&hl_devs_idr_lock);
 +      hdev = idr_find(&hl_devs_idr, iminor(inode));
 +      mutex_unlock(&hl_devs_idr_lock);
 +
 +      if (!hdev) {
 +              pr_err("Couldn't find device %d:%d\n",
 +                      imajor(inode), iminor(inode));
 +              return -ENXIO;
 +      }
 +
 +      hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 +      if (!hpriv)
 +              return -ENOMEM;
 +
 +      mutex_lock(&hdev->fpriv_list_lock);
 +
 +      if (hl_device_disabled_or_in_reset(hdev)) {
 +              dev_err_ratelimited(hdev->dev_ctrl,
 +                      "Can't open %s because it is disabled or in reset\n",
 +                      dev_name(hdev->dev_ctrl));
 +              rc = -EPERM;
 +              goto out_err;
 +      }
 +
 +      list_add(&hpriv->dev_node, &hdev->fpriv_list);
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +
 +      hpriv->hdev = hdev;
 +      filp->private_data = hpriv;
 +      hpriv->filp = filp;
 +      hpriv->is_control = true;
 +      nonseekable_open(inode, filp);
 +
 +      hpriv->taskpid = find_get_pid(current->pid);
 +
 +      return 0;
 +
 +out_err:
 +      mutex_unlock(&hdev->fpriv_list_lock);
 +      kfree(hpriv);
 +      return rc;
 +}
 +
 +static void set_driver_behavior_per_device(struct hl_device *hdev)
 +{
 +      hdev->mmu_enable = 1;
 +      hdev->cpu_enable = 1;
 +      hdev->fw_loading = 1;
 +      hdev->cpu_queues_enable = 1;
 +      hdev->heartbeat = 1;
++      hdev->clock_gating_mask = ULONG_MAX;
 +
 +      hdev->reset_pcilink = 0;
 +      hdev->axi_drain = 0;
 +      hdev->sram_scrambler_enable = 1;
 +      hdev->dram_scrambler_enable = 1;
 +      hdev->bmc_enable = 1;
 +      hdev->hard_reset_on_fw_events = 1;
 +}
 +
 +/*
 + * create_hdev - create habanalabs device instance
 + *
 + * @dev: will hold the pointer to the new habanalabs device structure
 + * @pdev: pointer to the pci device
 + * @asic_type: in case of simulator device, which device is it
 + * @minor: in case of simulator device, the minor of the device
 + *
 + * Allocate memory for habanalabs device and initialize basic fields
 + * Identify the ASIC type
 + * Allocate ID (minor) for the device (only for real devices)
 + */
 +int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 +              enum hl_asic_type asic_type, int minor)
 +{
 +      struct hl_device *hdev;
 +      int rc, main_id, ctrl_id = 0;
 +
 +      *dev = NULL;
 +
 +      hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
 +      if (!hdev)
 +              return -ENOMEM;
 +
 +      /* First, we must find out which ASIC we are handling. This is needed
 +       * to configure the behavior of the driver (kernel parameters)
 +       */
 +      if (pdev) {
 +              hdev->asic_type = get_asic_type(pdev->device);
 +              if (hdev->asic_type == ASIC_INVALID) {
 +                      dev_err(&pdev->dev, "Unsupported ASIC\n");
 +                      rc = -ENODEV;
 +                      goto free_hdev;
 +              }
 +      } else {
 +              hdev->asic_type = asic_type;
 +      }
 +
 +      hdev->major = hl_major;
 +      hdev->reset_on_lockup = reset_on_lockup;
 +      hdev->pldm = 0;
 +
 +      set_driver_behavior_per_device(hdev);
 +
 +      if (timeout_locked)
 +              hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
 +      else
 +              hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
 +
 +      hdev->disabled = true;
 +      hdev->pdev = pdev; /* can be NULL in case of simulator device */
 +
 +      /* Set default DMA mask to 32 bits */
 +      hdev->dma_mask = 32;
 +
 +      mutex_lock(&hl_devs_idr_lock);
 +
 +      /* Always save 2 numbers, 1 for main device and 1 for control.
 +       * They must be consecutive
 +       */
 +      main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
 +                              GFP_KERNEL);
 +
 +      if (main_id >= 0)
 +              ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
 +                                      main_id + 2, GFP_KERNEL);
 +
 +      mutex_unlock(&hl_devs_idr_lock);
 +
 +      if ((main_id < 0) || (ctrl_id < 0)) {
 +              if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
 +                      pr_err("too many devices in the system\n");
 +
 +              if (main_id >= 0) {
 +                      mutex_lock(&hl_devs_idr_lock);
 +                      idr_remove(&hl_devs_idr, main_id);
 +                      mutex_unlock(&hl_devs_idr_lock);
 +              }
 +
 +              rc = -EBUSY;
 +              goto free_hdev;
 +      }
 +
 +      hdev->id = main_id;
 +      hdev->id_control = ctrl_id;
 +
 +      *dev = hdev;
 +
 +      return 0;
 +
 +free_hdev:
 +      kfree(hdev);
 +      return rc;
 +}
 +
 +/*
 + * destroy_hdev - destroy habanalabs device instance
 + *
 + * @dev: pointer to the habanalabs device structure
 + *
 + */
 +void destroy_hdev(struct hl_device *hdev)
 +{
 +      /* Remove device from the device list */
 +      mutex_lock(&hl_devs_idr_lock);
 +      idr_remove(&hl_devs_idr, hdev->id);
 +      idr_remove(&hl_devs_idr, hdev->id_control);
 +      mutex_unlock(&hl_devs_idr_lock);
 +
 +      kfree(hdev);
 +}
 +
 +static int hl_pmops_suspend(struct device *dev)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      pr_debug("Going to suspend PCI device\n");
 +
 +      if (!hdev) {
 +              pr_err("device pointer is NULL in suspend\n");
 +              return 0;
 +      }
 +
 +      return hl_device_suspend(hdev);
 +}
 +
 +static int hl_pmops_resume(struct device *dev)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      pr_debug("Going to resume PCI device\n");
 +
 +      if (!hdev) {
 +              pr_err("device pointer is NULL in resume\n");
 +              return 0;
 +      }
 +
 +      return hl_device_resume(hdev);
 +}
 +
 +/*
 + * hl_pci_probe - probe PCI habanalabs devices
 + *
 + * @pdev: pointer to pci device
 + * @id: pointer to pci device id structure
 + *
 + * Standard PCI probe function for habanalabs device.
 + * Create a new habanalabs device and initialize it according to the
 + * device's type
 + */
 +static int hl_pci_probe(struct pci_dev *pdev,
 +                              const struct pci_device_id *id)
 +{
 +      struct hl_device *hdev;
 +      int rc;
 +
 +      dev_info(&pdev->dev, HL_NAME
 +               " device found [%04x:%04x] (rev %x)\n",
 +               (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
 +
 +      rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
 +      if (rc)
 +              return rc;
 +
 +      pci_set_drvdata(pdev, hdev);
 +
 +      rc = hl_device_init(hdev, hl_class);
 +      if (rc) {
 +              dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
 +              rc = -ENODEV;
 +              goto disable_device;
 +      }
 +
 +      return 0;
 +
 +disable_device:
 +      pci_set_drvdata(pdev, NULL);
 +      destroy_hdev(hdev);
 +
 +      return rc;
 +}
 +
 +/*
 + * hl_pci_remove - remove PCI habanalabs devices
 + *
 + * @pdev: pointer to pci device
 + *
 + * Standard PCI remove function for habanalabs device
 + */
 +static void hl_pci_remove(struct pci_dev *pdev)
 +{
 +      struct hl_device *hdev;
 +
 +      hdev = pci_get_drvdata(pdev);
 +      if (!hdev)
 +              return;
 +
 +      hl_device_fini(hdev);
 +      pci_set_drvdata(pdev, NULL);
 +
 +      destroy_hdev(hdev);
 +}
 +
 +static const struct dev_pm_ops hl_pm_ops = {
 +      .suspend = hl_pmops_suspend,
 +      .resume = hl_pmops_resume,
 +};
 +
 +static struct pci_driver hl_pci_driver = {
 +      .name = HL_NAME,
 +      .id_table = ids,
 +      .probe = hl_pci_probe,
 +      .remove = hl_pci_remove,
 +      .driver.pm = &hl_pm_ops,
 +};
 +
 +/*
 + * hl_init - Initialize the habanalabs kernel driver
 + */
 +static int __init hl_init(void)
 +{
 +      int rc;
 +      dev_t dev;
 +
 +      pr_info("loading driver\n");
 +
 +      rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
 +      if (rc < 0) {
 +              pr_err("unable to get major\n");
 +              return rc;
 +      }
 +
 +      hl_major = MAJOR(dev);
 +
 +      hl_class = class_create(THIS_MODULE, HL_NAME);
 +      if (IS_ERR(hl_class)) {
 +              pr_err("failed to allocate class\n");
 +              rc = PTR_ERR(hl_class);
 +              goto remove_major;
 +      }
 +
 +      hl_debugfs_init();
 +
 +      rc = pci_register_driver(&hl_pci_driver);
 +      if (rc) {
 +              pr_err("failed to register pci device\n");
 +              goto remove_debugfs;
 +      }
 +
 +      pr_debug("driver loaded\n");
 +
 +      return 0;
 +
 +remove_debugfs:
 +      hl_debugfs_fini();
 +      class_destroy(hl_class);
 +remove_major:
 +      unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 +      return rc;
 +}
 +
 +/*
 + * hl_exit - Release all resources of the habanalabs kernel driver
 + */
 +static void __exit hl_exit(void)
 +{
 +      pci_unregister_driver(&hl_pci_driver);
 +
 +      /*
 +       * Removing debugfs must happen only after all devices or simulator
 +       * devices have been removed, because otherwise the debugfs module
 +       * hits a bug when referencing NULL objects
 +       */
 +      hl_debugfs_fini();
 +
 +      class_destroy(hl_class);
 +      unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 +
 +      idr_destroy(&hl_devs_idr);
 +
 +      pr_debug("driver removed\n");
 +}
 +
 +module_init(hl_init);
 +module_exit(hl_exit);
index 8c6cd77e6af6bd3b6ada7cd114867d9d528191a7,0000000000000000000000000000000000000000..b997336fa75fc88ad7a565163697b789e977f4ca
mode 100644,000000..100644
--- /dev/null
@@@ -1,579 -1,0 +1,578 @@@
- #define SENSORS_PKT_TIMEOUT           1000000 /* 1s */
 +// SPDX-License-Identifier: GPL-2.0
 +
 +/*
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + */
 +
 +#include "habanalabs.h"
 +
 +#include <linux/pci.h>
 +#include <linux/hwmon.h>
 +
-                       SENSORS_PKT_TIMEOUT, value);
 +#define HWMON_NR_SENSOR_TYPES         (hwmon_pwm + 1)
 +
 +int hl_build_hwmon_channel_info(struct hl_device *hdev,
 +                              struct armcp_sensor *sensors_arr)
 +{
 +      u32 counts[HWMON_NR_SENSOR_TYPES] = {0};
 +      u32 *sensors_by_type[HWMON_NR_SENSOR_TYPES] = {NULL};
 +      u32 sensors_by_type_next_index[HWMON_NR_SENSOR_TYPES] = {0};
 +      struct hwmon_channel_info **channels_info;
 +      u32 num_sensors_for_type, num_active_sensor_types = 0,
 +                      arr_size = 0, *curr_arr;
 +      enum hwmon_sensor_types type;
 +      int rc, i, j;
 +
 +      for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
 +              type = le32_to_cpu(sensors_arr[i].type);
 +
 +              if ((type == 0) && (sensors_arr[i].flags == 0))
 +                      break;
 +
 +              if (type >= HWMON_NR_SENSOR_TYPES) {
 +                      dev_err(hdev->dev,
 +                              "Got wrong sensor type %d from device\n", type);
 +                      return -EINVAL;
 +              }
 +
 +              counts[type]++;
 +              arr_size++;
 +      }
 +
 +      for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
 +              if (counts[i] == 0)
 +                      continue;
 +
 +              num_sensors_for_type = counts[i] + 1;
 +              curr_arr = kcalloc(num_sensors_for_type, sizeof(*curr_arr),
 +                              GFP_KERNEL);
 +              if (!curr_arr) {
 +                      rc = -ENOMEM;
 +                      goto sensors_type_err;
 +              }
 +
 +              num_active_sensor_types++;
 +              sensors_by_type[i] = curr_arr;
 +      }
 +
 +      for (i = 0 ; i < arr_size ; i++) {
 +              type = le32_to_cpu(sensors_arr[i].type);
 +              curr_arr = sensors_by_type[type];
 +              curr_arr[sensors_by_type_next_index[type]++] =
 +                              le32_to_cpu(sensors_arr[i].flags);
 +      }
 +
 +      channels_info = kcalloc(num_active_sensor_types + 1,
 +                      sizeof(*channels_info), GFP_KERNEL);
 +      if (!channels_info) {
 +              rc = -ENOMEM;
 +              goto channels_info_array_err;
 +      }
 +
 +      for (i = 0 ; i < num_active_sensor_types ; i++) {
 +              channels_info[i] = kzalloc(sizeof(*channels_info[i]),
 +                              GFP_KERNEL);
 +              if (!channels_info[i]) {
 +                      rc = -ENOMEM;
 +                      goto channel_info_err;
 +              }
 +      }
 +
 +      for (i = 0, j = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++) {
 +              if (!sensors_by_type[i])
 +                      continue;
 +
 +              channels_info[j]->type = i;
 +              channels_info[j]->config = sensors_by_type[i];
 +              j++;
 +      }
 +
 +      hdev->hl_chip_info->info =
 +                      (const struct hwmon_channel_info **)channels_info;
 +
 +      return 0;
 +
 +channel_info_err:
 +      for (i = 0 ; i < num_active_sensor_types ; i++)
 +              if (channels_info[i]) {
 +                      kfree(channels_info[i]->config);
 +                      kfree(channels_info[i]);
 +              }
 +      kfree(channels_info);
 +channels_info_array_err:
 +sensors_type_err:
 +      for (i = 0 ; i < HWMON_NR_SENSOR_TYPES ; i++)
 +              kfree(sensors_by_type[i]);
 +
 +      return rc;
 +}
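The channel array built above is what ultimately backs hwmon registration. A hedged sketch of the wiring, forward-referencing the accessors defined below (in practice the ops table sits after the accessor definitions, and the actual registration is done in hl_hwmon_init() later in this file):

        /* Sketch: ties the accessors in this file to the hwmon core */
        static const struct hwmon_ops example_hl_hwmon_ops = {
                .is_visible = hl_is_visible,
                .read = hl_read,
                .write = hl_write,
        };

        /* ...then, roughly:
         *      hdev->hl_chip_info->ops = &example_hl_hwmon_ops;
         *      hdev->hwmon_dev = hwmon_device_register_with_info(hdev->dev,
         *                      "habanalabs", hdev, hdev->hl_chip_info, NULL);
         */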
 +
 +static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 +                      u32 attr, int channel, long *val)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      int rc;
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return -ENODEV;
 +
 +      switch (type) {
 +      case hwmon_temp:
 +              switch (attr) {
 +              case hwmon_temp_input:
 +              case hwmon_temp_max:
 +              case hwmon_temp_crit:
 +              case hwmon_temp_max_hyst:
 +              case hwmon_temp_crit_hyst:
 +              case hwmon_temp_offset:
 +              case hwmon_temp_highest:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +
 +              rc = hl_get_temperature(hdev, channel, attr, val);
 +              break;
 +      case hwmon_in:
 +              switch (attr) {
 +              case hwmon_in_input:
 +              case hwmon_in_min:
 +              case hwmon_in_max:
 +              case hwmon_in_highest:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +
 +              rc = hl_get_voltage(hdev, channel, attr, val);
 +              break;
 +      case hwmon_curr:
 +              switch (attr) {
 +              case hwmon_curr_input:
 +              case hwmon_curr_min:
 +              case hwmon_curr_max:
 +              case hwmon_curr_highest:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +
 +              rc = hl_get_current(hdev, channel, attr, val);
 +              break;
 +      case hwmon_fan:
 +              switch (attr) {
 +              case hwmon_fan_input:
 +              case hwmon_fan_min:
 +              case hwmon_fan_max:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +              rc = hl_get_fan_speed(hdev, channel, attr, val);
 +              break;
 +      case hwmon_pwm:
 +              switch (attr) {
 +              case hwmon_pwm_input:
 +              case hwmon_pwm_enable:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +              rc = hl_get_pwm_info(hdev, channel, attr, val);
 +              break;
 +      default:
 +              return -EINVAL;
 +      }
 +      return rc;
 +}
 +
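 +/* hwmon "write" callback - forward supported attribute updates to the device */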
 +static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 +                      u32 attr, int channel, long val)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return -ENODEV;
 +
 +      switch (type) {
 +      case hwmon_temp:
 +              switch (attr) {
 +              case hwmon_temp_offset:
 +              case hwmon_temp_reset_history:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +              hl_set_temperature(hdev, channel, attr, val);
 +              break;
 +      case hwmon_pwm:
 +              switch (attr) {
 +              case hwmon_pwm_input:
 +              case hwmon_pwm_enable:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +              hl_set_pwm_info(hdev, channel, attr, val);
 +              break;
 +      case hwmon_in:
 +              switch (attr) {
 +              case hwmon_in_reset_history:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +              hl_set_voltage(hdev, channel, attr, val);
 +              break;
 +      case hwmon_curr:
 +              switch (attr) {
 +              case hwmon_curr_reset_history:
 +                      break;
 +              default:
 +                      return -EINVAL;
 +              }
 +              hl_set_current(hdev, channel, attr, val);
 +              break;
 +      default:
 +              return -EINVAL;
 +      }
 +      return 0;
 +}
 +
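 +/* Per-attribute sysfs permissions: read-only, write-only or read-write */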
 +static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
 +                              u32 attr, int channel)
 +{
 +      switch (type) {
 +      case hwmon_temp:
 +              switch (attr) {
 +              case hwmon_temp_input:
 +              case hwmon_temp_max:
 +              case hwmon_temp_max_hyst:
 +              case hwmon_temp_crit:
 +              case hwmon_temp_crit_hyst:
 +              case hwmon_temp_highest:
 +                      return 0444;
 +              case hwmon_temp_offset:
 +                      return 0644;
 +              case hwmon_temp_reset_history:
 +                      return 0200;
 +              }
 +              break;
 +      case hwmon_in:
 +              switch (attr) {
 +              case hwmon_in_input:
 +              case hwmon_in_min:
 +              case hwmon_in_max:
 +              case hwmon_in_highest:
 +                      return 0444;
 +              case hwmon_in_reset_history:
 +                      return 0200;
 +              }
 +              break;
 +      case hwmon_curr:
 +              switch (attr) {
 +              case hwmon_curr_input:
 +              case hwmon_curr_min:
 +              case hwmon_curr_max:
 +              case hwmon_curr_highest:
 +                      return 0444;
 +              case hwmon_curr_reset_history:
 +                      return 0200;
 +              }
 +              break;
 +      case hwmon_fan:
 +              switch (attr) {
 +              case hwmon_fan_input:
 +              case hwmon_fan_min:
 +              case hwmon_fan_max:
 +                      return 0444;
 +              }
 +              break;
 +      case hwmon_pwm:
 +              switch (attr) {
 +              case hwmon_pwm_input:
 +              case hwmon_pwm_enable:
 +                      return 0644;
 +              }
 +              break;
 +      default:
 +              break;
 +      }
 +      return 0;
 +}
 +
 +static const struct hwmon_ops hl_hwmon_ops = {
 +      .is_visible = hl_is_visible,
 +      .read = hl_read,
 +      .write = hl_write
 +};
 +
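 +/*
 + * The sensor accessors below each send a single packet to the device CPU;
 + * getters receive the reading through the packet's result.
 + */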
 +int hl_get_temperature(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long *value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
++                                              0, value);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to get temperature from sensor %d, error %d\n",
 +                      sensor_index, rc);
 +              *value = 0;
 +      }
 +
 +      return rc;
 +}
 +
 +int hl_set_temperature(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_SET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +      pkt.value = __cpu_to_le64(value);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev,
 +                      "Failed to set temperature of sensor %d, error %d\n",
 +                      sensor_index, rc);
 +
 +      return rc;
 +}
 +
 +int hl_get_voltage(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long *value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, value);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to get voltage from sensor %d, error %d\n",
 +                      sensor_index, rc);
 +              *value = 0;
 +      }
 +
 +      return rc;
 +}
 +
 +int hl_get_current(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long *value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, value);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to get current from sensor %d, error %d\n",
 +                      sensor_index, rc);
 +              *value = 0;
 +      }
 +
 +      return rc;
 +}
 +
 +int hl_get_fan_speed(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long *value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, value);
++                                              0, value);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to get fan speed from sensor %d, error %d\n",
 +                      sensor_index, rc);
 +              *value = 0;
 +      }
 +
 +      return rc;
 +}
 +
 +int hl_get_pwm_info(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long *value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SENSORS_PKT_TIMEOUT, NULL);
++                                              0, value);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to get pwm info from sensor %d, error %d\n",
 +                      sensor_index, rc);
 +              *value = 0;
 +      }
 +
 +      return rc;
 +}
 +
 +void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
 +                      long value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +      pkt.value = cpu_to_le64(value);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev,
 +                      "Failed to set pwm info to sensor %d, error %d\n",
 +                      sensor_index, rc);
 +}
 +
 +int hl_set_voltage(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_SET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +      pkt.value = __cpu_to_le64(value);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SENSORS_PKT_TIMEOUT, NULL);
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev,
 +                      "Failed to set voltage of sensor %d, error %d\n",
 +                      sensor_index, rc);
 +
 +      return rc;
 +}
 +
 +int hl_set_current(struct hl_device *hdev,
 +                      int sensor_index, u32 attr, long value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_SET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.sensor_index = __cpu_to_le16(sensor_index);
 +      pkt.type = __cpu_to_le16(attr);
 +      pkt.value = __cpu_to_le64(value);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev,
 +                      "Failed to set current of sensor %d, error %d\n",
 +                      sensor_index, rc);
 +
 +      return rc;
 +}
 +
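 +/*
 + * Register with the hwmon subsystem only once the sensor list is
 + * available (hl_chip_info->info is then non-NULL).
 + */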
 +int hl_hwmon_init(struct hl_device *hdev)
 +{
 +      struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
 +      struct asic_fixed_properties *prop = &hdev->asic_prop;
 +      int rc;
 +
 +      if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
 +              return 0;
 +
 +      if (hdev->hl_chip_info->info) {
 +              hdev->hl_chip_info->ops = &hl_hwmon_ops;
 +
 +              hdev->hwmon_dev = hwmon_device_register_with_info(dev,
 +                                      prop->armcp_info.card_name, hdev,
 +                                      hdev->hl_chip_info, NULL);
 +              if (IS_ERR(hdev->hwmon_dev)) {
 +                      rc = PTR_ERR(hdev->hwmon_dev);
 +                      dev_err(hdev->dev,
 +                              "Unable to register hwmon device: %d\n", rc);
 +                      return rc;
 +              }
 +
 +              dev_info(hdev->dev, "%s: add sensors information\n",
 +                      dev_name(hdev->hwmon_dev));
 +
 +              hdev->hwmon_initialized = true;
 +      } else {
 +              dev_info(hdev->dev, "no available sensors\n");
 +      }
 +
 +      return 0;
 +}
 +
 +void hl_hwmon_fini(struct hl_device *hdev)
 +{
 +      if (!hdev->hwmon_initialized)
 +              return;
 +
 +      hwmon_device_unregister(hdev->hwmon_dev);
 +}
index c4e7c682d58482ca85e64416d5d478d9d294f8ca,0000000000000000000000000000000000000000..b3cb0ac4721c5bb3d74b7d24d9371e97de8df03c
mode 100644,000000..100644
--- /dev/null
@@@ -1,445 -1,0 +1,442 @@@
- #define SET_CLK_PKT_TIMEOUT   1000000 /* 1s */
- #define SET_PWR_PKT_TIMEOUT   1000000 /* 1s */
 +// SPDX-License-Identifier: GPL-2.0
 +
 +/*
 + * Copyright 2016-2019 HabanaLabs, Ltd.
 + * All Rights Reserved.
 + */
 +
 +#include "habanalabs.h"
 +
 +#include <linux/pci.h>
 +
-                                               SET_CLK_PKT_TIMEOUT, &result);
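 +/* Query a PLL frequency; on failure the error code is returned instead */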
 +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 +{
 +      struct armcp_packet pkt;
 +      long result;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      if (curr)
 +              pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
 +                                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      else
 +              pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
 +                                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.pll_index = cpu_to_le32(pll_index);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SET_CLK_PKT_TIMEOUT, NULL);
++                                              0, &result);
 +
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to get frequency of PLL %d, error %d\n",
 +                      pll_index, rc);
 +              result = rc;
 +      }
 +
 +      return result;
 +}
 +
 +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
 +                                      ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.pll_index = cpu_to_le32(pll_index);
 +      pkt.value = cpu_to_le64(freq);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                               SET_PWR_PKT_TIMEOUT, &result);
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev,
 +                      "Failed to set frequency to PLL %d, error %d\n",
 +                      pll_index, rc);
 +}
 +
 +u64 hl_get_max_power(struct hl_device *hdev)
 +{
 +      struct armcp_packet pkt;
 +      long result;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
-                                       SET_PWR_PKT_TIMEOUT, NULL);
++                                              0, &result);
 +
 +      if (rc) {
 +              dev_err(hdev->dev, "Failed to get max power, error %d\n", rc);
 +              result = rc;
 +      }
 +
 +      return result;
 +}
 +
 +void hl_set_max_power(struct hl_device *hdev, u64 value)
 +{
 +      struct armcp_packet pkt;
 +      int rc;
 +
 +      memset(&pkt, 0, sizeof(pkt));
 +
 +      pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
 +                              ARMCP_PKT_CTL_OPCODE_SHIFT);
 +      pkt.value = cpu_to_le64(value);
 +
 +      rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
++                                              0, NULL);
 +
 +      if (rc)
 +              dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
 +}
 +
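 +/* Read-only attributes exposing firmware and hardware versions */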
 +static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%s\n", hdev->asic_prop.uboot_ver);
 +}
 +
 +static ssize_t armcp_kernel_ver_show(struct device *dev,
 +                              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%s", hdev->asic_prop.armcp_info.kernel_version);
 +}
 +
 +static ssize_t armcp_ver_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.armcp_version);
 +}
 +
 +static ssize_t cpld_ver_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "0x%08x\n",
 +                      hdev->asic_prop.armcp_info.cpld_version);
 +}
 +
 +static ssize_t infineon_ver_show(struct device *dev,
 +                              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "0x%04x\n",
 +                      hdev->asic_prop.armcp_info.infineon_version);
 +}
 +
 +static ssize_t fuse_ver_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%s\n", hdev->asic_prop.armcp_info.fuse_version);
 +}
 +
 +static ssize_t thermal_ver_show(struct device *dev,
 +                              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%s", hdev->asic_prop.armcp_info.thermal_version);
 +}
 +
 +static ssize_t preboot_btl_ver_show(struct device *dev,
 +                              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%s\n", hdev->asic_prop.preboot_ver);
 +}
 +
 +static ssize_t soft_reset_store(struct device *dev,
 +                              struct device_attribute *attr, const char *buf,
 +                              size_t count)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      long value;
 +      int rc;
 +
 +      rc = kstrtoul(buf, 0, &value);
 +
 +      if (rc) {
 +              count = -EINVAL;
 +              goto out;
 +      }
 +
 +      if (!hdev->supports_soft_reset) {
 +              dev_err(hdev->dev, "Device does not support soft-reset\n");
 +              goto out;
 +      }
 +
 +      dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
 +
 +      hl_device_reset(hdev, false, false);
 +
 +out:
 +      return count;
 +}
 +
 +static ssize_t hard_reset_store(struct device *dev,
 +                              struct device_attribute *attr,
 +                              const char *buf, size_t count)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      long value;
 +      int rc;
 +
 +      rc = kstrtoul(buf, 0, &value);
 +
 +      if (rc) {
 +              count = -EINVAL;
 +              goto out;
 +      }
 +
 +      dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
 +
 +      hl_device_reset(hdev, true, false);
 +
 +out:
 +      return count;
 +}
 +
 +static ssize_t device_type_show(struct device *dev,
 +              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      char *str;
 +
 +      switch (hdev->asic_type) {
 +      case ASIC_GOYA:
 +              str = "GOYA";
 +              break;
 +      case ASIC_GAUDI:
 +              str = "GAUDI";
 +              break;
 +      default:
 +              dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
 +                              hdev->asic_type);
 +              return -EINVAL;
 +      }
 +
 +      return sprintf(buf, "%s\n", str);
 +}
 +
 +static ssize_t pci_addr_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%04x:%02x:%02x.%x\n",
 +                      pci_domain_nr(hdev->pdev->bus),
 +                      hdev->pdev->bus->number,
 +                      PCI_SLOT(hdev->pdev->devfn),
 +                      PCI_FUNC(hdev->pdev->devfn));
 +}
 +
 +static ssize_t status_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      char *str;
 +
 +      if (atomic_read(&hdev->in_reset))
 +              str = "In reset";
 +      else if (hdev->disabled)
 +              str = "Malfunction";
 +      else
 +              str = "Operational";
 +
 +      return sprintf(buf, "%s\n", str);
 +}
 +
 +static ssize_t soft_reset_cnt_show(struct device *dev,
 +              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%d\n", hdev->soft_reset_cnt);
 +}
 +
 +static ssize_t hard_reset_cnt_show(struct device *dev,
 +              struct device_attribute *attr, char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +
 +      return sprintf(buf, "%d\n", hdev->hard_reset_cnt);
 +}
 +
 +static ssize_t max_power_show(struct device *dev, struct device_attribute *attr,
 +                              char *buf)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      long val;
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return -ENODEV;
 +
 +      val = hl_get_max_power(hdev);
 +
 +      return sprintf(buf, "%lu\n", val);
 +}
 +
 +static ssize_t max_power_store(struct device *dev,
 +              struct device_attribute *attr, const char *buf, size_t count)
 +{
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      unsigned long value;
 +      int rc;
 +
 +      if (hl_device_disabled_or_in_reset(hdev)) {
 +              count = -ENODEV;
 +              goto out;
 +      }
 +
 +      rc = kstrtoul(buf, 0, &value);
 +
 +      if (rc) {
 +              count = -EINVAL;
 +              goto out;
 +      }
 +
 +      hdev->max_power = value;
 +      hl_set_max_power(hdev, value);
 +
 +out:
 +      return count;
 +}
 +
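 +/* Binary sysfs attribute - read the device EEPROM via the ASIC callback */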
 +static ssize_t eeprom_read_handler(struct file *filp, struct kobject *kobj,
 +                      struct bin_attribute *attr, char *buf, loff_t offset,
 +                      size_t max_size)
 +{
 +      struct device *dev = container_of(kobj, struct device, kobj);
 +      struct hl_device *hdev = dev_get_drvdata(dev);
 +      char *data;
 +      int rc;
 +
 +      if (hl_device_disabled_or_in_reset(hdev))
 +              return -ENODEV;
 +
 +      if (!max_size)
 +              return -EINVAL;
 +
 +      data = kzalloc(max_size, GFP_KERNEL);
 +      if (!data)
 +              return -ENOMEM;
 +
 +      rc = hdev->asic_funcs->get_eeprom_data(hdev, data, max_size);
 +      if (rc)
 +              goto out;
 +
 +      memcpy(buf, data, max_size);
 +
 +out:
 +      kfree(data);
 +
 +      return max_size;
 +}
 +
 +static DEVICE_ATTR_RO(armcp_kernel_ver);
 +static DEVICE_ATTR_RO(armcp_ver);
 +static DEVICE_ATTR_RO(cpld_ver);
 +static DEVICE_ATTR_RO(device_type);
 +static DEVICE_ATTR_RO(fuse_ver);
 +static DEVICE_ATTR_WO(hard_reset);
 +static DEVICE_ATTR_RO(hard_reset_cnt);
 +static DEVICE_ATTR_RO(infineon_ver);
 +static DEVICE_ATTR_RW(max_power);
 +static DEVICE_ATTR_RO(pci_addr);
 +static DEVICE_ATTR_RO(preboot_btl_ver);
 +static DEVICE_ATTR_WO(soft_reset);
 +static DEVICE_ATTR_RO(soft_reset_cnt);
 +static DEVICE_ATTR_RO(status);
 +static DEVICE_ATTR_RO(thermal_ver);
 +static DEVICE_ATTR_RO(uboot_ver);
 +
 +static struct bin_attribute bin_attr_eeprom = {
 +      .attr = {.name = "eeprom", .mode = (0444)},
 +      .size = PAGE_SIZE,
 +      .read = eeprom_read_handler
 +};
 +
 +static struct attribute *hl_dev_attrs[] = {
 +      &dev_attr_armcp_kernel_ver.attr,
 +      &dev_attr_armcp_ver.attr,
 +      &dev_attr_cpld_ver.attr,
 +      &dev_attr_device_type.attr,
 +      &dev_attr_fuse_ver.attr,
 +      &dev_attr_hard_reset.attr,
 +      &dev_attr_hard_reset_cnt.attr,
 +      &dev_attr_infineon_ver.attr,
 +      &dev_attr_max_power.attr,
 +      &dev_attr_pci_addr.attr,
 +      &dev_attr_preboot_btl_ver.attr,
 +      &dev_attr_soft_reset.attr,
 +      &dev_attr_soft_reset_cnt.attr,
 +      &dev_attr_status.attr,
 +      &dev_attr_thermal_ver.attr,
 +      &dev_attr_uboot_ver.attr,
 +      NULL,
 +};
 +
 +static struct bin_attribute *hl_dev_bin_attrs[] = {
 +      &bin_attr_eeprom,
 +      NULL
 +};
 +
 +static struct attribute_group hl_dev_attr_group = {
 +      .attrs = hl_dev_attrs,
 +      .bin_attrs = hl_dev_bin_attrs,
 +};
 +
 +static struct attribute_group hl_dev_clks_attr_group;
 +
 +static const struct attribute_group *hl_dev_attr_groups[] = {
 +      &hl_dev_attr_group,
 +      &hl_dev_clks_attr_group,
 +      NULL,
 +};
 +
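 +/* Attach the common attributes plus the per-ASIC clock attribute group */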
 +int hl_sysfs_init(struct hl_device *hdev)
 +{
 +      int rc;
 +
 +      if (hdev->asic_type == ASIC_GOYA)
 +              hdev->pm_mng_profile = PM_AUTO;
 +      else
 +              hdev->pm_mng_profile = PM_MANUAL;
 +      hdev->max_power = hdev->asic_prop.max_power_default;
 +
 +      hdev->asic_funcs->add_device_attr(hdev, &hl_dev_clks_attr_group);
 +
 +      rc = device_add_groups(hdev->dev, hl_dev_attr_groups);
 +      if (rc) {
 +              dev_err(hdev->dev,
 +                      "Failed to add groups to device, error %d\n", rc);
 +              return rc;
 +      }
 +
 +      return 0;
 +}
 +
 +void hl_sysfs_fini(struct hl_device *hdev)
 +{
 +      device_remove_groups(hdev->dev, hl_dev_attr_groups);
 +}
index 4a1a52608fc0963f29acb6140cd396e652fd130f,637a9d608707f51e30226e121fad16019659a2bb..78fbff646f99a6b9387c5762c5c040fa56beea63
@@@ -1766,7 -1823,9 +1772,7 @@@ static void gaudi_init_golden_registers
  
        gaudi_init_hbm_cred(hdev);
  
-       gaudi_disable_clock_gating(hdev);
 -      gaudi_init_rate_limiter(hdev);
 -
+       hdev->asic_funcs->disable_clock_gating(hdev);
  
        for (tpc_id = 0, tpc_offset = 0;
                                tpc_id < TPC_NUMBER_OF_ENGINES;
@@@ -5166,50 -5275,41 +5211,51 @@@ static int gaudi_extract_ecc_info(struc
                hdev->asic_funcs->disable_clock_gating(hdev);
        }
  
 -      switch (num_mem_regs) {
 -      case 1:
 -              dev_err(hdev->dev,
 -                      "%s ECC indication: 0x%08x\n",
 -                      block_name, RREG32(block_address));
 -              break;
 -      case 2:
 -              dev_err(hdev->dev,
 -                      "%s ECC indication: 0x%08x 0x%08x\n",
 -                      block_name,
 -                      RREG32(block_address), RREG32(block_address + 4));
 -              break;
 -      case 3:
 -              dev_err(hdev->dev,
 -                      "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
 -                      block_name,
 -                      RREG32(block_address), RREG32(block_address + 4),
 -                      RREG32(block_address + 8));
 -              break;
 -      case 4:
 -              dev_err(hdev->dev,
 -                      "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
 -                      block_name,
 -                      RREG32(block_address), RREG32(block_address + 4),
 -                      RREG32(block_address + 8), RREG32(block_address + 0xc));
 -              break;
 -      default:
 -              break;
 +      /* Set invalid wrapper index */
 +      *memory_wrapper_idx = 0xFF;
 +
 +      /* Iterate through memory wrappers, a single bit must be set */
 +      for (i = 0 ; i < num_mem_regs ; i++) {
 +              err_addr += i * 4;
 +              err_word = RREG32(err_addr);
 +              if (err_word) {
 +                      err_bit = __ffs(err_word);
 +                      *memory_wrapper_idx = err_bit + (32 * i);
 +                      break;
 +              }
 +      }
  
 +      if (*memory_wrapper_idx == 0xFF) {
 +              dev_err(hdev->dev, "ECC error information cannot be found\n");
 +              rc = -EINVAL;
 +              goto enable_clk_gate;
        }
  
 -      if (disable_clock_gating) {
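 +      /* Select the failing wrapper before reading its ECC address/syndrome */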
 +      WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
 +                      *memory_wrapper_idx);
 +
 +      *ecc_address =
 +              RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
 +      *ecc_syndrom =
 +              RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
 +
 +      /* Clear error indication */
 +      reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
 +      if (params->derr)
 +              reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
 +      else
 +              reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
 +
 +      WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
 +
 +enable_clk_gate:
 +      if (params->disable_clock_gating) {
-               hdev->asic_funcs->enable_clock_gating(hdev);
+               hdev->asic_funcs->set_clock_gating(hdev);
++
                mutex_unlock(&gaudi->clk_gate_mutex);
        }
 +
 +      return rc;
  }
  
  static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
Simple merge
Simple merge