drm/amdgpu: make pipeline sync be in same place v2

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b9135417396c2296d8cddee3bd799b70f457ff..e2cafbd690c0cc8a830845688b16f89cdf500b2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -32,7 +32,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-#include <linux/interval_tree.h>
+#include <linux/rbtree.h>
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
@@ -52,6 +52,7 @@
 #include "amdgpu_irq.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ttm.h"
+#include "amdgpu_psp.h"
 #include "amdgpu_gds.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
@@ -59,6 +60,8 @@
 #include "amd_powerplay.h"
 #include "amdgpu_dpm.h"
 #include "amdgpu_acp.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
@@ -79,7 +82,7 @@ extern int amdgpu_pcie_gen2;
 extern int amdgpu_msi;
 extern int amdgpu_lockup_timeout;
 extern int amdgpu_dpm;
-extern int amdgpu_smc_load_fw;
+extern int amdgpu_fw_load_type;
 extern int amdgpu_aspm;
 extern int amdgpu_runtime_pm;
 extern unsigned amdgpu_ip_block_mask;
@@ -101,7 +104,13 @@ extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern unsigned amdgpu_pp_feature_mask;
 extern int amdgpu_vram_page_split;
+extern int amdgpu_ngg;
+extern int amdgpu_prim_buf_per_se;
+extern int amdgpu_pos_buf_per_se;
+extern int amdgpu_cntl_sb_buf_per_se;
+extern int amdgpu_param_buf_per_se;
 
+#define AMDGPU_DEFAULT_GTT_SIZE_MB             3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS         3000
 #define AMDGPU_MAX_USEC_TIMEOUT                        100000  /* 100 ms */
 #define AMDGPU_FENCE_JIFFIES_TIMEOUT           (HZ / 2)
@@ -109,14 +118,11 @@ extern int amdgpu_vram_page_split;
 #define AMDGPU_IB_POOL_SIZE                    16
 #define AMDGPU_DEBUGFS_MAX_COMPONENTS          32
 #define AMDGPUFB_CONN_LIMIT                    4
-#define AMDGPU_BIOS_NUM_SCRATCH                        8
+#define AMDGPU_BIOS_NUM_SCRATCH                        16
 
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES              2
 
-/* hardcode that limit for now */
-#define AMDGPU_VA_RESERVED_SIZE                        (8 << 20)
-
 /* hard reset data */
 #define AMDGPU_ASIC_RESET_DATA                  0x39d5e86b
 
@@ -280,7 +286,7 @@ struct amdgpu_vm_pte_funcs {
        void (*set_pte_pde)(struct amdgpu_ib *ib,
                            uint64_t pe,
                            uint64_t addr, unsigned count,
-                           uint32_t incr, uint32_t flags);
+                           uint32_t incr, uint64_t flags);
 };
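
Note that the flags argument of set_pte_pde() widens from uint32_t to uint64_t here, as it does throughout the rest of this diff. A minimal sketch of why, assuming the AMDGPU_PTE_PRT definition from this era's amdgpu_vm.h, which sits above bit 31:

        /* AMDGPU_PTE_PRT is a high bit (1ULL << 51 in amdgpu_vm.h), so a
         * 32-bit flags word would silently truncate it */
        uint64_t flags = AMDGPU_PTE_VALID | AMDGPU_PTE_PRT;

        amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags);
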
 
 /* provided by the gmc block */
@@ -293,7 +299,15 @@ struct amdgpu_gart_funcs {
                           void *cpu_pt_addr, /* cpu addr of page table */
                           uint32_t gpu_page_idx, /* pte/pde to update */
                           uint64_t addr, /* addr to write into pte/pde */
-                          uint32_t flags); /* access flags */
+                          uint64_t flags); /* access flags */
+       /* enable/disable PRT support */
+       void (*set_prt)(struct amdgpu_device *adev, bool enable);
+       /* translate generic flags into per-asic pte flags */
+       uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
+                                    uint32_t flags);
+       /* adjust mc addr in fb for APU case */
+       u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+       uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
 /* provided by the ih block */
@@ -355,7 +369,10 @@ struct amdgpu_bo_list_entry {
 
 struct amdgpu_bo_va_mapping {
        struct list_head                list;
-       struct interval_tree_node       it;
+       struct rb_node                  rb;
+       uint64_t                        start;
+       uint64_t                        last;
+       uint64_t                        __subtree_last;
        uint64_t                        offset;
        uint64_t                        flags;
 };
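
The interval_tree_node member gives way to an open-coded rb node plus start/last/__subtree_last, which is exactly the layout the generic interval-tree template expects. The matching amdgpu_vm.c change (not part of this header diff) presumably instantiates the tree roughly like this:

    #include <linux/interval_tree_generic.h>

    #define START(node) ((node)->start)
    #define LAST(node)  ((node)->last)

    /* generates amdgpu_vm_it_insert/remove/iter_first/iter_next
     * over struct amdgpu_bo_va_mapping */
    INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t,
                         __subtree_last, START, LAST, static,
                         amdgpu_vm_it)
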
@@ -522,6 +539,10 @@ struct amdgpu_gart {
        struct page                     **pages;
 #endif
        bool                            ready;
+
+       /* Asic default pte flags */
+       uint64_t                        gart_pte_flags;
+
        const struct amdgpu_gart_funcs *gart_funcs;
 };
 
@@ -533,13 +554,26 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
+int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
                        int pages);
 int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
                     int pages, struct page **pagelist,
-                    dma_addr_t *dma_addr, uint32_t flags);
+                    dma_addr_t *dma_addr, uint64_t flags);
 int amdgpu_ttm_recover_gart(struct amdgpu_device *adev);
 
+/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub {
+       uint32_t        ctx0_ptb_addr_lo32;
+       uint32_t        ctx0_ptb_addr_hi32;
+       uint32_t        vm_inv_eng0_req;
+       uint32_t        vm_inv_eng0_ack;
+       uint32_t        vm_context0_cntl;
+       uint32_t        vm_l2_pro_fault_status;
+       uint32_t        vm_l2_pro_fault_cntl;
+};
+
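
The vmhub table abstracts per-hub register offsets (Vega splits VM clients across a graphics hub and an MM hub) so common code can invalidate TLBs on every hub without hardcoding either one. A hedged sketch of the kind of loop this enables, using the get_invalidate_req callback added above and the *_NO_KIQ accessors defined later in this diff:

    /* sketch only: flush one VMID on all hubs; error handling and
     * per-engine offsets omitted */
    static void flush_gpu_tlb_sketch(struct amdgpu_device *adev, unsigned vmid)
    {
            u32 req = adev->gart.gart_funcs->get_invalidate_req(vmid);
            unsigned i;

            for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
                    struct amdgpu_vmhub *hub = &adev->vmhub[i];

                    WREG32_NO_KIQ(hub->vm_inv_eng0_req, req);

                    /* wait for the per-VMID ack bit to flip */
                    while (!(RREG32_NO_KIQ(hub->vm_inv_eng0_ack) & (1 << vmid)))
                            cpu_relax();
            }
    }
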
 /*
  * GPU MC structures, functions & helpers
  */
@@ -567,6 +601,14 @@ struct amdgpu_mc {
        uint32_t                vram_type;
        uint32_t                srbm_soft_reset;
        struct amdgpu_mode_mc_save save;
+       bool                    prt_warning;
+       /* apertures */
+       u64                                     shared_aperture_start;
+       u64                                     shared_aperture_end;
+       u64                                     private_aperture_start;
+       u64                                     private_aperture_end;
+       /* protects concurrent invalidation */
+       spinlock_t              invalidate_lock;
 };
 
 /*
@@ -601,6 +643,83 @@ struct amdgpu_doorbell {
        u32                     num_doorbells;  /* Number of doorbells actually reserved for amdgpu. */
 };
 
+/*
+ * 64-bit doorbells; offsets are in QWORDs and occupy 2KB of doorbell space
+ */
+typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
+{
+       /*
+        * All compute-related doorbells (KIQ, HIQ, DIQ, compute and user queues)
+        * should sit in a contiguous range so that programming
+        * CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover it; they are allocated from 0x00 to 0x8a.
+        */
+
+
+       /* kernel scheduling */
+       AMDGPU_DOORBELL64_KIQ                     = 0x00,
+
+       /* HSA interface queue and debug queue */
+       AMDGPU_DOORBELL64_HIQ                     = 0x01,
+       AMDGPU_DOORBELL64_DIQ                     = 0x02,
+
+       /* Compute engines */
+       AMDGPU_DOORBELL64_MEC_RING0               = 0x03,
+       AMDGPU_DOORBELL64_MEC_RING1               = 0x04,
+       AMDGPU_DOORBELL64_MEC_RING2               = 0x05,
+       AMDGPU_DOORBELL64_MEC_RING3               = 0x06,
+       AMDGPU_DOORBELL64_MEC_RING4               = 0x07,
+       AMDGPU_DOORBELL64_MEC_RING5               = 0x08,
+       AMDGPU_DOORBELL64_MEC_RING6               = 0x09,
+       AMDGPU_DOORBELL64_MEC_RING7               = 0x0a,
+
+       /* User queue doorbell range (128 doorbells) */
+       AMDGPU_DOORBELL64_USERQUEUE_START         = 0x0b,
+       AMDGPU_DOORBELL64_USERQUEUE_END           = 0x8a,
+
+       /* Graphics engine */
+       AMDGPU_DOORBELL64_GFX_RING0               = 0x8b,
+
+       /*
+        * Other graphics doorbells can be allocated here: from 0x8c to 0xef.
+        * Graphics voltage island aperture 1:
+        * default non-graphics QWORD indices are 0xF0 - 0xFF inclusive.
+        */
+
+       /* sDMA engines */
+       AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xF0,
+       AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
+       AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xF2,
+       AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,
+
+       /* Interrupt handler */
+       AMDGPU_DOORBELL64_IH                      = 0xF4,  /* For legacy interrupt ring buffer */
+       AMDGPU_DOORBELL64_IH_RING1                = 0xF5,  /* For page migration request log */
+       AMDGPU_DOORBELL64_IH_RING2                = 0xF6,  /* For page migration translation/invalidation log */
+
+       /* VCN engines use 32-bit doorbells */
+       AMDGPU_DOORBELL64_VCN0_1                  = 0xF8, /* lower 32 bits for VCN0, upper 32 bits for VCN1 */
+       AMDGPU_DOORBELL64_VCN2_3                  = 0xF9,
+       AMDGPU_DOORBELL64_VCN4_5                  = 0xFA,
+       AMDGPU_DOORBELL64_VCN6_7                  = 0xFB,
+
+       /* Overlap the VCE doorbell assignments with VCN, as the two are mutually
+        * exclusive. VCE doorbells are 32-bit; two VCE rings share one QWORD.
+        */
+       AMDGPU_DOORBELL64_RING0_1                 = 0xF8,
+       AMDGPU_DOORBELL64_RING2_3                 = 0xF9,
+       AMDGPU_DOORBELL64_RING4_5                 = 0xFA,
+       AMDGPU_DOORBELL64_RING6_7                 = 0xFB,
+
+       AMDGPU_DOORBELL64_UVD_RING0_1             = 0xFC,
+       AMDGPU_DOORBELL64_UVD_RING2_3             = 0xFD,
+       AMDGPU_DOORBELL64_UVD_RING4_5             = 0xFE,
+       AMDGPU_DOORBELL64_UVD_RING6_7             = 0xFF,
+
+       AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,
+       AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
+} AMDGPU_DOORBELL64_ASSIGNMENT;
+
+
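
These indices are QWORD offsets into the doorbell BAR; the 64-bit accessors declared further down (amdgpu_mm_rdoorbell64/amdgpu_mm_wdoorbell64, wrapped by RDOORBELL64/WDOORBELL64 later in this diff) presumably reduce to a bounds-checked 64-bit store, along these lines:

    /* sketch, assuming adev->doorbell.ptr maps the doorbell aperture
     * and index is one of the QWORD assignments above */
    void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
    {
            if (index < adev->doorbell.num_doorbells)
                    atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
            else
                    DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n",
                              index);
    }
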
 void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
                                phys_addr_t *aperture_base,
                                size_t *aperture_size,
@@ -699,6 +818,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
 struct amdgpu_fpriv {
        struct amdgpu_vm        vm;
+       struct amdgpu_bo_va     *prt_va;
        struct mutex            bo_list_lock;
        struct idr              bo_list_handles;
        struct amdgpu_ctx_mgr   ctx_mgr;
@@ -776,14 +896,18 @@ struct amdgpu_rlc {
 struct amdgpu_mec {
        struct amdgpu_bo        *hpd_eop_obj;
        u64                     hpd_eop_gpu_addr;
+       struct amdgpu_bo        *mec_fw_obj;
+       u64                     mec_fw_gpu_addr;
        u32 num_pipe;
        u32 num_mec;
        u32 num_queue;
+       void                    *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
 };
 
 struct amdgpu_kiq {
        u64                     eop_gpu_addr;
        struct amdgpu_bo        *eop_obj;
+       struct mutex            ring_mutex;
        struct amdgpu_ring      ring;
        struct amdgpu_irq_src   irq;
 };
@@ -810,7 +934,16 @@ struct amdgpu_rb_config {
        uint32_t raster_config_1;
 };
 
-struct amdgpu_gca_config {
+struct gb_addr_config {
+       uint16_t pipe_interleave_size;
+       uint8_t num_pipes;
+       uint8_t max_compress_frags;
+       uint8_t num_banks;
+       uint8_t num_se;
+       uint8_t num_rb_per_se;
+};
+
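
The raw gb_addr_config register value kept below is now also decoded once into these fields; a per-ASIC init path would presumably fill them with REG_GET_FIELD (defined later in this file), e.g.:

    /* sketch, assuming Vega10's GB_ADDR_CONFIG field layout */
    adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
            REG_GET_FIELD(adev->gfx.config.gb_addr_config,
                          GB_ADDR_CONFIG, NUM_PIPES);
    adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
            REG_GET_FIELD(adev->gfx.config.gb_addr_config,
                          GB_ADDR_CONFIG, NUM_BANKS);
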
+struct amdgpu_gfx_config {
        unsigned max_shader_engines;
        unsigned max_tile_pipes;
        unsigned max_cu_per_sh;
@@ -835,16 +968,23 @@ struct amdgpu_gca_config {
        unsigned mc_arb_ramcfg;
        unsigned gb_addr_config;
        unsigned num_rbs;
+       unsigned gs_vgt_table_depth;
+       unsigned gs_prim_buffer_depth;
 
        uint32_t tile_mode_array[32];
        uint32_t macrotile_mode_array[16];
 
+       struct gb_addr_config gb_addr_config_fields;
        struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE];
+
+       /* gfx configure feature */
+       uint32_t double_offchip_lds_buf;
 };
 
 struct amdgpu_cu_info {
        uint32_t number; /* total active CU number */
        uint32_t ao_cu_mask;
+       uint32_t wave_front_size;
        uint32_t bitmap[4][4];
 };
 
@@ -857,9 +997,31 @@ struct amdgpu_gfx_funcs {
        void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst);
 };
 
+struct amdgpu_ngg_buf {
+       struct amdgpu_bo        *bo;
+       uint64_t                gpu_addr;
+       uint32_t                size;
+       uint32_t                bo_size;
+};
+
+enum {
+       NGG_PRIM = 0,
+       NGG_POS,
+       NGG_CNTL,
+       NGG_PARAM,
+       NGG_BUF_MAX
+};
+
+struct amdgpu_ngg {
+       struct amdgpu_ngg_buf   buf[NGG_BUF_MAX];
+       uint32_t                gds_reserve_addr;
+       uint32_t                gds_reserve_size;
+       bool                    init;
+};
+
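
Each NGG buffer is presumably backed by a kernel BO sized from the per-SE module parameters declared near the top of this diff; a minimal allocation sketch (function name and sizing are assumptions, not from this header):

    static int ngg_create_buf_sketch(struct amdgpu_device *adev,
                                     struct amdgpu_ngg_buf *ngg_buf,
                                     int size_se)
    {
            int r;

            /* scale the per-SE size by the number of shader engines */
            ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
            r = amdgpu_bo_create_kernel(adev, ngg_buf->size, PAGE_SIZE,
                                        AMDGPU_GEM_DOMAIN_VRAM,
                                        &ngg_buf->bo, &ngg_buf->gpu_addr,
                                        NULL);
            if (r)
                    return r;
            ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
            return 0;
    }
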
 struct amdgpu_gfx {
        struct mutex                    gpu_clock_mutex;
-       struct amdgpu_gca_config        config;
+       struct amdgpu_gfx_config        config;
        struct amdgpu_rlc               rlc;
        struct amdgpu_mec               mec;
        struct amdgpu_kiq               kiq;
@@ -899,6 +1061,11 @@ struct amdgpu_gfx {
        /* reset mask */
        uint32_t                        grbm_soft_reset;
        uint32_t                        srbm_soft_reset;
+       bool                            in_reset;
+       /* s3/s4 mask */
+       bool                            in_suspend;
+       /* NGG */
+       struct amdgpu_ngg               ngg;
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -950,7 +1117,6 @@ struct amdgpu_cs_parser {
 #define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
 #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
 #define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occurred */
-#define AMDGPU_VM_DOMAIN                    (1 << 3) /* bit set means in virtual memory context */
 
 struct amdgpu_job {
        struct amd_sched_job    base;
@@ -958,6 +1124,7 @@ struct amdgpu_job {
        struct amdgpu_vm        *vm;
        struct amdgpu_ring      *ring;
        struct amdgpu_sync      sync;
+       struct amdgpu_sync      sched_sync;
        struct amdgpu_ib        *ibs;
        struct dma_fence        *fence; /* the hw fence */
        uint32_t                preamble_status;
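
sched_sync is the point of the commit named above: fences the scheduler already waited on get recorded here, so amdgpu_ib_schedule() can decide on the VM pipeline sync in a single place instead of scattering it across the submission path. Hedging on the .c side of the commit, the consolidated check looks roughly like:

    /* sketch of the check in amdgpu_ib_schedule(); the real code also
     * drops the reference returned by amdgpu_sync_get_fence() */
    if (ring->funcs->emit_pipeline_sync && job &&
        (amdgpu_sync_get_fence(&job->sched_sync) ||
         amdgpu_vm_need_pipeline_sync(ring, job)))
            amdgpu_ring_emit_pipeline_sync(ring);
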
@@ -1007,66 +1174,11 @@ struct amdgpu_wb {
 
 int amdgpu_wb_get(struct amdgpu_device *adev, u32 *wb);
 void amdgpu_wb_free(struct amdgpu_device *adev, u32 wb);
+int amdgpu_wb_get_64bit(struct amdgpu_device *adev, u32 *wb);
+void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb);
 
 void amdgpu_get_pcie_info(struct amdgpu_device *adev);
 
-/*
- * UVD
- */
-#define AMDGPU_DEFAULT_UVD_HANDLES     10
-#define AMDGPU_MAX_UVD_HANDLES         40
-#define AMDGPU_UVD_STACK_SIZE          (200*1024)
-#define AMDGPU_UVD_HEAP_SIZE           (256*1024)
-#define AMDGPU_UVD_SESSION_SIZE                (50*1024)
-#define AMDGPU_UVD_FIRMWARE_OFFSET     256
-
-struct amdgpu_uvd {
-       struct amdgpu_bo        *vcpu_bo;
-       void                    *cpu_addr;
-       uint64_t                gpu_addr;
-       unsigned                fw_version;
-       void                    *saved_bo;
-       unsigned                max_handles;
-       atomic_t                handles[AMDGPU_MAX_UVD_HANDLES];
-       struct drm_file         *filp[AMDGPU_MAX_UVD_HANDLES];
-       struct delayed_work     idle_work;
-       const struct firmware   *fw;    /* UVD firmware */
-       struct amdgpu_ring      ring;
-       struct amdgpu_irq_src   irq;
-       bool                    address_64_bit;
-       bool                    use_ctx_buf;
-       struct amd_sched_entity entity;
-       uint32_t                srbm_soft_reset;
-};
-
-/*
- * VCE
- */
-#define AMDGPU_MAX_VCE_HANDLES 16
-#define AMDGPU_VCE_FIRMWARE_OFFSET 256
-
-#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
-#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
-
-struct amdgpu_vce {
-       struct amdgpu_bo        *vcpu_bo;
-       uint64_t                gpu_addr;
-       unsigned                fw_version;
-       unsigned                fb_version;
-       atomic_t                handles[AMDGPU_MAX_VCE_HANDLES];
-       struct drm_file         *filp[AMDGPU_MAX_VCE_HANDLES];
-       uint32_t                img_size[AMDGPU_MAX_VCE_HANDLES];
-       struct delayed_work     idle_work;
-       struct mutex            idle_mutex;
-       const struct firmware   *fw;    /* VCE firmware */
-       struct amdgpu_ring      ring[AMDGPU_MAX_VCE_RINGS];
-       struct amdgpu_irq_src   irq;
-       unsigned                harvest_config;
-       struct amd_sched_entity entity;
-       uint32_t                srbm_soft_reset;
-       unsigned                num_rings;
-};
-
 /*
  * SDMA
  */
@@ -1095,11 +1207,22 @@ struct amdgpu_sdma {
 /*
  * Firmware
  */
+enum amdgpu_firmware_load_type {
+       AMDGPU_FW_LOAD_DIRECT = 0,
+       AMDGPU_FW_LOAD_SMU,
+       AMDGPU_FW_LOAD_PSP,
+};
+
 struct amdgpu_firmware {
        struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM];
-       bool smu_load;
+       enum amdgpu_firmware_load_type load_type;
        struct amdgpu_bo *fw_buf;
        unsigned int fw_size;
+       unsigned int max_ucodes;
+       /* from vega10, firmware is loaded by psp instead of smu */
+       const struct amdgpu_psp_funcs *funcs;
+       struct amdgpu_bo *rbuf;
+       struct mutex mutex;
 };
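
load_type replaces the old smu_load boolean with a three-way choice: direct MMIO writes, SMU-assisted loading, or PSP-assisted loading (the Vega10 path). A plausible resolution of the amdgpu_fw_load_type module parameter declared earlier, where -1 requests the per-ASIC default; the helper name and default policy below are assumptions, with the real logic living in amdgpu_ucode.c:

    static enum amdgpu_firmware_load_type
    fw_load_type_sketch(struct amdgpu_device *adev, int param)
    {
            switch (param) {
            case 0: return AMDGPU_FW_LOAD_DIRECT;
            case 1: return AMDGPU_FW_LOAD_SMU;
            case 2: return AMDGPU_FW_LOAD_PSP;
            default: return AMDGPU_FW_LOAD_SMU; /* per-ASIC default */
            }
    }
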
 
 /*
@@ -1112,10 +1235,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);
  * Testing
  */
 void amdgpu_test_moves(struct amdgpu_device *adev);
-void amdgpu_test_ring_sync(struct amdgpu_device *adev,
-                          struct amdgpu_ring *cpA,
-                          struct amdgpu_ring *cpB);
-void amdgpu_test_syncing(struct amdgpu_device *adev);
 
 /*
  * MMU Notifier
@@ -1179,7 +1298,6 @@ struct amdgpu_smumgr {
  */
 struct amdgpu_allowed_register_entry {
        uint32_t reg_offset;
-       bool untouched;
        bool grbm_indexed;
 };
 
@@ -1202,6 +1320,8 @@ struct amdgpu_asic_funcs {
        /* static power management */
        int (*get_pcie_lanes)(struct amdgpu_device *adev);
        void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
+       /* get config memsize register */
+       u32 (*get_config_memsize)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1342,9 +1462,11 @@ struct amdgpu_device {
        bool                            have_disp_power_ref;
 
        /* BIOS */
+       bool                            is_atom_fw;
        uint8_t                         *bios;
        uint32_t                        bios_size;
        struct amdgpu_bo                *stollen_vga_memory;
+       uint32_t                        bios_scratch_reg_offset;
        uint32_t                        bios_scratch[AMDGPU_BIOS_NUM_SCRATCH];
 
        /* Register/doorbell mmio */
@@ -1391,6 +1513,7 @@ struct amdgpu_device {
        struct amdgpu_gart              gart;
        struct amdgpu_dummy_page        dummy_page;
        struct amdgpu_vm_manager        vm_manager;
+       struct amdgpu_vmhub             vmhub[AMDGPU_MAX_VMHUBS];
 
        /* memory management */
        struct amdgpu_mman              mman;
@@ -1457,6 +1580,9 @@ struct amdgpu_device {
        /* firmwares */
        struct amdgpu_firmware          firmware;
 
+       /* PSP */
+       struct psp_context              psp;
+
        /* GDS */
        struct amdgpu_gds               gds;
 
@@ -1501,23 +1627,32 @@ void amdgpu_device_fini(struct amdgpu_device *adev);
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
-                       bool always_indirect);
+                       uint32_t acc_flags);
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
-                   bool always_indirect);
+                   uint32_t acc_flags);
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
 
 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
+u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
+void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 
 /*
  * Registers read & write functions.
  */
-#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
-#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), true)
-#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), false))
-#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), false)
-#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), true)
+
+#define AMDGPU_REGS_IDX       (1<<0)
+#define AMDGPU_REGS_NO_KIQ    (1<<1)
+
+#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
+#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+
+#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
+#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
+#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
+#define WREG32(reg, v) amdgpu_mm_wreg(adev, (reg), (v), 0)
+#define WREG32_IDX(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_IDX)
 #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK)
 #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg))
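
The bool always_indirect parameter becomes an acc_flags bitmask so a caller can independently request indexed (indirect) access and opt out of KIQ mediation, which matters under SR-IOV where ordinary register access is tunneled through the KIQ. The accessor presumably branches on the flags along these lines:

    /* sketch of amdgpu_mm_rreg() flag handling under SR-IOV */
    uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
                            uint32_t acc_flags)
    {
            if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
                    return amdgpu_virt_kiq_rreg(adev, reg);

            if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
                    return readl(((void __iomem *)adev->rmmio) + (reg * 4));

            /* indexed access via MM_INDEX/MM_DATA elided */
            return 0;
    }
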
@@ -1556,6 +1691,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 
 #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
 #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
+#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
+#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))
 
 #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
 #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK
@@ -1570,6 +1707,9 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 #define WREG32_FIELD(reg, field, val)  \
        WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
+#define WREG32_FIELD_OFFSET(reg, offset, field, val)   \
+       WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
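
The _OFFSET variant parameterizes the register address for multi-instance IP blocks. Hypothetical usage, where the register, field, and offset table are illustrative rather than taken from this diff:

    /* enable the ring buffer on SDMA instance i */
    WREG32_FIELD_OFFSET(SDMA0_GFX_RB_CNTL, sdma_offsets[i], RB_ENABLE, 1);
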
 /*
  * BIOS helpers.
  */
@@ -1584,7 +1724,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 {
        if (ring->count_dw <= 0)
                DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-       ring->ring[ring->wptr++] = v;
+       ring->ring[ring->wptr++ & ring->buf_mask] = v;
        ring->wptr &= ring->ptr_mask;
        ring->count_dw--;
 }
@@ -1594,30 +1734,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
        unsigned occupied, chunk1, chunk2;
        void *dst;
 
-       if (ring->count_dw < count_dw) {
+       if (unlikely(ring->count_dw < count_dw)) {
                DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
-       } else {
-               occupied = ring->wptr & ring->ptr_mask;
-               dst = (void *)&ring->ring[occupied];
-               chunk1 = ring->ptr_mask + 1 - occupied;
-               chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
-               chunk2 = count_dw - chunk1;
-               chunk1 <<= 2;
-               chunk2 <<= 2;
-
-               if (chunk1)
-                       memcpy(dst, src, chunk1);
-
-               if (chunk2) {
-                       src += chunk1;
-                       dst = (void *)ring->ring;
-                       memcpy(dst, src, chunk2);
-               }
-
-               ring->wptr += count_dw;
-               ring->wptr &= ring->ptr_mask;
-               ring->count_dw -= count_dw;
+               return;
+       }
+
+       occupied = ring->wptr & ring->buf_mask;
+       dst = (void *)&ring->ring[occupied];
+       chunk1 = ring->buf_mask + 1 - occupied;
+       chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
+       chunk2 = count_dw - chunk1;
+       chunk1 <<= 2;
+       chunk2 <<= 2;
+
+       if (chunk1)
+               memcpy(dst, src, chunk1);
+
+       if (chunk2) {
+               src += chunk1;
+               dst = (void *)ring->ring;
+               memcpy(dst, src, chunk2);
        }
+
+       ring->wptr += count_dw;
+       ring->wptr &= ring->ptr_mask;
+       ring->count_dw -= count_dw;
 }
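
A worked example of the wraparound split above, assuming a 1024-dword ring buffer:

    /* with buf_mask = 0x3ff, wptr = 1020 and count_dw = 10:
     * chunk1 = 1024 - 1020 = 4 dwords -> 16 bytes copied to the tail,
     * chunk2 = 10 - 4      = 6 dwords -> 24 bytes copied to the start,
     * wptr advances to 1030 and is then wrapped by ptr_mask */
    amdgpu_ring_write_multiple(ring, src, 10);
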
 
 static inline struct amdgpu_sdma_instance *
@@ -1650,11 +1791,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
+#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
 #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
@@ -1672,6 +1815,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
@@ -1698,6 +1842,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
 #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
+#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
 /* Common functions */
 int amdgpu_gpu_reset(struct amdgpu_device *adev);
@@ -1723,7 +1868,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
                                       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
-uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
+uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
                                 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
 void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
@@ -1742,12 +1887,14 @@ void amdgpu_unregister_atpx_handler(void);
 bool amdgpu_has_atpx_dgpu_power_cntl(void);
 bool amdgpu_is_atpx_hybrid(void);
 bool amdgpu_atpx_dgpu_req_power_for_displays(void);
+bool amdgpu_has_atpx(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
 static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
 static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
+static inline bool amdgpu_has_atpx(void) { return false; }
 #endif
 
 /*
@@ -1762,18 +1909,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev);
 int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void amdgpu_driver_postclose_kms(struct drm_device *dev,
                                 struct drm_file *file_priv);
-void amdgpu_driver_preclose_kms(struct drm_device *dev,
-                               struct drm_file *file_priv);
 int amdgpu_suspend(struct amdgpu_device *adev);
 int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon);
 int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon);
 u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe);
 int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe);
 void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
-int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
-                                   int *max_error,
-                                   struct timeval *vblank_time,
-                                   unsigned flags);
 long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
                             unsigned long arg);