]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/commitdiff
Merge tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Mar 2016 19:34:54 +0000 (12:34 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Mar 2016 19:34:54 +0000 (12:34 -0700)
Pull dmaengine updates from Vinod Koul:
 "This is a smallish update with minor changes to the core, a new driver,
  and the usual updates.  Nothing super exciting here.

   - We have made the slave address physical so that drivers can do the
     mapping.

   - We now expose maxburst for slave DMA as a new capability so
     clients can query it and program accordingly.

   - addition of device synchronize callbacks on omap and edma.

   - pl330 updates to support DMAFLUSHP for Rockchip platforms.

   - Updates and improved sg handling in Xilinx VDMA driver.

   - New hidma qualcomm dma driver, though some bits are still in
     progress"

* tag 'dmaengine-4.6-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (40 commits)
  dmaengine: IOATDMA: revise channel reset workaround on CB3.3 platforms
  dmaengine: add Qualcomm Technologies HIDMA channel driver
  dmaengine: add Qualcomm Technologies HIDMA management driver
  dmaengine: hidma: Add Device Tree binding
  dmaengine: qcom_bam_dma: move to qcom directory
  dmaengine: tegra: Move of_device_id table near to its user
  dmaengine: xilinx_vdma: Remove unnecessary variable initializations
  dmaengine: sirf: use __maybe_unused to hide pm functions
  dmaengine: rcar-dmac: clear pertinence number of channels
  dmaengine: sh: shdmac: don't open code of_device_get_match_data()
  dmaengine: tegra: don't open code of_device_get_match_data()
  dmaengine: qcom_bam_dma: Make driver work for BE
  dmaengine: sun4i: support module autoloading
  dma/mic_x100_dma: IS_ERR() vs PTR_ERR() typo
  dmaengine: xilinx_vdma: Use readl_poll_timeout instead of do while loop's
  dmaengine: xilinx_vdma: Simplify spin lock handling
  dmaengine: xilinx_vdma: Fix issues with non-parking mode
  dmaengine: xilinx_vdma: Improve SG engine handling
  dmaengine: pl330: fix to support the burst mode
  dmaengine: make slave address physical
  ...

1  2 
drivers/dma/edma.c
drivers/dma/ioat/dma.c
drivers/dma/qcom/bam_dma.c
drivers/spi/spi-rockchip.c

diff --combined drivers/dma/edma.c
index e3d7fcb69b4c2e4ffc4221c8ea8ec4e360648fd7,29a7723918d96093ead93e36641c96f46a401e1a..ee3463e774f8e4dc5c46e2614719f9d0723cfd06
  #define GET_NUM_REGN(x)               ((x & 0x300000) >> 20) /* bits 20-21 */
  #define CHMAP_EXIST           BIT(24)
  
 +/* CCSTAT register */
 +#define EDMA_CCSTAT_ACTV      BIT(4)
 +
  /*
   * Max of 20 segments per channel to conserve PaRAM slots
   * Also note that MAX_NR_SG should be atleast the no.of periods
@@@ -869,6 -866,13 +869,13 @@@ static int edma_terminate_all(struct dm
        return 0;
  }
  
+ static void edma_synchronize(struct dma_chan *chan)
+ {
+       struct edma_chan *echan = to_edma_chan(chan);
+       vchan_synchronize(&echan->vchan);
+ }
  static int edma_slave_config(struct dma_chan *chan,
        struct dma_slave_config *cfg)
  {
@@@ -1365,36 -1369,36 +1372,36 @@@ static struct dma_async_tx_descriptor *
  static void edma_completion_handler(struct edma_chan *echan)
  {
        struct device *dev = echan->vchan.chan.device->dev;
-       struct edma_desc *edesc = echan->edesc;
-       if (!edesc)
-               return;
+       struct edma_desc *edesc;
  
        spin_lock(&echan->vchan.lock);
-       if (edesc->cyclic) {
-               vchan_cyclic_callback(&edesc->vdesc);
-               spin_unlock(&echan->vchan.lock);
-               return;
-       } else if (edesc->processed == edesc->pset_nr) {
-               edesc->residue = 0;
-               edma_stop(echan);
-               vchan_cookie_complete(&edesc->vdesc);
-               echan->edesc = NULL;
-               dev_dbg(dev, "Transfer completed on channel %d\n",
-                       echan->ch_num);
-       } else {
-               dev_dbg(dev, "Sub transfer completed on channel %d\n",
-                       echan->ch_num);
-               edma_pause(echan);
-               /* Update statistics for tx_status */
-               edesc->residue -= edesc->sg_len;
-               edesc->residue_stat = edesc->residue;
-               edesc->processed_stat = edesc->processed;
+       edesc = echan->edesc;
+       if (edesc) {
+               if (edesc->cyclic) {
+                       vchan_cyclic_callback(&edesc->vdesc);
+                       spin_unlock(&echan->vchan.lock);
+                       return;
+               } else if (edesc->processed == edesc->pset_nr) {
+                       edesc->residue = 0;
+                       edma_stop(echan);
+                       vchan_cookie_complete(&edesc->vdesc);
+                       echan->edesc = NULL;
+                       dev_dbg(dev, "Transfer completed on channel %d\n",
+                               echan->ch_num);
+               } else {
+                       dev_dbg(dev, "Sub transfer completed on channel %d\n",
+                               echan->ch_num);
+                       edma_pause(echan);
+                       /* Update statistics for tx_status */
+                       edesc->residue -= edesc->sg_len;
+                       edesc->residue_stat = edesc->residue;
+                       edesc->processed_stat = edesc->processed;
+               }
+               edma_execute(echan);
        }
-       edma_execute(echan);
  
        spin_unlock(&echan->vchan.lock);
  }
@@@ -1683,20 -1687,9 +1690,20 @@@ static void edma_issue_pending(struct d
        spin_unlock_irqrestore(&echan->vchan.lock, flags);
  }
  
 +/*
 + * This limit exists to avoid a possible infinite loop when waiting for proof
 + * that a particular transfer is completed. This limit can be hit if there
 + * are large bursts to/from slow devices or the CPU is never able to catch
 + * the DMA hardware idle. On an AM335x transfering 48 bytes from the UART
 + * RX-FIFO, as many as 55 loops have been seen.
 + */
 +#define EDMA_MAX_TR_WAIT_LOOPS 1000
 +
  static u32 edma_residue(struct edma_desc *edesc)
  {
        bool dst = edesc->direction == DMA_DEV_TO_MEM;
 +      int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
 +      struct edma_chan *echan = edesc->echan;
        struct edma_pset *pset = edesc->pset;
        dma_addr_t done, pos;
        int i;
         * We always read the dst/src position from the first RamPar
         * pset. That's the one which is active now.
         */
 -      pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst);
 +      pos = edma_get_position(echan->ecc, echan->slot[0], dst);
 +
 +      /*
 +       * "pos" may represent a transfer request that is still being
 +       * processed by the EDMACC or EDMATC. We will busy wait until
 +       * any one of the situations occurs:
 +       *   1. the DMA hardware is idle
 +       *   2. a new transfer request is setup
 +       *   3. we hit the loop limit
 +       */
 +      while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) {
 +              /* check if a new transfer request is setup */
 +              if (edma_get_position(echan->ecc,
 +                                    echan->slot[0], dst) != pos) {
 +                      break;
 +              }
 +
 +              if (!--loop_count) {
 +                      dev_dbg_ratelimited(echan->vchan.chan.device->dev,
 +                              "%s: timeout waiting for PaRAM update\n",
 +                              __func__);
 +                      break;
 +              }
 +
 +              cpu_relax();
 +      }
  
        /*
         * Cyclic is simple. Just subtract pset[0].addr from pos.
@@@ -1837,6 -1805,7 +1844,7 @@@ static void edma_dma_init(struct edma_c
        s_ddev->device_pause = edma_dma_pause;
        s_ddev->device_resume = edma_dma_resume;
        s_ddev->device_terminate_all = edma_terminate_all;
+       s_ddev->device_synchronize = edma_synchronize;
  
        s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
        s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
                m_ddev->device_pause = edma_dma_pause;
                m_ddev->device_resume = edma_dma_resume;
                m_ddev->device_terminate_all = edma_terminate_all;
+               m_ddev->device_synchronize = edma_synchronize;
  
                m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
                m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
diff --combined drivers/dma/ioat/dma.c
index 21539d5c54c3d5c2d2dc3244650691bf414cf879,5428746f03fbb90504369ac81cdd8374a90651eb..bd09961443b131a2e81d5a27d9dba160d4b52567
@@@ -31,6 -31,7 +31,7 @@@
  #include <linux/dma-mapping.h>
  #include <linux/workqueue.h>
  #include <linux/prefetch.h>
+ #include <linux/sizes.h>
  #include "dma.h"
  #include "registers.h"
  #include "hw.h"
@@@ -290,24 -291,30 +291,30 @@@ static dma_cookie_t ioat_tx_submit_unlo
  }
  
  static struct ioat_ring_ent *
- ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
+ ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
  {
        struct ioat_dma_descriptor *hw;
        struct ioat_ring_ent *desc;
        struct ioatdma_device *ioat_dma;
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+       int chunk;
        dma_addr_t phys;
+       u8 *pos;
+       off_t offs;
  
        ioat_dma = to_ioatdma_device(chan->device);
-       hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys);
-       if (!hw)
-               return NULL;
+       chunk = idx / IOAT_DESCS_PER_2M;
+       idx &= (IOAT_DESCS_PER_2M - 1);
+       offs = idx * IOAT_DESC_SZ;
+       pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+       phys = ioat_chan->descs[chunk].hw + offs;
+       hw = (struct ioat_dma_descriptor *)pos;
        memset(hw, 0, sizeof(*hw));
  
        desc = kmem_cache_zalloc(ioat_cache, flags);
-       if (!desc) {
-               pci_pool_free(ioat_dma->dma_pool, hw, phys);
+       if (!desc)
                return NULL;
-       }
  
        dma_async_tx_descriptor_init(&desc->txd, chan);
        desc->txd.tx_submit = ioat_tx_submit_unlock;
  
  void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
  {
-       struct ioatdma_device *ioat_dma;
-       ioat_dma = to_ioatdma_device(chan->device);
-       pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys);
        kmem_cache_free(ioat_cache, desc);
  }
  
  struct ioat_ring_ent **
  ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
  {
+       struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
        struct ioat_ring_ent **ring;
-       int descs = 1 << order;
-       int i;
-       if (order > ioat_get_max_alloc_order())
-               return NULL;
+       int total_descs = 1 << order;
+       int i, chunks;
  
        /* allocate the array to hold the software ring */
-       ring = kcalloc(descs, sizeof(*ring), flags);
+       ring = kcalloc(total_descs, sizeof(*ring), flags);
        if (!ring)
                return NULL;
-       for (i = 0; i < descs; i++) {
-               ring[i] = ioat_alloc_ring_ent(c, flags);
+       ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+       for (i = 0; i < chunks; i++) {
+               struct ioat_descs *descs = &ioat_chan->descs[i];
+               descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+                                                SZ_2M, &descs->hw, flags);
+               if (!descs->virt && (i > 0)) {
+                       int idx;
+                       for (idx = 0; idx < i; idx++) {
+                               dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+                                                 descs->virt, descs->hw);
+                               descs->virt = NULL;
+                               descs->hw = 0;
+                       }
+                       ioat_chan->desc_chunks = 0;
+                       kfree(ring);
+                       return NULL;
+               }
+       }
+       for (i = 0; i < total_descs; i++) {
+               ring[i] = ioat_alloc_ring_ent(c, i, flags);
                if (!ring[i]) {
+                       int idx;
                        while (i--)
                                ioat_free_ring_ent(ring[i], c);
+                       for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+                               dma_free_coherent(to_dev(ioat_chan),
+                                                 SZ_2M,
+                                                 ioat_chan->descs[idx].virt,
+                                                 ioat_chan->descs[idx].hw);
+                               ioat_chan->descs[idx].virt = NULL;
+                               ioat_chan->descs[idx].hw = 0;
+                       }
+                       ioat_chan->desc_chunks = 0;
                        kfree(ring);
                        return NULL;
                }
        }
  
        /* link descs */
-       for (i = 0; i < descs-1; i++) {
+       for (i = 0; i < total_descs-1; i++) {
                struct ioat_ring_ent *next = ring[i+1];
                struct ioat_dma_descriptor *hw = ring[i]->hw;
  
        return ring;
  }
  
- static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order)
- {
-       /* reshape differs from normal ring allocation in that we want
-        * to allocate a new software ring while only
-        * extending/truncating the hardware ring
-        */
-       struct dma_chan *c = &ioat_chan->dma_chan;
-       const u32 curr_size = ioat_ring_size(ioat_chan);
-       const u16 active = ioat_ring_active(ioat_chan);
-       const u32 new_size = 1 << order;
-       struct ioat_ring_ent **ring;
-       u32 i;
-       if (order > ioat_get_max_alloc_order())
-               return false;
-       /* double check that we have at least 1 free descriptor */
-       if (active == curr_size)
-               return false;
-       /* when shrinking, verify that we can hold the current active
-        * set in the new ring
-        */
-       if (active >= new_size)
-               return false;
-       /* allocate the array to hold the software ring */
-       ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
-       if (!ring)
-               return false;
-       /* allocate/trim descriptors as needed */
-       if (new_size > curr_size) {
-               /* copy current descriptors to the new ring */
-               for (i = 0; i < curr_size; i++) {
-                       u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-                       ring[new_idx] = ioat_chan->ring[curr_idx];
-                       set_desc_id(ring[new_idx], new_idx);
-               }
-               /* add new descriptors to the ring */
-               for (i = curr_size; i < new_size; i++) {
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-                       ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT);
-                       if (!ring[new_idx]) {
-                               while (i--) {
-                                       u16 new_idx = (ioat_chan->tail+i) &
-                                                      (new_size-1);
-                                       ioat_free_ring_ent(ring[new_idx], c);
-                               }
-                               kfree(ring);
-                               return false;
-                       }
-                       set_desc_id(ring[new_idx], new_idx);
-               }
-               /* hw link new descriptors */
-               for (i = curr_size-1; i < new_size; i++) {
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-                       struct ioat_ring_ent *next =
-                               ring[(new_idx+1) & (new_size-1)];
-                       struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
-                       hw->next = next->txd.phys;
-               }
-       } else {
-               struct ioat_dma_descriptor *hw;
-               struct ioat_ring_ent *next;
-               /* copy current descriptors to the new ring, dropping the
-                * removed descriptors
-                */
-               for (i = 0; i < new_size; i++) {
-                       u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1);
-                       u16 new_idx = (ioat_chan->tail+i) & (new_size-1);
-                       ring[new_idx] = ioat_chan->ring[curr_idx];
-                       set_desc_id(ring[new_idx], new_idx);
-               }
-               /* free deleted descriptors */
-               for (i = new_size; i < curr_size; i++) {
-                       struct ioat_ring_ent *ent;
-                       ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i);
-                       ioat_free_ring_ent(ent, c);
-               }
-               /* fix up hardware ring */
-               hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw;
-               next = ring[(ioat_chan->tail+new_size) & (new_size-1)];
-               hw->next = next->txd.phys;
-       }
-       dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n",
-               __func__, new_size);
-       kfree(ioat_chan->ring);
-       ioat_chan->ring = ring;
-       ioat_chan->alloc_order = order;
-       return true;
- }
  /**
   * ioat_check_space_lock - verify space and grab ring producer lock
   * @ioat: ioat,3 channel (ring) to operate on
  int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
        __acquires(&ioat_chan->prep_lock)
  {
-       bool retry;
-  retry:
        spin_lock_bh(&ioat_chan->prep_lock);
        /* never allow the last descriptor to be consumed, we need at
         * least one free at all times to allow for on-the-fly ring
                ioat_chan->produce = num_descs;
                return 0;  /* with ioat->prep_lock held */
        }
-       retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
        spin_unlock_bh(&ioat_chan->prep_lock);
  
-       /* is another cpu already trying to expand the ring? */
-       if (retry)
-               goto retry;
-       spin_lock_bh(&ioat_chan->cleanup_lock);
-       spin_lock_bh(&ioat_chan->prep_lock);
-       retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1);
-       clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state);
-       spin_unlock_bh(&ioat_chan->prep_lock);
-       spin_unlock_bh(&ioat_chan->cleanup_lock);
-       /* if we were able to expand the ring retry the allocation */
-       if (retry)
-               goto retry;
        dev_dbg_ratelimited(to_dev(ioat_chan),
                            "%s: ring full! num_descs: %d (%x:%x:%x)\n",
                            __func__, num_descs, ioat_chan->head,
@@@ -823,19 -734,6 +734,6 @@@ static void check_active(struct ioatdma
  
        if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
                mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-       else if (ioat_chan->alloc_order > ioat_get_alloc_order()) {
-               /* if the ring is idle, empty, and oversized try to step
-                * down the size
-                */
-               reshape_ring(ioat_chan, ioat_chan->alloc_order - 1);
-               /* keep shrinking until we get back to our minimum
-                * default size
-                */
-               if (ioat_chan->alloc_order > ioat_get_alloc_order())
-                       mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
-       }
  }
  
  void ioat_timer_event(unsigned long data)
                        return;
        }
  
 +      spin_lock_bh(&ioat_chan->cleanup_lock);
 +
 +      /* handle the no-actives case */
 +      if (!ioat_ring_active(ioat_chan)) {
 +              spin_lock_bh(&ioat_chan->prep_lock);
 +              check_active(ioat_chan);
 +              spin_unlock_bh(&ioat_chan->prep_lock);
 +              spin_unlock_bh(&ioat_chan->cleanup_lock);
 +              return;
 +      }
 +
        /* if we haven't made progress and we have already
         * acknowledged a pending completion once, then be more
         * forceful with a restart
         */
 -      spin_lock_bh(&ioat_chan->cleanup_lock);
        if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
                __cleanup(ioat_chan, phys_complete);
        else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
 +              u32 chanerr;
 +
 +              chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
 +              dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
 +              dev_warn(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
 +                       status, chanerr);
 +              dev_warn(to_dev(ioat_chan), "Active descriptors: %d\n",
 +                       ioat_ring_active(ioat_chan));
 +
                spin_lock_bh(&ioat_chan->prep_lock);
                ioat_restart_channel(ioat_chan);
                spin_unlock_bh(&ioat_chan->prep_lock);
                spin_unlock_bh(&ioat_chan->cleanup_lock);
                return;
 -      } else {
 +      } else
                set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
 -              mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
 -      }
 -
  
 -      if (ioat_ring_active(ioat_chan))
 -              mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
 -      else {
 -              spin_lock_bh(&ioat_chan->prep_lock);
 -              check_active(ioat_chan);
 -              spin_unlock_bh(&ioat_chan->prep_lock);
 -      }
 +      mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
        spin_unlock_bh(&ioat_chan->cleanup_lock);
  }
  
@@@ -916,40 -804,6 +814,6 @@@ ioat_tx_status(struct dma_chan *c, dma_
        return dma_cookie_status(c, cookie, txstate);
  }
  
- static int ioat_irq_reinit(struct ioatdma_device *ioat_dma)
- {
-       struct pci_dev *pdev = ioat_dma->pdev;
-       int irq = pdev->irq, i;
-       if (!is_bwd_ioat(pdev))
-               return 0;
-       switch (ioat_dma->irq_mode) {
-       case IOAT_MSIX:
-               for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) {
-                       struct msix_entry *msix = &ioat_dma->msix_entries[i];
-                       struct ioatdma_chan *ioat_chan;
-                       ioat_chan = ioat_chan_by_index(ioat_dma, i);
-                       devm_free_irq(&pdev->dev, msix->vector, ioat_chan);
-               }
-               pci_disable_msix(pdev);
-               break;
-       case IOAT_MSI:
-               pci_disable_msi(pdev);
-               /* fall through */
-       case IOAT_INTX:
-               devm_free_irq(&pdev->dev, irq, ioat_dma);
-               break;
-       default:
-               return 0;
-       }
-       ioat_dma->irq_mode = IOAT_NOIRQ;
-       return ioat_dma_setup_interrupts(ioat_dma);
- }
  int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
  {
        /* throw away whatever the channel was doing and get it
                }
        }
  
+       if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+               ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+               ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+               ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+       }
        err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
-       if (!err)
-               err = ioat_irq_reinit(ioat_dma);
+       if (!err) {
+               if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+                       writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+                       writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+                       writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+               }
+       }
  
        if (err)
                dev_err(&pdev->dev, "Failed to reset: %d\n", err);
index 0000000000000000000000000000000000000000,2d691a34a0ab5a266c2e2768912b9e19bb1885de..d5e0a9c3ad5d035d5489fd1fbe5552afe04a3877
mode 000000,100644..100644
--- /dev/null
@@@ -1,0 -1,1262 +1,1262 @@@
 -      bchan->fifo_virt = dma_alloc_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
 -                              &bchan->fifo_phys, GFP_KERNEL);
+ /*
+  * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License version 2 and
+  * only version 2 as published by the Free Software Foundation.
+  *
+  * This program is distributed in the hope that it will be useful,
+  * but WITHOUT ANY WARRANTY; without even the implied warranty of
+  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  * GNU General Public License for more details.
+  *
+  */
+ /*
+  * QCOM BAM DMA engine driver
+  *
+  * QCOM BAM DMA blocks are distributed amongst a number of the on-chip
+  * peripherals on the MSM 8x74.  The configuration of the channels are dependent
+  * on the way they are hard wired to that specific peripheral.  The peripheral
+  * device tree entries specify the configuration of each channel.
+  *
+  * The DMA controller requires the use of external memory for storage of the
+  * hardware descriptors for each channel.  The descriptor FIFO is accessed as a
+  * circular buffer and operations are managed according to the offset within the
+  * FIFO.  After pipe/channel reset, all of the pipe registers and internal state
+  * are back to defaults.
+  *
+  * During DMA operations, we write descriptors to the FIFO, being careful to
+  * handle wrapping and then write the last FIFO offset to that channel's
+  * P_EVNT_REG register to kick off the transaction.  The P_SW_OFSTS register
+  * indicates the current FIFO offset that is being processed, so there is some
+  * indication of where the hardware is currently working.
+  */
+ #include <linux/kernel.h>
+ #include <linux/io.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/module.h>
+ #include <linux/interrupt.h>
+ #include <linux/dma-mapping.h>
+ #include <linux/scatterlist.h>
+ #include <linux/device.h>
+ #include <linux/platform_device.h>
+ #include <linux/of.h>
+ #include <linux/of_address.h>
+ #include <linux/of_irq.h>
+ #include <linux/of_dma.h>
+ #include <linux/clk.h>
+ #include <linux/dmaengine.h>
+ #include "../dmaengine.h"
+ #include "../virt-dma.h"
+ struct bam_desc_hw {
+       __le32 addr;            /* Buffer physical address */
+       __le16 size;            /* Buffer size in bytes */
+       __le16 flags;
+ };
+ #define DESC_FLAG_INT BIT(15)
+ #define DESC_FLAG_EOT BIT(14)
+ #define DESC_FLAG_EOB BIT(13)
+ #define DESC_FLAG_NWD BIT(12)
+ struct bam_async_desc {
+       struct virt_dma_desc vd;
+       u32 num_desc;
+       u32 xfer_len;
+       /* transaction flags, EOT|EOB|NWD */
+       u16 flags;
+       struct bam_desc_hw *curr_desc;
+       enum dma_transfer_direction dir;
+       size_t length;
+       struct bam_desc_hw desc[0];
+ };
+ enum bam_reg {
+       BAM_CTRL,
+       BAM_REVISION,
+       BAM_NUM_PIPES,
+       BAM_DESC_CNT_TRSHLD,
+       BAM_IRQ_SRCS,
+       BAM_IRQ_SRCS_MSK,
+       BAM_IRQ_SRCS_UNMASKED,
+       BAM_IRQ_STTS,
+       BAM_IRQ_CLR,
+       BAM_IRQ_EN,
+       BAM_CNFG_BITS,
+       BAM_IRQ_SRCS_EE,
+       BAM_IRQ_SRCS_MSK_EE,
+       BAM_P_CTRL,
+       BAM_P_RST,
+       BAM_P_HALT,
+       BAM_P_IRQ_STTS,
+       BAM_P_IRQ_CLR,
+       BAM_P_IRQ_EN,
+       BAM_P_EVNT_DEST_ADDR,
+       BAM_P_EVNT_REG,
+       BAM_P_SW_OFSTS,
+       BAM_P_DATA_FIFO_ADDR,
+       BAM_P_DESC_FIFO_ADDR,
+       BAM_P_EVNT_GEN_TRSHLD,
+       BAM_P_FIFO_SIZES,
+ };
+ struct reg_offset_data {
+       u32 base_offset;
+       unsigned int pipe_mult, evnt_mult, ee_mult;
+ };
+ static const struct reg_offset_data bam_v1_3_reg_info[] = {
+       [BAM_CTRL]              = { 0x0F80, 0x00, 0x00, 0x00 },
+       [BAM_REVISION]          = { 0x0F84, 0x00, 0x00, 0x00 },
+       [BAM_NUM_PIPES]         = { 0x0FBC, 0x00, 0x00, 0x00 },
+       [BAM_DESC_CNT_TRSHLD]   = { 0x0F88, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS]          = { 0x0F8C, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_MSK]      = { 0x0F90, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_UNMASKED] = { 0x0FB0, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_STTS]          = { 0x0F94, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_CLR]           = { 0x0F98, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_EN]            = { 0x0F9C, 0x00, 0x00, 0x00 },
+       [BAM_CNFG_BITS]         = { 0x0FFC, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_EE]       = { 0x1800, 0x00, 0x00, 0x80 },
+       [BAM_IRQ_SRCS_MSK_EE]   = { 0x1804, 0x00, 0x00, 0x80 },
+       [BAM_P_CTRL]            = { 0x0000, 0x80, 0x00, 0x00 },
+       [BAM_P_RST]             = { 0x0004, 0x80, 0x00, 0x00 },
+       [BAM_P_HALT]            = { 0x0008, 0x80, 0x00, 0x00 },
+       [BAM_P_IRQ_STTS]        = { 0x0010, 0x80, 0x00, 0x00 },
+       [BAM_P_IRQ_CLR]         = { 0x0014, 0x80, 0x00, 0x00 },
+       [BAM_P_IRQ_EN]          = { 0x0018, 0x80, 0x00, 0x00 },
+       [BAM_P_EVNT_DEST_ADDR]  = { 0x102C, 0x00, 0x40, 0x00 },
+       [BAM_P_EVNT_REG]        = { 0x1018, 0x00, 0x40, 0x00 },
+       [BAM_P_SW_OFSTS]        = { 0x1000, 0x00, 0x40, 0x00 },
+       [BAM_P_DATA_FIFO_ADDR]  = { 0x1024, 0x00, 0x40, 0x00 },
+       [BAM_P_DESC_FIFO_ADDR]  = { 0x101C, 0x00, 0x40, 0x00 },
+       [BAM_P_EVNT_GEN_TRSHLD] = { 0x1028, 0x00, 0x40, 0x00 },
+       [BAM_P_FIFO_SIZES]      = { 0x1020, 0x00, 0x40, 0x00 },
+ };
+ static const struct reg_offset_data bam_v1_4_reg_info[] = {
+       [BAM_CTRL]              = { 0x0000, 0x00, 0x00, 0x00 },
+       [BAM_REVISION]          = { 0x0004, 0x00, 0x00, 0x00 },
+       [BAM_NUM_PIPES]         = { 0x003C, 0x00, 0x00, 0x00 },
+       [BAM_DESC_CNT_TRSHLD]   = { 0x0008, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS]          = { 0x000C, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_MSK]      = { 0x0010, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_UNMASKED] = { 0x0030, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_STTS]          = { 0x0014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_CLR]           = { 0x0018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_EN]            = { 0x001C, 0x00, 0x00, 0x00 },
+       [BAM_CNFG_BITS]         = { 0x007C, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_EE]       = { 0x0800, 0x00, 0x00, 0x80 },
+       [BAM_IRQ_SRCS_MSK_EE]   = { 0x0804, 0x00, 0x00, 0x80 },
+       [BAM_P_CTRL]            = { 0x1000, 0x1000, 0x00, 0x00 },
+       [BAM_P_RST]             = { 0x1004, 0x1000, 0x00, 0x00 },
+       [BAM_P_HALT]            = { 0x1008, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_STTS]        = { 0x1010, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_CLR]         = { 0x1014, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_EN]          = { 0x1018, 0x1000, 0x00, 0x00 },
+       [BAM_P_EVNT_DEST_ADDR]  = { 0x182C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_REG]        = { 0x1818, 0x00, 0x1000, 0x00 },
+       [BAM_P_SW_OFSTS]        = { 0x1800, 0x00, 0x1000, 0x00 },
+       [BAM_P_DATA_FIFO_ADDR]  = { 0x1824, 0x00, 0x1000, 0x00 },
+       [BAM_P_DESC_FIFO_ADDR]  = { 0x181C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_GEN_TRSHLD] = { 0x1828, 0x00, 0x1000, 0x00 },
+       [BAM_P_FIFO_SIZES]      = { 0x1820, 0x00, 0x1000, 0x00 },
+ };
+ static const struct reg_offset_data bam_v1_7_reg_info[] = {   /*
+        * Register layout used for the "qcom,bam-v1.7.0" compatible entry,
+        * indexed by enum bam_reg.
+        * NOTE(review): the four columns presumably map to base_offset,
+        * pipe_mult, evnt_mult and ee_mult (the fields bam_addr() reads);
+        * struct reg_offset_data is declared outside this view - confirm.
+        */
+       [BAM_CTRL]              = { 0x00000, 0x00, 0x00, 0x00 },
+       [BAM_REVISION]          = { 0x01000, 0x00, 0x00, 0x00 },
+       [BAM_NUM_PIPES]         = { 0x01008, 0x00, 0x00, 0x00 },
+       [BAM_DESC_CNT_TRSHLD]   = { 0x00008, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS]          = { 0x03010, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_MSK]      = { 0x03014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_UNMASKED] = { 0x03018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_STTS]          = { 0x00014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_CLR]           = { 0x00018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_EN]            = { 0x0001C, 0x00, 0x00, 0x00 },
+       [BAM_CNFG_BITS]         = { 0x0007C, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_EE]       = { 0x03000, 0x00, 0x00, 0x1000 },
+       [BAM_IRQ_SRCS_MSK_EE]   = { 0x03004, 0x00, 0x00, 0x1000 },
+       [BAM_P_CTRL]            = { 0x13000, 0x1000, 0x00, 0x00 },
+       [BAM_P_RST]             = { 0x13004, 0x1000, 0x00, 0x00 },
+       [BAM_P_HALT]            = { 0x13008, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_STTS]        = { 0x13010, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_CLR]         = { 0x13014, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_EN]          = { 0x13018, 0x1000, 0x00, 0x00 },
+       [BAM_P_EVNT_DEST_ADDR]  = { 0x1382C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_REG]        = { 0x13818, 0x00, 0x1000, 0x00 },
+       [BAM_P_SW_OFSTS]        = { 0x13800, 0x00, 0x1000, 0x00 },
+       [BAM_P_DATA_FIFO_ADDR]  = { 0x13824, 0x00, 0x1000, 0x00 },
+       [BAM_P_DESC_FIFO_ADDR]  = { 0x1381C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_GEN_TRSHLD] = { 0x13828, 0x00, 0x1000, 0x00 },
+       [BAM_P_FIFO_SIZES]      = { 0x13820, 0x00, 0x1000, 0x00 },
+ };
+ /*
+  * BAM_CTRL register bits.  bam_init() pulses BAM_SW_RST and then sets
+  * BAM_EN in this register.
+  */
+ #define BAM_SW_RST                    BIT(0)
+ #define BAM_EN                                BIT(1)
+ #define BAM_EN_ACCUM                  BIT(4)
+ #define BAM_TESTBUS_SEL_SHIFT         5
+ #define BAM_TESTBUS_SEL_MASK          0x3F
+ #define BAM_DESC_CACHE_SEL_SHIFT      13
+ #define BAM_DESC_CACHE_SEL_MASK               0x3
+ #define BAM_CACHED_DESC_STORE         BIT(15)
+ #define IBC_DISABLE                   BIT(16)
+ /*
+  * BAM_REVISION register fields.  bam_init() shifts the register value by
+  * NUM_EES_SHIFT and masks it with NUM_EES_MASK to validate the
+  * configured execution environment.
+  */
+ #define REVISION_SHIFT                0
+ #define REVISION_MASK         0xFF
+ #define NUM_EES_SHIFT         8
+ #define NUM_EES_MASK          0xF
+ #define CE_BUFFER_SIZE                BIT(13)
+ #define AXI_ACTIVE            BIT(14)
+ #define USE_VMIDMT            BIT(15)
+ #define SECURED                       BIT(16)
+ #define BAM_HAS_NO_BYPASS     BIT(17)
+ #define HIGH_FREQUENCY_BAM    BIT(18)
+ #define INACTIV_TMRS_EXST     BIT(19)
+ #define NUM_INACTIV_TMRS      BIT(20)
+ #define DESC_CACHE_DEPTH_SHIFT        21
+ #define DESC_CACHE_DEPTH_1    (0 << DESC_CACHE_DEPTH_SHIFT)
+ #define DESC_CACHE_DEPTH_2    (1 << DESC_CACHE_DEPTH_SHIFT)
+ #define DESC_CACHE_DEPTH_3    (2 << DESC_CACHE_DEPTH_SHIFT)
+ #define DESC_CACHE_DEPTH_4    (3 << DESC_CACHE_DEPTH_SHIFT)
+ #define CMD_DESC_EN           BIT(23)
+ #define INACTIV_TMR_BASE_SHIFT        24
+ #define INACTIV_TMR_BASE_MASK 0xFF
+ /*
+  * BAM_NUM_PIPES register fields.  bam_init() masks the register value
+  * with BAM_NUM_PIPES_MASK to obtain the number of channels.
+  */
+ #define BAM_NUM_PIPES_SHIFT           0
+ #define BAM_NUM_PIPES_MASK            0xFF
+ #define PERIPH_NON_PIPE_GRP_SHIFT     16
+ #define PERIPH_NON_PIP_GRP_MASK               0xFF	/* NOTE(review): spelled "PIP", unlike the matching _SHIFT name */
+ #define BAM_NON_PIPE_GRP_SHIFT                24
+ #define BAM_NON_PIPE_GRP_MASK         0xFF
+ /*
+  * BAM_CNFG_BITS register bits.  BAM_CNFG_BITS_DEFAULT below ORs together
+  * every bit defined here except BAM_FULL_PIPE; bam_init() writes that
+  * default value to the register.
+  */
+ #define BAM_PIPE_CNFG         BIT(2)
+ #define BAM_FULL_PIPE         BIT(11)
+ #define BAM_NO_EXT_P_RST      BIT(12)
+ #define BAM_IBC_DISABLE               BIT(13)
+ #define BAM_SB_CLK_REQ                BIT(14)
+ #define BAM_PSM_CSW_REQ               BIT(15)
+ #define BAM_PSM_P_RES         BIT(16)
+ #define BAM_AU_P_RES          BIT(17)
+ #define BAM_SI_P_RES          BIT(18)
+ #define BAM_WB_P_RES          BIT(19)
+ #define BAM_WB_BLK_CSW                BIT(20)
+ #define BAM_WB_CSW_ACK_IDL    BIT(21)
+ #define BAM_WB_RETR_SVPNT     BIT(22)
+ #define BAM_WB_DSC_AVL_P_RST  BIT(23)
+ #define BAM_REG_P_EN          BIT(24)
+ #define BAM_PSM_P_HD_DATA     BIT(25)
+ #define BAM_AU_ACCUMED                BIT(26)
+ #define BAM_CMD_ENABLE                BIT(27)
+ #define BAM_CNFG_BITS_DEFAULT (BAM_PIPE_CNFG |        \
+                                BAM_NO_EXT_P_RST |     \
+                                BAM_IBC_DISABLE |      \
+                                BAM_SB_CLK_REQ |       \
+                                BAM_PSM_CSW_REQ |      \
+                                BAM_PSM_P_RES |        \
+                                BAM_AU_P_RES |         \
+                                BAM_SI_P_RES |         \
+                                BAM_WB_P_RES |         \
+                                BAM_WB_BLK_CSW |       \
+                                BAM_WB_CSW_ACK_IDL |   \
+                                BAM_WB_RETR_SVPNT |    \
+                                BAM_WB_DSC_AVL_P_RST | \
+                                BAM_REG_P_EN |         \
+                                BAM_PSM_P_HD_DATA |    \
+                                BAM_AU_ACCUMED |       \
+                                BAM_CMD_ENABLE)
+ /*
+  * BAM_P_CTRL (per-pipe control) bits.  bam_chan_init_hw() writes
+  * P_EN | P_SYS_MODE and adds P_DIRECTION for DMA_DEV_TO_MEM transfers.
+  */
+ #define P_EN                  BIT(1)
+ #define P_DIRECTION           BIT(3)
+ #define P_SYS_STRM            BIT(4)
+ #define P_SYS_MODE            BIT(5)
+ #define P_AUTO_EOB            BIT(6)
+ #define P_AUTO_EOB_SEL_SHIFT  7
+ #define P_AUTO_EOB_SEL_512    (0 << P_AUTO_EOB_SEL_SHIFT)
+ #define P_AUTO_EOB_SEL_256    (1 << P_AUTO_EOB_SEL_SHIFT)
+ #define P_AUTO_EOB_SEL_128    (2 << P_AUTO_EOB_SEL_SHIFT)
+ #define P_AUTO_EOB_SEL_64     (3 << P_AUTO_EOB_SEL_SHIFT)
+ #define P_PREFETCH_LIMIT_SHIFT        9
+ #define P_PREFETCH_LIMIT_32   (0 << P_PREFETCH_LIMIT_SHIFT)
+ #define P_PREFETCH_LIMIT_16   (1 << P_PREFETCH_LIMIT_SHIFT)
+ #define P_PREFETCH_LIMIT_4    (2 << P_PREFETCH_LIMIT_SHIFT)
+ #define P_WRITE_NWD           BIT(11)
+ #define P_LOCK_GROUP_SHIFT    16
+ #define P_LOCK_GROUP_MASK     0x1F
+ /* BAM_DESC_CNT_TRSHLD: bam_init() programs DEFAULT_CNT_THRSHLD here */
+ #define CNT_TRSHLD            0xffff
+ #define DEFAULT_CNT_THRSHLD   0x4
+ /*
+  * BAM_IRQ_SRCS: bit 31 is tested for the BAM-level interrupt, the low
+  * 31 bits are tested per pipe (BIT(pipe)) by process_channel_irqs()
+  */
+ #define BAM_IRQ                       BIT(31)
+ #define P_IRQ                 0x7fffffff
+ /* BAM_IRQ_SRCS_MSK: same bit positions as BAM_IRQ_SRCS */
+ #define BAM_IRQ_MSK           BAM_IRQ
+ #define P_IRQ_MSK             P_IRQ
+ /* BAM_IRQ_STTS */
+ #define BAM_TIMER_IRQ         BIT(4)
+ #define BAM_EMPTY_IRQ         BIT(3)
+ #define BAM_ERROR_IRQ         BIT(2)
+ #define BAM_HRESP_ERR_IRQ     BIT(1)
+ /* BAM_IRQ_CLR */
+ #define BAM_TIMER_CLR         BIT(4)
+ #define BAM_EMPTY_CLR         BIT(3)
+ #define BAM_ERROR_CLR         BIT(2)
+ #define BAM_HRESP_ERR_CLR     BIT(1)
+ /* BAM_IRQ_EN: bam_init() enables BAM_ERROR_EN | BAM_HRESP_ERR_EN */
+ #define BAM_TIMER_EN          BIT(4)
+ #define BAM_EMPTY_EN          BIT(3)
+ #define BAM_ERROR_EN          BIT(2)
+ #define BAM_HRESP_ERR_EN      BIT(1)
+ /* BAM_P_IRQ_EN: bam_chan_init_hw() enables P_DEFAULT_IRQS_EN per pipe */
+ #define P_PRCSD_DESC_EN               BIT(0)
+ #define P_TIMER_EN            BIT(1)
+ #define P_WAKE_EN             BIT(2)
+ #define P_OUT_OF_DESC_EN      BIT(3)
+ #define P_ERR_EN              BIT(4)
+ #define P_TRNSFR_END_EN               BIT(5)
+ #define P_DEFAULT_IRQS_EN     (P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)
+ /* BAM_P_SW_OFSTS */
+ #define P_SW_OFSTS_MASK               0xffff
+ #define BAM_DESC_FIFO_SIZE    SZ_32K	/* bytes allocated for each channel's descriptor FIFO */
+ #define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)	/* one slot fewer than the FIFO holds */
+ #define BAM_MAX_DATA_SIZE     (SZ_32K - 8)	/* largest chunk placed in one hardware descriptor */
+ struct bam_chan {       /* driver state for one BAM pipe, exposed as a DMA channel */
+       struct virt_dma_chan vc;        /* virt-dma channel; vc.lock guards the fields below */
+       struct bam_device *bdev;        /* owning controller, set by bam_channel_init() */
+       /* pipe index, assigned by bam_channel_init() and used for register addressing */
+       u32 id;
+       struct bam_async_desc *curr_txd;        /* current running dma */
+       /* runtime configuration */
+       struct dma_slave_config slave;
+       /* fifo storage */
+       struct bam_desc_hw *fifo_virt;
+       dma_addr_t fifo_phys;
+       /* fifo markers */
+       unsigned short head;            /* start of active descriptor entries */
+       unsigned short tail;            /* end of active descriptor entries */
+       unsigned int initialized;       /* is the channel hw initialized? */
+       unsigned int paused;            /* is the channel paused? */
+       unsigned int reconfigure;       /* new slave config? */
+       struct list_head node;
+ };
+ /* Recover the bam_chan that embeds the given generic dmaengine channel. */
+ static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
+ {
+       struct bam_chan *bchan;
+       bchan = container_of(common, struct bam_chan, vc.chan);
+       return bchan;
+ }
+ struct bam_device {     /* per-controller state, allocated in bam_dma_probe() */
+       void __iomem *regs;             /* mapped register window, base for bam_addr() */
+       struct device *dev;
+       struct dma_device common;       /* dmaengine device registered for this controller */
+       struct device_dma_parameters dma_parms;
+       struct bam_chan *channels;      /* array of num_channels entries */
+       u32 num_channels;               /* read from BAM_NUM_PIPES in bam_init() */
+       /* execution environment ID, from DT */
+       u32 ee;
+       const struct reg_offset_data *layout;   /* register table picked by compatible string */
+       struct clk *bamclk;
+       int irq;
+       /* dma start transaction tasklet */
+       struct tasklet_struct task;
+ };
+ /**
+  * bam_addr - compute the mapped address of a BAM register
+  * @bdev: bam device
+  * @pipe: pipe instance (ignored when register doesn't have multiple instances)
+  * @reg:  register enum
+  *
+  * The address is the register's base offset in the device layout table
+  * plus the per-pipe, per-event and per-EE strides scaled by @pipe and by
+  * the device's execution environment.
+  */
+ static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
+               enum bam_reg reg)
+ {
+       const struct reg_offset_data *entry = &bdev->layout[reg];
+       void __iomem *addr = bdev->regs;
+       addr += entry->base_offset;
+       addr += entry->pipe_mult * pipe;
+       addr += entry->evnt_mult * pipe;
+       addr += entry->ee_mult * bdev->ee;
+       return addr;
+ }
+ /**
+  * bam_reset_channel - Reset individual BAM DMA channel
+  * @bchan: bam channel
+  *
+  * This function resets a specific BAM channel by writing 1 and then 0 to
+  * its BAM_P_RST register, and clears bchan->initialized so that
+  * bam_start_dma() re-runs bam_chan_init_hw() before the next transfer.
+  *
+  * The caller must hold bchan->vc.lock (checked with lockdep).
+  */
+ static void bam_reset_channel(struct bam_chan *bchan)
+ {
+       struct bam_device *bdev = bchan->bdev;
+       lockdep_assert_held(&bchan->vc.lock);
+       /* reset channel */
+       writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST));
+       writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST));
+       /* don't allow cpu to reorder BAM register accesses done after this */
+       wmb();
+       /* make sure hw is initialized when channel is used the first time  */
+       bchan->initialized = 0;
+ }
+ /**
+  * bam_chan_init_hw - Initialize channel hardware
+  * @bchan: bam channel
+  * @dir: transfer direction; DMA_DEV_TO_MEM additionally sets P_DIRECTION
+  *
+  * This function resets and initializes the BAM channel: it programs the
+  * descriptor FIFO address and size, enables the default pipe interrupts,
+  * unmasks the pipe in BAM_IRQ_SRCS_MSK_EE, enables the pipe and finally
+  * clears the software head/tail FIFO markers.
+  *
+  * It calls bam_reset_channel(), which asserts that bchan->vc.lock is held.
+  */
+ static void bam_chan_init_hw(struct bam_chan *bchan,
+       enum dma_transfer_direction dir)
+ {
+       struct bam_device *bdev = bchan->bdev;
+       u32 val;
+       /* Reset the channel to clear internal state of the FIFO */
+       bam_reset_channel(bchan);
+       /*
+        * write out 8 byte aligned address.  We have enough space for this
+        * because we allocated 1 more descriptor (8 bytes) than we can use
+        */
+       writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
+                       bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
+       writel_relaxed(BAM_DESC_FIFO_SIZE,
+                       bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
+       /* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
+       writel_relaxed(P_DEFAULT_IRQS_EN,
+                       bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
+       /* unmask the specific pipe and EE combo (read-modify-write) */
+       val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+       val |= BIT(bchan->id);
+       writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+       /* don't allow cpu to reorder the channel enable done below */
+       wmb();
+       /* set fixed direction and mode, then enable channel */
+       val = P_EN | P_SYS_MODE;
+       if (dir == DMA_DEV_TO_MEM)
+               val |= P_DIRECTION;
+       writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL));
+       bchan->initialized = 1;
+       /* init FIFO pointers */
+       bchan->head = 0;
+       bchan->tail = 0;
+ }
+ /**
+  * bam_alloc_chan - Allocate channel resources for DMA channel.
+  * @chan: specified channel
+  *
+  * This function allocates the FIFO descriptor memory
+  * (BAM_DESC_FIFO_SIZE bytes) unless the channel already has one.
+  *
+  * Return: 0 on success or when the FIFO already exists, -ENOMEM when the
+  * allocation fails.
+  */
+ static int bam_alloc_chan(struct dma_chan *chan)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       struct bam_device *bdev = bchan->bdev;
+       if (bchan->fifo_virt)
+               return 0;
+       /* allocate FIFO descriptor space, but only if necessary */
 -      dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
 -                              bchan->fifo_phys);
++      bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
++                                      &bchan->fifo_phys, GFP_KERNEL);
+       if (!bchan->fifo_virt) {
+               dev_err(bdev->dev, "Failed to allocate desc fifo\n");
+               return -ENOMEM;
+       }
+       return 0;
+ }
+ /**
+  * bam_free_chan - Frees dma resources associated with specific channel
+  * @chan: specified channel
+  *
+  * Free the allocated fifo descriptor memory and channel resources, reset
+  * the pipe, mask it in BAM_IRQ_SRCS_MSK_EE and disable its interrupts.
+  *
+  * If a transfer is still marked as current, an error is logged and the
+  * function returns early without resetting the pipe or freeing the FIFO.
+  */
+ static void bam_free_chan(struct dma_chan *chan)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       struct bam_device *bdev = bchan->bdev;
+       u32 val;
+       unsigned long flags;
+       vchan_free_chan_resources(to_virt_chan(chan));
+       if (bchan->curr_txd) {
+               dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
+               return;
+       }
+       spin_lock_irqsave(&bchan->vc.lock, flags);
+       bam_reset_channel(bchan);
+       spin_unlock_irqrestore(&bchan->vc.lock, flags);
 -              dma_free_writecombine(bdev->dev, BAM_DESC_FIFO_SIZE,
 -                      bdev->channels[i].fifo_virt,
 -                      bdev->channels[i].fifo_phys);
++      dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
++                  bchan->fifo_phys);
+       bchan->fifo_virt = NULL;
+       /* mask irq for pipe/channel */
+       val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+       val &= ~BIT(bchan->id);
+       writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+       /* disable irq */
+       writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
+ }
+ /**
+  * bam_slave_config - set slave configuration for channel
+  * @chan: dma channel
+  * @cfg: slave configuration
+  *
+  * Stores a copy of @cfg in the channel under the channel lock and flags
+  * the channel for reconfiguration; bam_start_dma() applies the new
+  * settings before the next transfer is started.
+  *
+  * Return: always 0
+  */
+ static int bam_slave_config(struct dma_chan *chan,
+                           struct dma_slave_config *cfg)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       unsigned long irqflags;
+       spin_lock_irqsave(&bchan->vc.lock, irqflags);
+       bchan->slave = *cfg;
+       bchan->reconfigure = 1;
+       spin_unlock_irqrestore(&bchan->vc.lock, irqflags);
+       return 0;
+ }
+ /**
+  * bam_prep_slave_sg - Prep slave sg transaction
+  *
+  * @chan: dma channel
+  * @sgl: scatter gather list
+  * @sg_len: length of sg
+  * @direction: DMA transfer direction
+  * @flags: DMA flags
+  * @context: transfer context (unused)
+  *
+  * Builds a software copy of the hardware descriptors for @sgl, splitting
+  * every entry into chunks of at most BAM_MAX_DATA_SIZE bytes.  Nothing is
+  * written to the hardware FIFO here; bam_start_dma() copies the
+  * descriptors into the FIFO later.
+  *
+  * Return: the prepared descriptor, or NULL when @direction is not a slave
+  * direction, the list carries no data, or the allocation fails.
+  */
+ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
+       struct scatterlist *sgl, unsigned int sg_len,
+       enum dma_transfer_direction direction, unsigned long flags,
+       void *context)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       struct bam_device *bdev = bchan->bdev;
+       struct bam_async_desc *async_desc;
+       struct scatterlist *sg;
+       u32 i;
+       struct bam_desc_hw *desc;
+       unsigned int num_alloc = 0;
+       if (!is_slave_direction(direction)) {
+               dev_err(bdev->dev, "invalid dma direction\n");
+               return NULL;
+       }
+       /* calculate number of required entries */
+       for_each_sg(sgl, sg, sg_len, i)
+               num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_MAX_DATA_SIZE);
+       /*
+        * A request without data would get a descriptor with num_desc == 0,
+        * and bam_start_dma() would then index the entry before the array.
+        */
+       if (!num_alloc) {
+               dev_err(bdev->dev, "empty scatterlist\n");
+               return NULL;
+       }
+       /* allocate enough room to accommodate the number of entries */
+       async_desc = kzalloc(sizeof(*async_desc) +
+                       (num_alloc * sizeof(struct bam_desc_hw)), GFP_NOWAIT);
+       if (!async_desc)
+               return NULL;
+       if (flags & DMA_PREP_FENCE)
+               async_desc->flags |= DESC_FLAG_NWD;
+       if (flags & DMA_PREP_INTERRUPT)
+               async_desc->flags |= DESC_FLAG_EOT;
+       else
+               async_desc->flags |= DESC_FLAG_INT;
+       async_desc->num_desc = num_alloc;
+       async_desc->curr_desc = async_desc->desc;
+       async_desc->dir = direction;
+       /* fill in temporary descriptors */
+       desc = async_desc->desc;
+       for_each_sg(sgl, sg, sg_len, i) {
+               unsigned int remainder = sg_dma_len(sg);
+               unsigned int curr_offset = 0;
+               /*
+                * zero-length entries were given no slot by the count above,
+                * so they must not consume one here either
+                */
+               while (remainder > 0) {
+                       desc->addr = cpu_to_le32(sg_dma_address(sg) +
+                                                curr_offset);
+                       if (remainder > BAM_MAX_DATA_SIZE) {
+                               desc->size = cpu_to_le16(BAM_MAX_DATA_SIZE);
+                               remainder -= BAM_MAX_DATA_SIZE;
+                               curr_offset += BAM_MAX_DATA_SIZE;
+                       } else {
+                               desc->size = cpu_to_le16(remainder);
+                               remainder = 0;
+                       }
+                       /* size is stored little-endian; convert for host math */
+                       async_desc->length += le16_to_cpu(desc->size);
+                       desc++;
+               }
+       }
+       return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);
+ }
+ /**
+  * bam_dma_terminate_all - terminate all transactions on a channel
+  * @chan: dma channel
+  *
+  * The in-flight descriptor, if any, is put back on the issued list so
+  * that it is collected and freed together with every queued descriptor.
+  * The driver's desc_free hook, bam_dma_free_desc(), only frees memory.
+  *
+  * Return: always 0
+  */
+ static int bam_dma_terminate_all(struct dma_chan *chan)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       struct bam_async_desc *active;
+       unsigned long irqflags;
+       LIST_HEAD(doomed);
+       spin_lock_irqsave(&bchan->vc.lock, irqflags);
+       /* fold the active transaction into the issued list first */
+       active = bchan->curr_txd;
+       if (active) {
+               list_add(&active->vd.node, &bchan->vc.desc_issued);
+               bchan->curr_txd = NULL;
+       }
+       vchan_get_all_descriptors(&bchan->vc, &doomed);
+       spin_unlock_irqrestore(&bchan->vc.lock, irqflags);
+       /* the actual freeing happens outside the channel lock */
+       vchan_dma_desc_free_list(&bchan->vc, &doomed);
+       return 0;
+ }
+ /**
+  * bam_pause - Pause DMA channel
+  * @chan: dma channel
+  *
+  * Writes 1 to the pipe's BAM_P_HALT register and records the paused
+  * state, both under the channel lock.
+  *
+  * Return: always 0
+  */
+ static int bam_pause(struct dma_chan *chan)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       unsigned long irqflags;
+       spin_lock_irqsave(&bchan->vc.lock, irqflags);
+       writel_relaxed(1, bam_addr(bchan->bdev, bchan->id, BAM_P_HALT));
+       bchan->paused = 1;
+       spin_unlock_irqrestore(&bchan->vc.lock, irqflags);
+       return 0;
+ }
+ /**
+  * bam_resume - Resume DMA channel operations
+  * @chan: dma channel
+  *
+  * Writes 0 to the pipe's BAM_P_HALT register and clears the paused
+  * state, both under the channel lock.
+  *
+  * Return: always 0
+  */
+ static int bam_resume(struct dma_chan *chan)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       unsigned long irqflags;
+       spin_lock_irqsave(&bchan->vc.lock, irqflags);
+       writel_relaxed(0, bam_addr(bchan->bdev, bchan->id, BAM_P_HALT));
+       bchan->paused = 0;
+       spin_unlock_irqrestore(&bchan->vc.lock, irqflags);
+       return 0;
+ }
+ /**
+  * process_channel_irqs - processes the channel interrupts
+  * @bdev: bam controller
+  *
+  * This function processes the channel interrupts: for every pipe flagged
+  * in BAM_IRQ_SRCS_EE it acknowledges the pipe status, advances the
+  * running descriptor by the number of entries last submitted, and either
+  * completes its cookie or re-queues it for the tasklet.
+  *
+  * Return: the raw value read from BAM_IRQ_SRCS_EE
+  */
+ static u32 process_channel_irqs(struct bam_device *bdev)
+ {
+       u32 i, srcs, pipe_stts;
+       unsigned long flags;
+       struct bam_async_desc *async_desc;
+       srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
+       /* return early if no pipe/channel interrupts are present */
+       if (!(srcs & P_IRQ))
+               return srcs;
+       for (i = 0; i < bdev->num_channels; i++) {
+               struct bam_chan *bchan = &bdev->channels[i];
+               if (!(srcs & BIT(i)))
+                       continue;
+               /* clear pipe irq by writing the status bits back to the clear register */
+               pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS));
+               writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
+               spin_lock_irqsave(&bchan->vc.lock, flags);
+               async_desc = bchan->curr_txd;
+               if (async_desc) {
+                       async_desc->num_desc -= async_desc->xfer_len;
+                       async_desc->curr_desc += async_desc->xfer_len;
+                       bchan->curr_txd = NULL;
+                       /* manage FIFO: head wraps at MAX_DESCRIPTORS */
+                       bchan->head += async_desc->xfer_len;
+                       bchan->head %= MAX_DESCRIPTORS;
+                       /*
+                        * if complete, process cookie.  Otherwise
+                        * push back to front of desc_issued so that
+                        * it gets restarted by the tasklet
+                        */
+                       if (!async_desc->num_desc)
+                               vchan_cookie_complete(&async_desc->vd);
+                       else
+                               list_add(&async_desc->vd.node,
+                                       &bchan->vc.desc_issued);
+               }
+               spin_unlock_irqrestore(&bchan->vc.lock, flags);
+       }
+       return srcs;
+ }
+ /**
+  * bam_dma_irq - irq handler for bam controller
+  * @irq: IRQ of interrupt
+  * @data: callback data (the struct bam_device passed at request time)
+  *
+  * IRQ handler for the bam controller.  Pipe interrupts are handled by
+  * process_channel_irqs() and followed by scheduling the tasklet; for a
+  * BAM-level interrupt the bits read from BAM_IRQ_STTS are written back to
+  * BAM_IRQ_CLR.
+  *
+  * Return: always IRQ_HANDLED
+  */
+ static irqreturn_t bam_dma_irq(int irq, void *data)
+ {
+       struct bam_device *bdev = data;
+       u32 clr_mask = 0, srcs = 0;
+       srcs |= process_channel_irqs(bdev);
+       /* kick off tasklet to start next dma transfer */
+       if (srcs & P_IRQ)
+               tasklet_schedule(&bdev->task);
+       if (srcs & BAM_IRQ)
+               clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
+       /* don't allow reorder of the various accesses to the BAM registers */
+       mb();
+       /* clr_mask stays 0 when no BAM-level interrupt was flagged */
+       writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
+       return IRQ_HANDLED;
+ }
+ /**
+  * bam_tx_status - returns status of transaction
+  * @chan: dma channel
+  * @cookie: transaction cookie
+  * @txstate: DMA transaction state
+  *
+  * The residue is the full length for a descriptor that is still queued,
+  * or the sum of the sizes of the hardware descriptors still counted in
+  * num_desc for the descriptor that is currently running.
+  *
+  * Return: status of dma transaction; DMA_PAUSED is reported instead of an
+  * unfinished status while the channel is paused
+  */
+ static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+               struct dma_tx_state *txstate)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       struct virt_dma_desc *vd;
+       int ret;
+       size_t residue = 0;
+       unsigned int i;
+       unsigned long flags;
+       ret = dma_cookie_status(chan, cookie, txstate);
+       if (ret == DMA_COMPLETE)
+               return ret;
+       /* without a state structure there is no residue to report */
+       if (!txstate)
+               return bchan->paused ? DMA_PAUSED : ret;
+       spin_lock_irqsave(&bchan->vc.lock, flags);
+       vd = vchan_find_desc(&bchan->vc, cookie);
+       if (vd)
+               residue = container_of(vd, struct bam_async_desc, vd)->length;
+       else if (bchan->curr_txd && bchan->curr_txd->vd.tx.cookie == cookie)
+               for (i = 0; i < bchan->curr_txd->num_desc; i++)
+                       /* size is stored little-endian; convert for host math */
+                       residue += le16_to_cpu(bchan->curr_txd->curr_desc[i].size);
+       spin_unlock_irqrestore(&bchan->vc.lock, flags);
+       dma_set_residue(txstate, residue);
+       if (ret == DMA_IN_PROGRESS && bchan->paused)
+               ret = DMA_PAUSED;
+       return ret;
+ }
+ /**
+  * bam_apply_new_config - push a pending slave configuration to hardware
+  * @bchan: bam dma channel
+  * @dir: DMA direction
+  *
+  * Writes the max burst of the stored slave config (source side for
+  * DMA_DEV_TO_MEM, destination side otherwise) to BAM_DESC_CNT_TRSHLD and
+  * clears the channel's reconfigure flag.
+  */
+ static void bam_apply_new_config(struct bam_chan *bchan,
+       enum dma_transfer_direction dir)
+ {
+       u32 maxburst = (dir == DMA_DEV_TO_MEM) ? bchan->slave.src_maxburst
+                                              : bchan->slave.dst_maxburst;
+       writel_relaxed(maxburst,
+                       bam_addr(bchan->bdev, 0, BAM_DESC_CNT_TRSHLD));
+       bchan->reconfigure = 0;
+ }
+ /**
+  * bam_start_dma - start next transaction
+  * @bchan: bam dma channel
+  *
+  * Takes the next issued descriptor, makes it the current transfer,
+  * (re)initializes or reconfigures the pipe when flagged, copies at most
+  * MAX_DESCRIPTORS hardware descriptors into the FIFO (wrapping at its
+  * end) and writes the new tail offset to BAM_P_EVNT_REG.
+  *
+  * Does nothing when no descriptor is issued.  The caller must hold
+  * bchan->vc.lock (checked with lockdep).
+  */
+ static void bam_start_dma(struct bam_chan *bchan)
+ {
+       struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+       struct bam_device *bdev = bchan->bdev;
+       struct bam_async_desc *async_desc;
+       struct bam_desc_hw *desc;
+       struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
+                                       sizeof(struct bam_desc_hw));
+       lockdep_assert_held(&bchan->vc.lock);
+       if (!vd)
+               return;
+       list_del(&vd->node);
+       async_desc = container_of(vd, struct bam_async_desc, vd);
+       bchan->curr_txd = async_desc;
+       /* on first use, initialize the channel hardware */
+       if (!bchan->initialized)
+               bam_chan_init_hw(bchan, async_desc->dir);
+       /* apply new slave config changes, if necessary */
+       if (bchan->reconfigure)
+               bam_apply_new_config(bchan, async_desc->dir);
+       desc = bchan->curr_txd->curr_desc;
+       /* submit at most MAX_DESCRIPTORS entries per run */
+       if (async_desc->num_desc > MAX_DESCRIPTORS)
+               async_desc->xfer_len = MAX_DESCRIPTORS;
+       else
+               async_desc->xfer_len = async_desc->num_desc;
+       /* set any special flags on the last descriptor */
+       if (async_desc->num_desc == async_desc->xfer_len)
+               desc[async_desc->xfer_len - 1].flags =
+                                       cpu_to_le16(async_desc->flags);
+       else
+               desc[async_desc->xfer_len - 1].flags |=
+                                       cpu_to_le16(DESC_FLAG_INT);
+       /* copy in two pieces when the run crosses the end of the FIFO */
+       if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+               u32 partial = MAX_DESCRIPTORS - bchan->tail;
+               memcpy(&fifo[bchan->tail], desc,
+                               partial * sizeof(struct bam_desc_hw));
+               memcpy(fifo, &desc[partial], (async_desc->xfer_len - partial) *
+                               sizeof(struct bam_desc_hw));
+       } else {
+               memcpy(&fifo[bchan->tail], desc,
+                       async_desc->xfer_len * sizeof(struct bam_desc_hw));
+       }
+       bchan->tail += async_desc->xfer_len;
+       bchan->tail %= MAX_DESCRIPTORS;
+       /* ensure descriptor writes and dma start not reordered */
+       wmb();
+       writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
+                       bam_addr(bdev, bchan->id, BAM_P_EVNT_REG));
+ }
+ /**
+  * dma_tasklet - DMA IRQ tasklet
+  * @data: tasklet argument (bam controller structure)
+  *
+  * Walks every channel of the controller and starts the next issued
+  * transaction on each channel that currently has none running.
+  */
+ static void dma_tasklet(unsigned long data)
+ {
+       struct bam_device *bdev = (struct bam_device *)data;
+       unsigned int idx;
+       for (idx = 0; idx < bdev->num_channels; idx++) {
+               struct bam_chan *bchan = &bdev->channels[idx];
+               unsigned long irqflags;
+               spin_lock_irqsave(&bchan->vc.lock, irqflags);
+               /* only an idle channel with issued work can be started */
+               if (!bchan->curr_txd && !list_empty(&bchan->vc.desc_issued))
+                       bam_start_dma(bchan);
+               spin_unlock_irqrestore(&bchan->vc.lock, irqflags);
+       }
+ }
+ /**
+  * bam_issue_pending - starts pending transactions
+  * @chan: dma channel
+  *
+  * Moves submitted descriptors to the issued state and, when the channel
+  * has no transfer running, starts the next one right away through
+  * bam_start_dma().
+  */
+ static void bam_issue_pending(struct dma_chan *chan)
+ {
+       struct bam_chan *bchan = to_bam_chan(chan);
+       unsigned long irqflags;
+       bool have_work;
+       spin_lock_irqsave(&bchan->vc.lock, irqflags);
+       /* always issue first, then start only when the channel is idle */
+       have_work = vchan_issue_pending(&bchan->vc);
+       if (have_work && !bchan->curr_txd)
+               bam_start_dma(bchan);
+       spin_unlock_irqrestore(&bchan->vc.lock, irqflags);
+ }
+ /**
+  * bam_dma_free_desc - free descriptor memory
+  * @vd: virtual descriptor
+  *
+  * Releases the bam_async_desc that embeds @vd; installed as the
+  * channel's desc_free hook by bam_channel_init().
+  */
+ static void bam_dma_free_desc(struct virt_dma_desc *vd)
+ {
+       kfree(container_of(vd, struct bam_async_desc, vd));
+ }
+ /*
+  * Translate a one-cell DMA specifier into the matching channel.  Returns
+  * NULL when the specifier does not have exactly one argument or when that
+  * argument is not below the controller's channel count.
+  */
+ static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
+               struct of_dma *of)
+ {
+       struct bam_device *bdev = container_of(of->of_dma_data,
+                                       struct bam_device, common);
+       struct bam_chan *bchan;
+       if (dma_spec->args_count != 1 ||
+           dma_spec->args[0] >= bdev->num_channels)
+               return NULL;
+       bchan = &bdev->channels[dma_spec->args[0]];
+       return dma_get_slave_channel(&bchan->vc.chan);
+ }
+ /**
+  * bam_init
+  * @bdev: bam device
+  *
+  * Initialization helper for global bam registers: validates the
+  * configured execution environment, reads the pipe count, software-resets
+  * and enables the BAM, then programs the descriptor threshold, the
+  * configuration bits and the error interrupts.
+  *
+  * Return: 0 on success, -EINVAL when bdev->ee is not below the EE count
+  * reported by BAM_REVISION.
+  */
+ static int bam_init(struct bam_device *bdev)
+ {
+       u32 val;
+       /* read revision and configuration information */
+       val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
+       val &= NUM_EES_MASK;
+       /* check that configured EE is within range */
+       if (bdev->ee >= val)
+               return -EINVAL;
+       val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+       bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+       /* s/w reset bam */
+       /* after reset all pipes are disabled and idle */
+       val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
+       val |= BAM_SW_RST;
+       writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+       val &= ~BAM_SW_RST;
+       writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+       /* make sure previous stores are visible before enabling BAM */
+       wmb();
+       /* enable bam */
+       val |= BAM_EN;
+       writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+       /* set descriptor threshold, start with 4 bytes */
+       writel_relaxed(DEFAULT_CNT_THRSHLD,
+                       bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+       /* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */
+       writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS));
+       /* enable irqs for errors */
+       writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
+                       bam_addr(bdev, 0, BAM_IRQ_EN));
+       /* unmask global bam interrupt; this plain write leaves only BAM_IRQ_MSK set */
+       writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+       return 0;
+ }
+ /*
+  * Set up the software state of channel slot @index: link it to @bdev,
+  * register it with virt-dma and install the descriptor free hook.
+  */
+ static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
+       u32 index)
+ {
+       bchan->bdev = bdev;
+       bchan->id = index;
+       vchan_init(&bchan->vc, &bdev->common);
+       bchan->vc.desc_free = bam_dma_free_desc;
+ }
+ static const struct of_device_id bam_of_match[] = {     /* compatible string -> register layout; .data becomes bdev->layout in probe */
+       { .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
+       { .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+       { .compatible = "qcom,bam-v1.7.0", .data = &bam_v1_7_reg_info },
+       {}      /* sentinel */
+ };
+ MODULE_DEVICE_TABLE(of, bam_of_match);
+ /**
+  * bam_dma_probe - bind the driver to one BAM controller instance
+  * @pdev: platform device that matched an entry of bam_of_match
+  *
+  * Maps the register resource, reads the "qcom,ee" property, enables the
+  * "bam_clk" clock, runs bam_init(), initialises every channel, requests the
+  * interrupt and finally registers the controller with the dmaengine core and
+  * the OF DMA helpers.
+  *
+  * Return: 0 on success, a negative errno otherwise.  On failure the clock,
+  * the tasklets, the interrupt and the dmaengine registration taken here are
+  * released again before returning.
+  */
+ static int bam_dma_probe(struct platform_device *pdev)
+ {
+       struct bam_device *bdev;
+       const struct of_device_id *match;
+       struct resource *iores;
+       int ret, i;
+       bdev = devm_kzalloc(&pdev->dev, sizeof(*bdev), GFP_KERNEL);
+       if (!bdev)
+               return -ENOMEM;
+       bdev->dev = &pdev->dev;
+       /* the matched table entry carries the register layout to use */
+       match = of_match_node(bam_of_match, pdev->dev.of_node);
+       if (!match) {
+               dev_err(&pdev->dev, "Unsupported BAM module\n");
+               return -ENODEV;
+       }
+       bdev->layout = match->data;
+       iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
+       if (IS_ERR(bdev->regs))
+               return PTR_ERR(bdev->regs);
+       bdev->irq = platform_get_irq(pdev, 0);
+       if (bdev->irq < 0)
+               return bdev->irq;
+       ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &bdev->ee);
+       if (ret) {
+               dev_err(bdev->dev, "Execution environment unspecified\n");
+               return ret;
+       }
+       bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
+       if (IS_ERR(bdev->bamclk))
+               return PTR_ERR(bdev->bamclk);
+       /* from here on every failure has to undo the clock enable */
+       ret = clk_prepare_enable(bdev->bamclk);
+       if (ret) {
+               dev_err(bdev->dev, "failed to prepare/enable clock\n");
+               return ret;
+       }
+       ret = bam_init(bdev);
+       if (ret)
+               goto err_disable_clk;
+       tasklet_init(&bdev->task, dma_tasklet, (unsigned long)bdev);
+       /* zero-filled array, one entry per channel counted in num_channels */
+       bdev->channels = devm_kcalloc(bdev->dev, bdev->num_channels,
+                               sizeof(*bdev->channels), GFP_KERNEL);
+       if (!bdev->channels) {
+               ret = -ENOMEM;
+               goto err_tasklet_kill;
+       }
+       /* allocate and initialize channels */
+       INIT_LIST_HEAD(&bdev->common.channels);
+       for (i = 0; i < bdev->num_channels; i++)
+               bam_channel_init(bdev, &bdev->channels[i], i);
+       ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq,
+                       IRQF_TRIGGER_HIGH, "bam_dma", bdev);
+       if (ret)
+               goto err_bam_channel_exit;
+       /* set max dma segment size */
+       bdev->common.dev = bdev->dev;
+       bdev->common.dev->dma_parms = &bdev->dma_parms;
+       ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE);
+       if (ret) {
+               dev_err(bdev->dev, "cannot set maximum segment size\n");
+               goto err_free_irq;
+       }
+       platform_set_drvdata(pdev, bdev);
+       /* set capabilities */
+       dma_cap_zero(bdev->common.cap_mask);
+       dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
+       /* initialize dmaengine apis */
+       bdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       bdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+       bdev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       bdev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       bdev->common.device_alloc_chan_resources = bam_alloc_chan;
+       bdev->common.device_free_chan_resources = bam_free_chan;
+       bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
+       bdev->common.device_config = bam_slave_config;
+       bdev->common.device_pause = bam_pause;
+       bdev->common.device_resume = bam_resume;
+       bdev->common.device_terminate_all = bam_dma_terminate_all;
+       bdev->common.device_issue_pending = bam_issue_pending;
+       bdev->common.device_tx_status = bam_tx_status;
+       /* common.dev was already set above, before dma_set_max_seg_size() */
+       ret = dma_async_device_register(&bdev->common);
+       if (ret) {
+               dev_err(bdev->dev, "failed to register dma async device\n");
+               goto err_free_irq;
+       }
+       ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate,
+                                       &bdev->common);
+       if (ret)
+               goto err_unregister_dma;
+       return 0;
+ err_unregister_dma:
+       dma_async_device_unregister(&bdev->common);
+ err_free_irq:
+       /*
+        * Release the interrupt before the tasklets are killed, as
+        * bam_dma_remove() does, so the handler cannot schedule a tasklet
+        * again after tasklet_kill() has returned.
+        */
+       devm_free_irq(bdev->dev, bdev->irq, bdev);
+ err_bam_channel_exit:
+       for (i = 0; i < bdev->num_channels; i++)
+               tasklet_kill(&bdev->channels[i].vc.task);
+ err_tasklet_kill:
+       tasklet_kill(&bdev->task);
+ err_disable_clk:
+       clk_disable_unprepare(bdev->bamclk);
+       return ret;
+ }
+ /**
+  * bam_dma_remove - unbind the driver from a BAM controller instance
+  * @pdev: platform device whose drvdata is the struct bam_device set in probe
+  *
+  * Unregisters the controller from the OF DMA helpers and the dmaengine core,
+  * masks and frees the interrupt, terminates every channel, frees the
+  * descriptor FIFOs that are still allocated and disables the clock.
+  *
+  * Return: always 0.
+  */
+ static int bam_dma_remove(struct platform_device *pdev)
+ {
+       struct bam_device *bdev = platform_get_drvdata(pdev);
+       u32 i;
+       of_dma_controller_free(pdev->dev.of_node);
+       dma_async_device_unregister(&bdev->common);
+       /* mask all interrupts for this execution environment */
+       writel_relaxed(0, bam_addr(bdev, 0,  BAM_IRQ_SRCS_MSK_EE));
+       /* no handler may run once the tasklets below have been killed */
+       devm_free_irq(bdev->dev, bdev->irq, bdev);
+       for (i = 0; i < bdev->num_channels; i++) {
+               bam_dma_terminate_all(&bdev->channels[i].vc.chan);
+               tasklet_kill(&bdev->channels[i].vc.task);
+               /*
+                * The channel array is zero-allocated in probe, so a channel
+                * with no descriptor FIFO has a NULL fifo_virt; there is
+                * nothing to hand back to the DMA allocator for it.
+                */
+               if (!bdev->channels[i].fifo_virt)
+                       continue;
++              dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
++                          bdev->channels[i].fifo_virt,
++                          bdev->channels[i].fifo_phys);
+       }
+       tasklet_kill(&bdev->task);
+       clk_disable_unprepare(bdev->bamclk);
+       return 0;
+ }
+ /*
+  * Platform driver description: devices are matched through bam_of_match and
+  * handled by bam_dma_probe() / bam_dma_remove().
+  */
+ static struct platform_driver bam_dma_driver = {
+       .driver = {
+               .of_match_table = bam_of_match,
+               .name = "bam-dma-engine",
+       },
+       .remove = bam_dma_remove,
+       .probe = bam_dma_probe,
+ };
+ module_platform_driver(bam_dma_driver); /* bam_dma_driver is this module's platform driver */
+ MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
+ MODULE_DESCRIPTION("QCOM BAM DMA engine driver");
+ MODULE_LICENSE("GPL v2");
index 26e2688c104ec9e6ba314264b63318a65048596f,aa9561f586ab9a047410652f811ad4ebdffbc7ca..8f50a4020f6fff6f7c1d7e1c1f43964c734e5c3f
   *
   */
  
 -#include <linux/init.h>
 -#include <linux/module.h>
  #include <linux/clk.h>
 -#include <linux/err.h>
 -#include <linux/delay.h>
 -#include <linux/interrupt.h>
 +#include <linux/dmaengine.h>
 +#include <linux/module.h>
 +#include <linux/of.h>
  #include <linux/platform_device.h>
 -#include <linux/slab.h>
  #include <linux/spi/spi.h>
 -#include <linux/scatterlist.h>
 -#include <linux/of.h>
  #include <linux/pm_runtime.h>
 -#include <linux/io.h>
 -#include <linux/dmaengine.h>
 +#include <linux/scatterlist.h>
  
  #define DRIVER_NAME "rockchip-spi"
  
@@@ -173,7 -179,7 +173,7 @@@ struct rockchip_spi 
        u8 tmode;
        u8 bpw;
        u8 n_bytes;
 -      u8 rsd_nsecs;
 +      u32 rsd_nsecs;
        unsigned len;
        u32 speed;
  
        /* protect state */
        spinlock_t lock;
  
 -      struct completion xfer_completion;
 -
        u32 use_dma;
        struct sg_table tx_sg;
        struct sg_table rx_sg;
        struct rockchip_spi_dma_data dma_rx;
        struct rockchip_spi_dma_data dma_tx;
+       struct dma_slave_caps dma_caps;
  };
  
  static inline void spi_enable_chip(struct rockchip_spi *rs, int enable)
@@@ -257,10 -266,7 +258,10 @@@ static inline u32 rx_max(struct rockchi
  static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
  {
        u32 ser;
 -      struct rockchip_spi *rs = spi_master_get_devdata(spi->master);
 +      struct spi_master *master = spi->master;
 +      struct rockchip_spi *rs = spi_master_get_devdata(master);
 +
 +      pm_runtime_get_sync(rs->dev);
  
        ser = readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & SER_MASK;
  
                ser &= ~(1 << spi->chip_select);
  
        writel_relaxed(ser, rs->regs + ROCKCHIP_SPI_SER);
 +
 +      pm_runtime_put_sync(rs->dev);
  }
  
  static int rockchip_spi_prepare_message(struct spi_master *master,
@@@ -316,12 -320,12 +317,12 @@@ static void rockchip_spi_handle_err(str
         */
        if (rs->use_dma) {
                if (rs->state & RXBUSY) {
 -                      dmaengine_terminate_all(rs->dma_rx.ch);
 +                      dmaengine_terminate_async(rs->dma_rx.ch);
                        flush_fifo(rs);
                }
  
                if (rs->state & TXBUSY)
 -                      dmaengine_terminate_all(rs->dma_tx.ch);
 +                      dmaengine_terminate_async(rs->dma_tx.ch);
        }
  
        spin_unlock_irqrestore(&rs->lock, flags);
@@@ -430,7 -434,7 +431,7 @@@ static void rockchip_spi_dma_txcb(void 
        spin_unlock_irqrestore(&rs->lock, flags);
  }
  
 -static void rockchip_spi_prepare_dma(struct rockchip_spi *rs)
 +static int rockchip_spi_prepare_dma(struct rockchip_spi *rs)
  {
        unsigned long flags;
        struct dma_slave_config rxconf, txconf;
                rxconf.direction = rs->dma_rx.direction;
                rxconf.src_addr = rs->dma_rx.addr;
                rxconf.src_addr_width = rs->n_bytes;
-               rxconf.src_maxburst = rs->n_bytes;
+               if (rs->dma_caps.max_burst > 4)
+                       rxconf.src_maxburst = 4;
+               else
+                       rxconf.src_maxburst = 1;
                dmaengine_slave_config(rs->dma_rx.ch, &rxconf);
  
                rxdesc = dmaengine_prep_slave_sg(
                                rs->dma_rx.ch,
                                rs->rx_sg.sgl, rs->rx_sg.nents,
                                rs->dma_rx.direction, DMA_PREP_INTERRUPT);
 +              if (!rxdesc)
 +                      return -EINVAL;
  
                rxdesc->callback = rockchip_spi_dma_rxcb;
                rxdesc->callback_param = rs;
                txconf.direction = rs->dma_tx.direction;
                txconf.dst_addr = rs->dma_tx.addr;
                txconf.dst_addr_width = rs->n_bytes;
-               txconf.dst_maxburst = rs->n_bytes;
+               if (rs->dma_caps.max_burst > 4)
+                       txconf.dst_maxburst = 4;
+               else
+                       txconf.dst_maxburst = 1;
                dmaengine_slave_config(rs->dma_tx.ch, &txconf);
  
                txdesc = dmaengine_prep_slave_sg(
                                rs->dma_tx.ch,
                                rs->tx_sg.sgl, rs->tx_sg.nents,
                                rs->dma_tx.direction, DMA_PREP_INTERRUPT);
 +              if (!txdesc) {
 +                      if (rxdesc)
 +                              dmaengine_terminate_sync(rs->dma_rx.ch);
 +                      return -EINVAL;
 +              }
  
                txdesc->callback = rockchip_spi_dma_txcb;
                txdesc->callback_param = rs;
                dmaengine_submit(txdesc);
                dma_async_issue_pending(rs->dma_tx.ch);
        }
 +
 +      return 0;
  }
  
  static void rockchip_spi_config(struct rockchip_spi *rs)
        int rsd = 0;
  
        u32 cr0 = (CR0_BHT_8BIT << CR0_BHT_OFFSET)
 -              | (CR0_SSD_ONE << CR0_SSD_OFFSET);
 +              | (CR0_SSD_ONE << CR0_SSD_OFFSET)
 +              | (CR0_EM_BIG << CR0_EM_OFFSET);
  
        cr0 |= (rs->n_bytes << CR0_DFS_OFFSET);
        cr0 |= ((rs->mode & 0x3) << CR0_SCPH_OFFSET);
@@@ -613,12 -613,12 +620,12 @@@ static int rockchip_spi_transfer_one
        if (rs->use_dma) {
                if (rs->tmode == CR0_XFM_RO) {
                        /* rx: dma must be prepared first */
 -                      rockchip_spi_prepare_dma(rs);
 +                      ret = rockchip_spi_prepare_dma(rs);
                        spi_enable_chip(rs, 1);
                } else {
                        /* tx or tr: spi must be enabled first */
                        spi_enable_chip(rs, 1);
 -                      rockchip_spi_prepare_dma(rs);
 +                      ret = rockchip_spi_prepare_dma(rs);
                }
        } else {
                spi_enable_chip(rs, 1);
@@@ -724,14 -724,8 +731,14 @@@ static int rockchip_spi_probe(struct pl
        master->handle_err = rockchip_spi_handle_err;
  
        rs->dma_tx.ch = dma_request_slave_channel(rs->dev, "tx");
 -      if (!rs->dma_tx.ch)
 +      if (IS_ERR_OR_NULL(rs->dma_tx.ch)) {
 +              /* Check tx to see if we need defer probing driver */
 +              if (PTR_ERR(rs->dma_tx.ch) == -EPROBE_DEFER) {
 +                      ret = -EPROBE_DEFER;
 +                      goto err_get_fifo_len;
 +              }
                dev_warn(rs->dev, "Failed to request TX DMA channel\n");
 +      }
  
        rs->dma_rx.ch = dma_request_slave_channel(rs->dev, "rx");
        if (!rs->dma_rx.ch) {
        }
  
        if (rs->dma_tx.ch && rs->dma_rx.ch) {
+               dma_get_slave_caps(rs->dma_rx.ch, &(rs->dma_caps));
                rs->dma_tx.addr = (dma_addr_t)(mem->start + ROCKCHIP_SPI_TXDR);
                rs->dma_rx.addr = (dma_addr_t)(mem->start + ROCKCHIP_SPI_RXDR);
                rs->dma_tx.direction = DMA_MEM_TO_DEV;
        return 0;
  
  err_register_master:
 +      pm_runtime_disable(&pdev->dev);
        if (rs->dma_tx.ch)
                dma_release_channel(rs->dma_tx.ch);
        if (rs->dma_rx.ch)
@@@ -792,8 -786,6 +800,8 @@@ static int rockchip_spi_remove(struct p
        if (rs->dma_rx.ch)
                dma_release_channel(rs->dma_rx.ch);
  
 +      spi_master_put(master);
 +
        return 0;
  }
  
@@@ -884,7 -876,6 +892,7 @@@ static const struct of_device_id rockch
        { .compatible = "rockchip,rk3066-spi", },
        { .compatible = "rockchip,rk3188-spi", },
        { .compatible = "rockchip,rk3288-spi", },
 +      { .compatible = "rockchip,rk3399-spi", },
        { },
  };
  MODULE_DEVICE_TABLE(of, rockchip_spi_dt_match);