+/* Skylake Xeon NTB */
+
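+/* 64-bit doorbell accessors, hooked up through ndev->reg->db_ioread/db_iowrite */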
+static u64 skx_db_ioread(void __iomem *mmio)
+{
+ return ioread64(mmio);
+}
+
+static void skx_db_iowrite(u64 bits, void __iomem *mmio)
+{
+ iowrite64(bits, mmio);
+}
+
+static int skx_init_isr(struct intel_ntb_dev *ndev)
+{
+ int i;
+
+ /*
+ * The MSIX vectors and the interrupt status bits are not lined up
+ * on Skylake: the link status bit is bit 32, but by default it is
+ * routed to MSIX vector 0. At reset the vector assignment is
+ * 1-32,0; reprogram it to 0-32 so the vectors line up with the
+ * status bits.
+ */
+
+ for (i = 0; i < SKX_DB_MSIX_VECTOR_COUNT; i++)
+ iowrite8(i, ndev->self_mmio + SKX_INTVEC_OFFSET + i);
+
+ /*
+ * Workaround: MSIX vector 32 is bad, so route the link status
+ * entry to vector 31 instead.
+ */
+ if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD) {
+ iowrite8(SKX_DB_MSIX_VECTOR_COUNT - 2,
+ ndev->self_mmio + SKX_INTVEC_OFFSET +
+ (SKX_DB_MSIX_VECTOR_COUNT - 1));
+ }
+
+ return ndev_init_isr(ndev, SKX_DB_MSIX_VECTOR_COUNT,
+ SKX_DB_MSIX_VECTOR_COUNT,
+ SKX_DB_MSIX_VECTOR_SHIFT,
+ SKX_DB_TOTAL_SHIFT);
+}
+
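+/*
+ * Set up the b2b memory window: optionally carve half of (or dedicate)
+ * one bar to peer register access, then program the secondary bar
+ * sizes and the incoming limit registers to match.
+ */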
+static int skx_setup_b2b_mw(struct intel_ntb_dev *ndev,
+ const struct intel_b2b_addr *addr,
+ const struct intel_b2b_addr *peer_addr)
+{
+ struct pci_dev *pdev;
+ void __iomem *mmio;
+ resource_size_t bar_size;
+ phys_addr_t bar_addr;
+ int b2b_bar;
+ u8 bar_sz;
+
+ pdev = ndev_pdev(ndev);
+ mmio = ndev->self_mmio;
+
+ if (ndev->b2b_idx == UINT_MAX) {
+ dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
+ b2b_bar = 0;
+ ndev->b2b_off = 0;
+ } else {
+ b2b_bar = ndev_mw_to_bar(ndev, ndev->b2b_idx);
+ if (b2b_bar < 0)
+ return -EIO;
+
+ dev_dbg(ndev_dev(ndev), "using b2b mw bar %d\n", b2b_bar);
+
+ bar_size = pci_resource_len(ndev->ntb.pdev, b2b_bar);
+
+ dev_dbg(ndev_dev(ndev), "b2b bar size %#llx\n", bar_size);
+
+ if (b2b_mw_share && ((bar_size >> 1) >= XEON_B2B_MIN_SIZE)) {
+ dev_dbg(ndev_dev(ndev),
+ "b2b using first half of bar\n");
+ ndev->b2b_off = bar_size >> 1;
+ } else if (bar_size >= XEON_B2B_MIN_SIZE) {
+ dev_dbg(ndev_dev(ndev),
+ "b2b using whole bar\n");
+ ndev->b2b_off = 0;
+ --ndev->mw_count;
+ } else {
+ dev_dbg(ndev_dev(ndev),
+ "b2b bar size is too small\n");
+ return -EIO;
+ }
+ }
+
+ /*
+ * Reset the secondary bar sizes to match the primary bar sizes,
+ * except disable or halve the size of the b2b secondary bar. The
+ * size registers hold log2 of the window size, so decrementing by
+ * one halves the bar and writing zero disables it.
+ */
+ pci_read_config_byte(pdev, SKX_IMBAR1SZ_OFFSET, &bar_sz);
+ dev_dbg(ndev_dev(ndev), "IMBAR1SZ %#x\n", bar_sz);
+ if (b2b_bar == 1) {
+ if (ndev->b2b_off)
+ bar_sz -= 1;
+ else
+ bar_sz = 0;
+ }
+
+ pci_write_config_byte(pdev, SKX_EMBAR1SZ_OFFSET, bar_sz);
+ pci_read_config_byte(pdev, SKX_EMBAR1SZ_OFFSET, &bar_sz);
+ dev_dbg(ndev_dev(ndev), "EMBAR1SZ %#x\n", bar_sz);
+
+ pci_read_config_byte(pdev, SKX_IMBAR2SZ_OFFSET, &bar_sz);
+ dev_dbg(ndev_dev(ndev), "IMBAR2SZ %#x\n", bar_sz);
+ if (b2b_bar == 2) {
+ if (ndev->b2b_off)
+ bar_sz -= 1;
+ else
+ bar_sz = 0;
+ }
+
+ pci_write_config_byte(pdev, SKX_EMBAR2SZ_OFFSET, bar_sz);
+ pci_read_config_byte(pdev, SKX_EMBAR2SZ_OFFSET, &bar_sz);
+ dev_dbg(ndev_dev(ndev), "EMBAR2SZ %#x\n", bar_sz);
+
+ /*
+ * SBAR01 is hit by the first part of the b2b bar. This block only
+ * validates b2b_bar; bar_addr is recomputed below before it is used.
+ */
+ if (b2b_bar == 0)
+ bar_addr = addr->bar0_addr;
+ else if (b2b_bar == 1)
+ bar_addr = addr->bar2_addr64;
+ else if (b2b_bar == 2)
+ bar_addr = addr->bar4_addr64;
+ else
+ return -EIO;
+
+ /* setup incoming bar limits == base addrs (zero length windows) */
+ bar_addr = addr->bar2_addr64 + (b2b_bar == 1 ? ndev->b2b_off : 0);
+ iowrite64(bar_addr, mmio + SKX_IMBAR1XLMT_OFFSET);
+ bar_addr = ioread64(mmio + SKX_IMBAR1XLMT_OFFSET);
+ dev_dbg(ndev_dev(ndev), "IMBAR1XLMT %#018llx\n", bar_addr);
+
+ bar_addr = addr->bar4_addr64 + (b2b_bar == 2 ? ndev->b2b_off : 0);
+ iowrite64(bar_addr, mmio + SKX_IMBAR2XLMT_OFFSET);
+ bar_addr = ioread64(mmio + SKX_IMBAR2XLMT_OFFSET);
+ dev_dbg(ndev_dev(ndev), "IMBAR2XLMT %#018llx\n", bar_addr);
+
+ /* zero the incoming translation addrs; mw_set_trans programs them later */
+ iowrite64(0, mmio + SKX_IMBAR1XBASE_OFFSET);
+ iowrite64(0, mmio + SKX_IMBAR2XBASE_OFFSET);
+
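+ /*
+ * The peer's b2b doorbell and scratchpad registers are reached
+ * through the same primary mapping, so peer_mmio aliases self_mmio.
+ */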
+ ndev->peer_mmio = ndev->self_mmio;
+
+ return 0;
+}
+
+static int skx_init_ntb(struct intel_ntb_dev *ndev)
+{
+ int rc;
+
+ ndev->mw_count = XEON_MW_COUNT;
+ ndev->spad_count = SKX_SPAD_COUNT;
+ ndev->db_count = SKX_DB_COUNT;
+ ndev->db_link_mask = SKX_DB_LINK_BIT;
+
+ /* with the MSIX vector 32 workaround, doorbell 31 also carries link status */
+ if (ndev->hwerr_flags & NTB_HWERR_MSIX_VECTOR32_BAD)
+ ndev->db_link_mask |= BIT_ULL(31);
+
+ switch (ndev->ntb.topo) {
+ case NTB_TOPO_B2B_USD:
+ case NTB_TOPO_B2B_DSD:
+ ndev->self_reg = &skx_pri_reg;
+ ndev->peer_reg = &skx_b2b_reg;
+ ndev->xlat_reg = &skx_sec_xlat;
+
+ if (ndev->ntb.topo == NTB_TOPO_B2B_USD) {
+ rc = skx_setup_b2b_mw(ndev,
+ &xeon_b2b_dsd_addr,
+ &xeon_b2b_usd_addr);
+ } else {
+ rc = skx_setup_b2b_mw(ndev,
+ &xeon_b2b_usd_addr,
+ &xeon_b2b_dsd_addr);
+ }
+
+ if (rc)
+ return rc;
+
+ /* Enable Bus Master and Memory Space on the secondary side */
+ iowrite16(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER,
+ ndev->self_mmio + SKX_SPCICMD_OFFSET);
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
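+ /* start with all doorbell interrupts masked */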
+ ndev->reg->db_iowrite(ndev->db_valid_mask,
+ ndev->self_mmio +
+ ndev->self_reg->db_mask);
+
+ return 0;
+}
+
+static int skx_init_dev(struct intel_ntb_dev *ndev)
+{
+ struct pci_dev *pdev;
+ u8 ppd;
+ int rc;
+
+ pdev = ndev_pdev(ndev);
+
+ ndev->reg = &skx_reg;
+
+ rc = pci_read_config_byte(pdev, XEON_PPD_OFFSET, &ppd);
+ if (rc)
+ return -EIO;
+
+ ndev->ntb.topo = xeon_ppd_topo(ndev, ppd);
+ dev_dbg(ndev_dev(ndev), "ppd %#x topo %s\n", ppd,
+ ntb_topo_string(ndev->ntb.topo));
+ if (ndev->ntb.topo == NTB_TOPO_NONE)
+ return -EINVAL;
+
+ if (pdev_is_skx_xeon(pdev))
+ ndev->hwerr_flags |= NTB_HWERR_MSIX_VECTOR32_BAD;
+
+ rc = skx_init_ntb(ndev);
+ if (rc)
+ return rc;
+
+ return skx_init_isr(ndev);
+}
+
+static int intel_ntb3_link_enable(struct ntb_dev *ntb,
+ enum ntb_speed max_speed,
+ enum ntb_width max_width)
+{
+ struct intel_ntb_dev *ndev;
+ u32 ntb_ctl;
+
+ ndev = container_of(ntb, struct intel_ntb_dev, ntb);
+
+ dev_dbg(ndev_dev(ndev),
+ "Enabling link with max_speed %d max_width %d\n",
+ max_speed, max_width);
+
+ if (max_speed != NTB_SPEED_AUTO)
+ dev_dbg(ndev_dev(ndev), "ignoring max_speed %d\n", max_speed);
+ if (max_width != NTB_WIDTH_AUTO)
+ dev_dbg(ndev_dev(ndev), "ignoring max_width %d\n", max_width);
+
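+ /*
+ * Drop the config lock and link-disable bits, and enable snooping
+ * of bar2/bar4 traffic in both directions.
+ */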
+ ntb_ctl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl);
+ ntb_ctl &= ~(NTB_CTL_DISABLE | NTB_CTL_CFG_LOCK);
+ ntb_ctl |= NTB_CTL_P2S_BAR2_SNOOP | NTB_CTL_S2P_BAR2_SNOOP;
+ ntb_ctl |= NTB_CTL_P2S_BAR4_SNOOP | NTB_CTL_S2P_BAR4_SNOOP;
+ iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl);
+
+ return 0;
+}
+
+static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int idx,
+ dma_addr_t addr, resource_size_t size)
+{
+ struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+ unsigned long xlat_reg, limit_reg;
+ resource_size_t bar_size, mw_size;
+ void __iomem *mmio;
+ u64 base, limit, reg_val;
+ int bar;
+
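+ /* an unshared b2b window consumes a whole bar; skip over its index */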
+ if (idx >= ndev->b2b_idx && !ndev->b2b_off)
+ idx += 1;
+
+ bar = ndev_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
+
+ bar_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+ if (idx == ndev->b2b_idx)
+ mw_size = bar_size - ndev->b2b_off;
+ else
+ mw_size = bar_size;
+
+ /* hardware requires that addr is aligned to bar size */
+ if (addr & (bar_size - 1))
+ return -EINVAL;
+
+ /* make sure the range fits in the usable mw size */
+ if (size > mw_size)
+ return -EINVAL;
+
+ mmio = ndev->self_mmio;
+ xlat_reg = ndev->xlat_reg->bar2_xlat + (idx * 0x10);
+ limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10);
+ base = pci_resource_start(ndev->ntb.pdev, bar);
+
+ /* limit the window to size when a limit register exists and size != mw_size */
+ if (limit_reg && size != mw_size)
+ limit = base + size;
+ else
+ limit = base + mw_size;
+
+ /* set and verify setting the translation address */
+ iowrite64(addr, mmio + xlat_reg);
+ reg_val = ioread64(mmio + xlat_reg);
+ if (reg_val != addr) {
+ iowrite64(0, mmio + xlat_reg);
+ return -EIO;
+ }
+
+ dev_dbg(ndev_dev(ndev), "BAR %d IMBARXBASE: %#Lx\n", bar, reg_val);
+
+ /* set and verify setting the limit */
+ iowrite64(limit, mmio + limit_reg);
+ reg_val = ioread64(mmio + limit_reg);
+ if (reg_val != limit) {
+ iowrite64(base, mmio + limit_reg);
+ iowrite64(0, mmio + xlat_reg);
+ return -EIO;
+ }
+
+ dev_dbg(ndev_dev(ndev), "BAR %d IMBARXLMT: %#Lx\n", bar, reg_val);
+
+ /*
+ * Set up the EP side: its limit registers sit 0x4000 above the
+ * local ones, and its base is read back from the EMBAR register
+ * with the low flag bits masked off.
+ */
+ limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10) + 0x4000;
+ base = ioread64(mmio + SKX_EMBAR1_OFFSET + (8 * idx));
+ base &= ~0xf;
+
+ if (limit_reg && size != mw_size)
+ limit = base + size;
+ else
+ limit = base + mw_size;
+
+ /* set and verify setting the limit */
+ iowrite64(limit, mmio + limit_reg);
+ reg_val = ioread64(mmio + limit_reg);
+ if (reg_val != limit) {
+ iowrite64(base, mmio + limit_reg);
+ iowrite64(0, mmio + xlat_reg);
+ return -EIO;
+ }
+
+ dev_dbg(ndev_dev(ndev), "BAR %d EMBARXLMT: %#Lx\n", bar, reg_val);
+
+ return 0;
+}
+
+static int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+ int bit;
+
+ if (db_bits & ~ndev->db_valid_mask)
+ return -EINVAL;
+
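+ /* each doorbell has its own 32-bit bell register; writing 1 rings it */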
+ while (db_bits) {
+ bit = __ffs(db_bits);
+ iowrite32(1, ndev->peer_mmio +
+ ndev->peer_reg->db_bell + (bit * 4));
+ db_bits &= db_bits - 1;
+ }
+
+ return 0;
+}
+
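+/* gen3 doorbells are read and cleared through the same db_clear register */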
+static u64 intel_ntb3_db_read(struct ntb_dev *ntb)
+{
+ struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+ return ndev_db_read(ndev,
+ ndev->self_mmio +
+ ndev->self_reg->db_clear);
+}
+
+static int intel_ntb3_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+ return ndev_db_write(ndev, db_bits,
+ ndev->self_mmio +
+ ndev->self_reg->db_clear);
+}
+