APPLIED MICRO (APM) X-GENE SOC ETHERNET DRIVER
M: Iyappan Subramanian <isubramanian@apm.com>
M: Keyur Chudgar <kchudgar@apm.com>
+M: Quan Nguyen <qnguyen@apm.com>
S: Supported
F: drivers/net/ethernet/apm/xgene/
F: drivers/net/phy/mdio-xgene.c
F: Documentation/devicetree/bindings/net/apm-xgene-enet.txt
F: Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
+APPLIED MICRO (APM) X-GENE SOC ETHERNET (V2) DRIVER
+M: Iyappan Subramanian <isubramanian@apm.com>
+M: Keyur Chudgar <kchudgar@apm.com>
+S: Supported
+F: drivers/net/ethernet/apm/xgene-v2/
+
APPLIED MICRO (APM) X-GENE SOC PMU
M: Tai Nguyen <ttnguyen@apm.com>
S: Supported
S: Maintained
F: drivers/edac/mpc85xx_edac.[ch]
+ EDAC-PND2
+ M: Tony Luck <tony.luck@intel.com>
+ L: linux-edac@vger.kernel.org
+ S: Maintained
+ F: drivers/edac/pnd2_edac.[ch]
+
EDAC-PASEMI
M: Egor Martovetsky <egor@pasemi.com>
L: linux-edac@vger.kernel.org
F: net/bridge/
ETHERNET PHY LIBRARY
+ M: Andrew Lunn <andrew@lunn.ch>
M: Florian Fainelli <f.fainelli@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
F: fs/autofs4/
KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
+ M: Masahiro Yamada <yamada.masahiro@socionext.com>
M: Michal Marek <mmarek@suse.com>
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git for-next
- T: git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git rc-fixes
+ T: git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
L: linux-kbuild@vger.kernel.org
S: Maintained
F: Documentation/kbuild/
F: block/partitions/ibm.c
S390 NETWORK DRIVERS
+ M: Julian Wiedmann <jwi@linux.vnet.ibm.com>
M: Ursula Braun <ubraun@linux.vnet.ibm.com>
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
F: drivers/s390/scsi/zfcp_*
S390 IUCV NETWORK LAYER
+ M: Julian Wiedmann <jwi@linux.vnet.ibm.com>
M: Ursula Braun <ubraun@linux.vnet.ibm.com>
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
F: include/linux/platform_data/dma-dw.h
F: drivers/dma/dw/
+SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
+M: Jie Deng <jiedeng@synopsys.com>
+L: netdev@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/synopsys/
+
SYNOPSYS DESIGNWARE I2C DRIVER
M: Jarkko Nikula <jarkko.nikula@linux.intel.com>
R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
for (j = 0; j < max_idx; j++) {
struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j];
+ dma_addr_t mapping = rx_buf->mapping;
void *data = rx_buf->data;
if (!data)
continue;
- dma_unmap_single(&pdev->dev, rx_buf->mapping,
- bp->rx_buf_use_size, bp->rx_dir);
-
rx_buf->data = NULL;
- if (BNXT_RX_PAGE_MODE(bp))
+ if (BNXT_RX_PAGE_MODE(bp)) {
+ mapping -= bp->rx_dma_offset;
+ dma_unmap_page(&pdev->dev, mapping,
+ PAGE_SIZE, bp->rx_dir);
__free_page(data);
- else
+ } else {
+ dma_unmap_single(&pdev->dev, mapping,
+ bp->rx_buf_use_size,
+ bp->rx_dir);
kfree(data);
+ }
}
for (j = 0; j < max_agg_idx; j++) {
return 0;
}
+ static void bnxt_init_cp_rings(struct bnxt *bp)
+ {
+ int i;
+
+ for (i = 0; i < bp->cp_nr_rings; i++) {
+ struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+ struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+
+ ring->fw_ring_id = INVALID_HW_RING_ID;
+ }
+ }
+
static int bnxt_init_rx_rings(struct bnxt *bp)
{
int i, rc = 0;
pf->max_tx_wm_flows = le32_to_cpu(resp->max_tx_wm_flows);
pf->max_rx_em_flows = le32_to_cpu(resp->max_rx_em_flows);
pf->max_rx_wm_flows = le32_to_cpu(resp->max_rx_wm_flows);
+ if (resp->flags &
+ cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED))
+ bp->flags |= BNXT_FLAG_WOL_CAP;
} else {
#ifdef CONFIG_BNXT_SRIOV
struct bnxt_vf_info *vf = &bp->vf;
rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags);
if (rc) {
netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n",
- rc, i);
+ i, rc);
return rc;
}
}
static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
{
+ bnxt_init_cp_rings(bp);
bnxt_init_rx_rings(bp);
bnxt_init_tx_rings(bp);
bnxt_init_ring_grps(bp, irq_re_init);
{
#if defined(CONFIG_BNXT_SRIOV)
if (BNXT_VF(bp))
- return bp->vf.max_irqs;
+ return min_t(unsigned int, bp->vf.max_irqs,
+ bp->vf.max_cp_rings);
#endif
- return bp->pf.max_irqs;
+ return min_t(unsigned int, bp->pf.max_irqs, bp->pf.max_cp_rings);
}
void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs)
return 0;
}
+int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp)
+{
+ struct hwrm_wol_filter_alloc_input req = {0};
+ struct hwrm_wol_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_ALLOC, -1, -1);
+ req.port_id = cpu_to_le16(bp->pf.port_id);
+ req.wol_type = WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT;
+ req.enables = cpu_to_le32(WOL_FILTER_ALLOC_REQ_ENABLES_MAC_ADDRESS);
+ memcpy(req.mac_address, bp->dev->dev_addr, ETH_ALEN);
+ mutex_lock(&bp->hwrm_cmd_lock);
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (!rc)
+ bp->wol_filter_id = resp->wol_filter_id;
+ mutex_unlock(&bp->hwrm_cmd_lock);
+ return rc;
+}
+
+int bnxt_hwrm_free_wol_fltr(struct bnxt *bp)
+{
+ struct hwrm_wol_filter_free_input req = {0};
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_FREE, -1, -1);
+ req.port_id = cpu_to_le16(bp->pf.port_id);
+ req.enables = cpu_to_le32(WOL_FILTER_FREE_REQ_ENABLES_WOL_FILTER_ID);
+ req.wol_filter_id = bp->wol_filter_id;
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return rc;
+}
+
+static u16 bnxt_hwrm_get_wol_fltrs(struct bnxt *bp, u16 handle)
+{
+ struct hwrm_wol_filter_qcfg_input req = {0};
+ struct hwrm_wol_filter_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+ u16 next_handle = 0;
+ int rc;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_WOL_FILTER_QCFG, -1, -1);
+ req.port_id = cpu_to_le16(bp->pf.port_id);
+ req.handle = cpu_to_le16(handle);
+ mutex_lock(&bp->hwrm_cmd_lock);
+ rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ if (!rc) {
+ next_handle = le16_to_cpu(resp->next_handle);
+ if (next_handle != 0) {
+ if (resp->wol_type ==
+ WOL_FILTER_ALLOC_REQ_WOL_TYPE_MAGICPKT) {
+ bp->wol = 1;
+ bp->wol_filter_id = resp->wol_filter_id;
+ }
+ }
+ }
+ mutex_unlock(&bp->hwrm_cmd_lock);
+ return next_handle;
+}
+
+static void bnxt_get_wol_settings(struct bnxt *bp)
+{
+ u16 handle = 0;
+
+ if (!BNXT_PF(bp) || !(bp->flags & BNXT_FLAG_WOL_CAP))
+ return;
+
+ do {
+ handle = bnxt_hwrm_get_wol_fltrs(bp, handle);
+ } while (handle && handle != 0xffff);
+}
+
static bool bnxt_eee_config_ok(struct bnxt *bp)
{
struct ethtool_eee *eee = &bp->eee;
return rc;
}
+/* rtnl_lock held, open the NIC half way by allocating all resources, but
+ * NAPI, IRQ, and TX are not enabled. This is mainly used for offline
+ * self tests.
+ */
+int bnxt_half_open_nic(struct bnxt *bp)
+{
+ int rc = 0;
+
+ rc = bnxt_alloc_mem(bp, false);
+ if (rc) {
+ netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc);
+ goto half_open_err;
+ }
+ rc = bnxt_init_nic(bp, false);
+ if (rc) {
+ netdev_err(bp->dev, "bnxt_init_nic err: %x\n", rc);
+ goto half_open_err;
+ }
+ return 0;
+
+half_open_err:
+ bnxt_free_skbs(bp);
+ bnxt_free_mem(bp, false);
+ dev_close(bp->dev);
+ return rc;
+}
+
+/* rtnl_lock held, this call can only be made after a previous successful
+ * call to bnxt_half_open_nic().
+ */
+void bnxt_half_close_nic(struct bnxt *bp)
+{
+ bnxt_hwrm_resource_free(bp, false, false);
+ bnxt_free_skbs(bp);
+ bnxt_free_mem(bp, false);
+}
+
static int bnxt_open(struct net_device *dev)
{
struct bnxt *bp = netdev_priv(dev);
if (ntc->type != TC_SETUP_MQPRIO)
return -EINVAL;
- return bnxt_setup_mq_tc(dev, ntc->tc);
+ ntc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ return bnxt_setup_mq_tc(dev, ntc->mqprio->num_tc);
}
#ifdef CONFIG_RFS_ACCEL
bnxt_clear_int_mode(bp);
bnxt_hwrm_func_drv_unrgtr(bp);
bnxt_free_hwrm_resources(bp);
+ bnxt_ethtool_free(bp);
bnxt_dcb_free(bp);
kfree(bp->edev);
bp->edev = NULL;
bnxt_hwrm_func_qcfg(bp);
bnxt_hwrm_port_led_qcaps(bp);
+ bnxt_ethtool_init(bp);
bnxt_set_rx_skb_mode(bp, false);
bnxt_set_tpa_flags(bp);
if (rc)
goto init_err_pci_clean;
+ bnxt_get_wol_settings(bp);
+ if (bp->flags & BNXT_FLAG_WOL_CAP)
+ device_set_wakeup_enable(&pdev->dev, bp->wol);
+ else
+ device_set_wakeup_capable(&pdev->dev, false);
+
rc = register_netdev(dev);
if (rc)
goto init_err_clr_int;
return rc;
}
+static void bnxt_shutdown(struct pci_dev *pdev)
+{
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct bnxt *bp;
+
+ if (!dev)
+ return;
+
+ rtnl_lock();
+ bp = netdev_priv(dev);
+ if (!bp)
+ goto shutdown_exit;
+
+ if (netif_running(dev))
+ dev_close(dev);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ bnxt_clear_int_mode(bp);
+ pci_wake_from_d3(pdev, bp->wol);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
+
+shutdown_exit:
+ rtnl_unlock();
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int bnxt_suspend(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct bnxt *bp = netdev_priv(dev);
+ int rc = 0;
+
+ rtnl_lock();
+ if (netif_running(dev)) {
+ netif_device_detach(dev);
+ rc = bnxt_close(dev);
+ }
+ bnxt_hwrm_func_drv_unrgtr(bp);
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_resume(struct device *device)
+{
+ struct pci_dev *pdev = to_pci_dev(device);
+ struct net_device *dev = pci_get_drvdata(pdev);
+ struct bnxt *bp = netdev_priv(dev);
+ int rc = 0;
+
+ rtnl_lock();
+ if (bnxt_hwrm_ver_get(bp) || bnxt_hwrm_func_drv_rgtr(bp)) {
+ rc = -ENODEV;
+ goto resume_exit;
+ }
+ rc = bnxt_hwrm_func_reset(bp);
+ if (rc) {
+ rc = -EBUSY;
+ goto resume_exit;
+ }
+ bnxt_get_wol_settings(bp);
+ if (netif_running(dev)) {
+ rc = bnxt_open(dev);
+ if (!rc)
+ netif_device_attach(dev);
+ }
+
+resume_exit:
+ rtnl_unlock();
+ return rc;
+}
+
+static SIMPLE_DEV_PM_OPS(bnxt_pm_ops, bnxt_suspend, bnxt_resume);
+#define BNXT_PM_OPS (&bnxt_pm_ops)
+
+#else
+
+#define BNXT_PM_OPS NULL
+
+#endif /* CONFIG_PM_SLEEP */
+
/**
* bnxt_io_error_detected - called when PCI error is detected
* @pdev: Pointer to PCI device
.id_table = bnxt_pci_tbl,
.probe = bnxt_init_one,
.remove = bnxt_remove_one,
+ .shutdown = bnxt_shutdown,
+ .driver.pm = BNXT_PM_OPS,
.err_handler = &bnxt_err_handler,
#if defined(CONFIG_BNXT_SRIOV)
.sriov_configure = bnxt_sriov_configure,
#include <linux/module.h>
#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_net.h>
nps_enet_tx_handler(ndev);
work_done = nps_enet_rx_handler(ndev);
- if (work_done < budget) {
+ if ((work_done < budget) && napi_complete_done(napi, work_done)) {
u32 buf_int_enable_value = 0;
- napi_complete_done(napi, work_done);
-
/* set tx_done and rx_rdy bits */
buf_int_enable_value |= NPS_ENET_ENABLE << RX_RDY_SHIFT;
buf_int_enable_value |= NPS_ENET_ENABLE << TX_DONE_SHIFT;
else
*link_status = 0;
- ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, &sfp_prsnt);
- if (!ret)
- *link_status = *link_status && sfp_prsnt;
+ if (mac_cb->media_type == HNAE_MEDIA_TYPE_FIBER) {
+ ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb,
+ &sfp_prsnt);
+ if (!ret)
+ *link_status = *link_status && sfp_prsnt;
+ }
mac_cb->link = *link_status;
}
return 0;
}
-/**
- *hns_mac_del_mac - delete mac address into dsaf table,can't delete the same
- * address twice
- *@net_dev: net device
- *@vfn : vf lan
- *@mac : mac address
- *return status
- */
-int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac)
-{
- struct mac_entry_idx *old_mac;
- struct dsaf_device *dsaf_dev;
- u32 ret;
-
- dsaf_dev = mac_cb->dsaf_dev;
-
- if (vfn < DSAF_MAX_VM_NUM) {
- old_mac = &mac_cb->addr_entry_idx[vfn];
- } else {
- dev_err(mac_cb->dev,
- "vf queue is too large, %s mac%d queue = %#x!\n",
- mac_cb->dsaf_dev->ae_dev.name, mac_cb->mac_id, vfn);
- return -EINVAL;
- }
-
- if (dsaf_dev) {
- ret = hns_dsaf_del_mac_entry(dsaf_dev, old_mac->vlan_id,
- mac_cb->mac_id, old_mac->addr);
- if (ret)
- return ret;
-
- if (memcmp(old_mac->addr, mac, sizeof(old_mac->addr)) == 0)
- old_mac->valid = 0;
- }
-
- return 0;
-}
-
int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn)
{
struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
}
}
-int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu)
+int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu, u32 buf_size)
{
struct mac_driver *drv = hns_mac_get_drv(mac_cb);
- u32 buf_size = mac_cb->dsaf_dev->buf_size;
u32 new_frm = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
u32 max_frm = AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver) ?
MAC_MAX_MTU : MAC_MAX_MTU_V2;
of_node_put(np);
np = of_parse_phandle(to_of_node(mac_cb->fw_port),
- "serdes-syscon", 0);
+ "serdes-syscon", 0);
syscon = syscon_node_to_regmap(np);
of_node_put(np);
if (IS_ERR_OR_NULL(syscon)) {
o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
dsaf_set_field(o_sbm_bp_cfg,
DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_M,
- DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 48);
+ DSAFV2_SBM_CFG3_SET_BUF_NUM_NO_PFC_S, 55);
dsaf_set_field(o_sbm_bp_cfg,
DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_M,
- DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 80);
+ DSAFV2_SBM_CFG3_RESET_BUF_NUM_NO_PFC_S, 110);
dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
/* for no enable pfc mode */
o_sbm_bp_cfg = dsaf_read_dev(dsaf_dev, reg);
dsaf_set_field(o_sbm_bp_cfg,
DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_M,
- DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 192);
+ DSAFV2_SBM_CFG4_SET_BUF_NUM_NO_PFC_S, 128);
dsaf_set_field(o_sbm_bp_cfg,
DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M,
- DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 240);
+ DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S, 192);
dsaf_write_dev(dsaf_dev, reg, o_sbm_bp_cfg);
}
mac_key->high.bits.mac_3 = addr[3];
mac_key->low.bits.mac_4 = addr[4];
mac_key->low.bits.mac_5 = addr[5];
+ mac_key->low.bits.port_vlan = 0;
dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M,
DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,
mac_entry->addr);
}
-/**
- * hns_dsaf_set_mac_mc_entry - set mac mc-entry
- * @dsaf_dev: dsa fabric device struct pointer
- * @mac_entry: mc-mac entry
- */
-int hns_dsaf_set_mac_mc_entry(
- struct dsaf_device *dsaf_dev,
- struct dsaf_drv_mac_multi_dest_entry *mac_entry)
-{
- u16 entry_index = DSAF_INVALID_ENTRY_IDX;
- struct dsaf_drv_tbl_tcam_key mac_key;
- struct dsaf_tbl_tcam_mcast_cfg mac_data;
- struct dsaf_drv_priv *priv =
- (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
- struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
- struct dsaf_drv_tbl_tcam_key tmp_mac_key;
- struct dsaf_tbl_tcam_data tcam_data;
-
- /* mac addr check */
- if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
- dev_err(dsaf_dev->dev, "set uc %s Mac %pM err!\n",
- dsaf_dev->ae_dev.name, mac_entry->addr);
- return -EINVAL;
- }
-
- /*config key */
- hns_dsaf_set_mac_key(dsaf_dev, &mac_key,
- mac_entry->in_vlan_id,
- mac_entry->in_port_num, mac_entry->addr);
-
- /* entry ie exist? */
- entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
- if (entry_index == DSAF_INVALID_ENTRY_IDX) {
- /*if hasnot, find enpty entry*/
- entry_index = hns_dsaf_find_empty_mac_entry(dsaf_dev);
- if (entry_index == DSAF_INVALID_ENTRY_IDX) {
- /*if hasnot empty, error*/
- dev_err(dsaf_dev->dev,
- "set_uc_entry failed, %s Mac key(%#x:%#x)\n",
- dsaf_dev->ae_dev.name,
- mac_key.high.val, mac_key.low.val);
- return -EINVAL;
- }
-
- /* config hardware entry */
- memset(mac_data.tbl_mcast_port_msk,
- 0, sizeof(mac_data.tbl_mcast_port_msk));
- } else {
- /* config hardware entry */
- hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
- &mac_data);
-
- tmp_mac_key.high.val =
- le32_to_cpu(tcam_data.tbl_tcam_data_high);
- tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
- }
- mac_data.tbl_mcast_old_en = 0;
- mac_data.tbl_mcast_item_vld = 1;
- dsaf_set_field(mac_data.tbl_mcast_port_msk[0],
- 0x3F, 0, mac_entry->port_mask[0]);
-
- dev_dbg(dsaf_dev->dev,
- "set_uc_entry, %s key(%#x:%#x) entry_index%d\n",
- dsaf_dev->ae_dev.name, mac_key.high.val,
- mac_key.low.val, entry_index);
-
- tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
- tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
-
- hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, NULL,
- &mac_data);
-
- /* config software entry */
- soft_mac_entry += entry_index;
- soft_mac_entry->index = entry_index;
- soft_mac_entry->tcam_key.high.val = mac_key.high.val;
- soft_mac_entry->tcam_key.low.val = mac_key.low.val;
-
- return 0;
-}
-
static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src)
{
u16 *a = (u16 *)dst;
return ret;
}
-/**
- * hns_dsaf_get_mac_uc_entry - get mac uc entry
- * @dsaf_dev: dsa fabric device struct pointer
- * @mac_entry: mac entry
- */
-int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
- struct dsaf_drv_mac_single_dest_entry *mac_entry)
-{
- u16 entry_index = DSAF_INVALID_ENTRY_IDX;
- struct dsaf_drv_tbl_tcam_key mac_key;
-
- struct dsaf_tbl_tcam_ucast_cfg mac_data;
- struct dsaf_tbl_tcam_data tcam_data;
-
- /* check macaddr */
- if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
- MAC_IS_BROADCAST(mac_entry->addr)) {
- dev_err(dsaf_dev->dev, "get_entry failed,addr %pM\n",
- mac_entry->addr);
- return -EINVAL;
- }
-
- /*config key */
- hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id,
- mac_entry->in_port_num, mac_entry->addr);
-
- /*check exist? */
- entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
- if (entry_index == DSAF_INVALID_ENTRY_IDX) {
- /*find none, error */
- dev_err(dsaf_dev->dev,
- "get_uc_entry failed, %s Mac key(%#x:%#x)\n",
- dsaf_dev->ae_dev.name,
- mac_key.high.val, mac_key.low.val);
- return -EINVAL;
- }
- dev_dbg(dsaf_dev->dev,
- "get_uc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
- dsaf_dev->ae_dev.name, mac_key.high.val,
- mac_key.low.val, entry_index);
-
- /* read entry */
- hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
-
- mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
- mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
- mac_entry->port_num = mac_data.tbl_ucast_out_port;
-
- return 0;
-}
-
-/**
- * hns_dsaf_get_mac_mc_entry - get mac mc entry
- * @dsaf_dev: dsa fabric device struct pointer
- * @mac_entry: mac entry
- */
-int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
- struct dsaf_drv_mac_multi_dest_entry *mac_entry)
-{
- u16 entry_index = DSAF_INVALID_ENTRY_IDX;
- struct dsaf_drv_tbl_tcam_key mac_key;
-
- struct dsaf_tbl_tcam_mcast_cfg mac_data;
- struct dsaf_tbl_tcam_data tcam_data;
-
- /*check mac addr */
- if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
- MAC_IS_BROADCAST(mac_entry->addr)) {
- dev_err(dsaf_dev->dev, "get_entry failed,addr %pM\n",
- mac_entry->addr);
- return -EINVAL;
- }
-
- /*config key */
- hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id,
- mac_entry->in_port_num, mac_entry->addr);
-
- /*check exist? */
- entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
- if (entry_index == DSAF_INVALID_ENTRY_IDX) {
- /* find none, error */
- dev_err(dsaf_dev->dev,
- "get_mac_uc_entry failed, %s Mac key(%#x:%#x)\n",
- dsaf_dev->ae_dev.name, mac_key.high.val,
- mac_key.low.val);
- return -EINVAL;
- }
- dev_dbg(dsaf_dev->dev,
- "get_mac_uc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
- dsaf_dev->ae_dev.name, mac_key.high.val,
- mac_key.low.val, entry_index);
-
- /*read entry */
- hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
-
- mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
- mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
- mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
- return 0;
-}
-
-/**
- * hns_dsaf_get_mac_entry_by_index - get mac entry by tab index
- * @dsaf_dev: dsa fabric device struct pointer
- * @entry_index: tab entry index
- * @mac_entry: mac entry
- */
-int hns_dsaf_get_mac_entry_by_index(
- struct dsaf_device *dsaf_dev,
- u16 entry_index, struct dsaf_drv_mac_multi_dest_entry *mac_entry)
-{
- struct dsaf_drv_tbl_tcam_key mac_key;
-
- struct dsaf_tbl_tcam_mcast_cfg mac_data;
- struct dsaf_tbl_tcam_ucast_cfg mac_uc_data;
- struct dsaf_tbl_tcam_data tcam_data;
- char mac_addr[ETH_ALEN] = {0};
-
- if (entry_index >= dsaf_dev->tcam_max_num) {
- /* find none, del error */
- dev_err(dsaf_dev->dev, "get_uc_entry failed, %s\n",
- dsaf_dev->ae_dev.name);
- return -EINVAL;
- }
-
- /* mc entry, do read opt */
- hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
-
- mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
- mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
- mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
-
- /***get mac addr*/
- mac_addr[0] = mac_key.high.bits.mac_0;
- mac_addr[1] = mac_key.high.bits.mac_1;
- mac_addr[2] = mac_key.high.bits.mac_2;
- mac_addr[3] = mac_key.high.bits.mac_3;
- mac_addr[4] = mac_key.low.bits.mac_4;
- mac_addr[5] = mac_key.low.bits.mac_5;
- /**is mc or uc*/
- if (MAC_IS_MULTICAST((u8 *)mac_addr) ||
- MAC_IS_L3_MULTICAST((u8 *)mac_addr)) {
- /**mc donot do*/
- } else {
- /*is not mc, just uc... */
- hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data,
- &mac_uc_data);
-
- mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
- mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
-
- mac_entry->port_mask[0] = (1 << mac_uc_data.tbl_ucast_out_port);
- }
-
- return 0;
-}
-
static struct dsaf_device *hns_dsaf_alloc_dev(struct device *dev,
size_t sizeof_priv)
{
/* find the tcam entry index for promisc */
entry_index = dsaf_promisc_tcam_entry(port);
+ memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
+ memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
+
/* config key mask */
if (enable) {
- memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
- memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
dsaf_set_field(tbl_tcam_data.low.bits.port_vlan,
DSAF_TBL_TCAM_KEY_PORT_M,
DSAF_TBL_TCAM_KEY_PORT_S, port);
#define DRV_KERN "-k"
-#define DRV_VERSION_MAJOR 1
-#define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 27
+#define DRV_VERSION_MAJOR 2
+#define DRV_VERSION_MINOR 1
+#define DRV_VERSION_BUILD 7
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) DRV_KERN
* device is munged, not just the one netdev port, so go for the full
* reset.
**/
-#ifdef I40E_FCOE
-void i40e_tx_timeout(struct net_device *netdev)
-#else
static void i40e_tx_timeout(struct net_device *netdev)
-#endif
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
* Returns the address of the device statistics structure.
* The statistics are actually updated from the service task.
**/
-#ifndef I40E_FCOE
-static
-#endif
-void i40e_get_netdev_stats_struct(struct net_device *netdev,
+static void i40e_get_netdev_stats_struct(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
veb->stat_offsets_loaded = true;
}
-#ifdef I40E_FCOE
-/**
- * i40e_update_fcoe_stats - Update FCoE-specific ethernet statistics counters.
- * @vsi: the VSI that is capable of doing FCoE
- **/
-static void i40e_update_fcoe_stats(struct i40e_vsi *vsi)
-{
- struct i40e_pf *pf = vsi->back;
- struct i40e_hw *hw = &pf->hw;
- struct i40e_fcoe_stats *ofs;
- struct i40e_fcoe_stats *fs; /* device's eth stats */
- int idx;
-
- if (vsi->type != I40E_VSI_FCOE)
- return;
-
- idx = hw->pf_id + I40E_FCOE_PF_STAT_OFFSET;
- fs = &vsi->fcoe_stats;
- ofs = &vsi->fcoe_stats_offsets;
-
- i40e_stat_update32(hw, I40E_GL_FCOEPRC(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->rx_fcoe_packets, &fs->rx_fcoe_packets);
- i40e_stat_update48(hw, I40E_GL_FCOEDWRCH(idx), I40E_GL_FCOEDWRCL(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->rx_fcoe_dwords, &fs->rx_fcoe_dwords);
- i40e_stat_update32(hw, I40E_GL_FCOERPDC(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->rx_fcoe_dropped, &fs->rx_fcoe_dropped);
- i40e_stat_update32(hw, I40E_GL_FCOEPTC(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->tx_fcoe_packets, &fs->tx_fcoe_packets);
- i40e_stat_update48(hw, I40E_GL_FCOEDWTCH(idx), I40E_GL_FCOEDWTCL(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->tx_fcoe_dwords, &fs->tx_fcoe_dwords);
- i40e_stat_update32(hw, I40E_GL_FCOECRC(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->fcoe_bad_fccrc, &fs->fcoe_bad_fccrc);
- i40e_stat_update32(hw, I40E_GL_FCOELAST(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->fcoe_last_error, &fs->fcoe_last_error);
- i40e_stat_update32(hw, I40E_GL_FCOEDDPC(idx),
- vsi->fcoe_stat_offsets_loaded,
- &ofs->fcoe_ddp_count, &fs->fcoe_ddp_count);
-
- vsi->fcoe_stat_offsets_loaded = true;
-}
-
-#endif
/**
* i40e_update_vsi_stats - Update the vsi statistics counters.
* @vsi: the VSI to be updated
&osd->rx_lpi_count, &nsd->rx_lpi_count);
if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
- !(pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED))
+ !(pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED))
nsd->fd_sb_status = true;
else
nsd->fd_sb_status = false;
if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
- !(pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+ !(pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED))
nsd->fd_atr_status = true;
else
nsd->fd_atr_status = false;
i40e_update_pf_stats(pf);
i40e_update_vsi_stats(vsi);
-#ifdef I40E_FCOE
- i40e_update_fcoe_stats(vsi);
-#endif
}
/**
*
* Returns 0 on success, negative on failure
**/
-#ifdef I40E_FCOE
-int i40e_set_mac(struct net_device *netdev, void *p)
-#else
static int i40e_set_mac(struct net_device *netdev, void *p)
-#endif
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
*
* Setup VSI queue mapping for enabled traffic classes.
**/
-#ifdef I40E_FCOE
-void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
- struct i40e_vsi_context *ctxt,
- u8 enabled_tc,
- bool is_add)
-#else
static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
struct i40e_vsi_context *ctxt,
u8 enabled_tc,
bool is_add)
-#endif
{
struct i40e_pf *pf = vsi->back;
u16 sections = 0;
qcount = min_t(int, pf->alloc_rss_size,
num_tc_qps);
break;
-#ifdef I40E_FCOE
- case I40E_VSI_FCOE:
- qcount = num_tc_qps;
- break;
-#endif
case I40E_VSI_FDIR:
case I40E_VSI_SRIOV:
case I40E_VSI_VMDQ2:
* i40e_set_rx_mode - NDO callback to set the netdev filters
* @netdev: network interface device structure
**/
-#ifdef I40E_FCOE
-void i40e_set_rx_mode(struct net_device *netdev)
-#else
static void i40e_set_rx_mode(struct net_device *netdev)
-#endif
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
static
struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
{
- while (next) {
- next = hlist_entry(next->hlist.next,
- typeof(struct i40e_new_mac_filter),
- hlist);
-
- /* keep going if we found a broadcast filter */
- if (next && is_broadcast_ether_addr(next->f->macaddr))
- continue;
-
- break;
+ hlist_for_each_entry_continue(next, hlist) {
+ if (!is_broadcast_ether_addr(next->f->macaddr))
+ return next;
}
- return next;
+ return NULL;
}
/**
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
netdev_info(netdev, "changing MTU from %d to %d\n",
netdev->mtu, new_mtu);
netdev->mtu = new_mtu;
if (netif_running(netdev))
i40e_vsi_reinit_locked(vsi);
- i40e_notify_client_of_l2_param_changes(vsi);
+ pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
+ I40E_FLAG_CLIENT_L2_CHANGE);
return 0;
}
*
* net_device_ops implementation for adding vlan ids
**/
-#ifdef I40E_FCOE
-int i40e_vlan_rx_add_vid(struct net_device *netdev,
- __always_unused __be16 proto, u16 vid)
-#else
static int i40e_vlan_rx_add_vid(struct net_device *netdev,
__always_unused __be16 proto, u16 vid)
-#endif
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
*
* net_device_ops implementation for removing vlan ids
**/
-#ifdef I40E_FCOE
-int i40e_vlan_rx_kill_vid(struct net_device *netdev,
- __always_unused __be16 proto, u16 vid)
-#else
static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
__always_unused __be16 proto, u16 vid)
-#endif
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
for (i = 0; i < vsi->num_queue_pairs && !err; i++)
err = i40e_setup_rx_descriptors(vsi->rx_rings[i]);
-#ifdef I40E_FCOE
- i40e_fcoe_setup_ddp_resources(vsi);
-#endif
return err;
}
for (i = 0; i < vsi->num_queue_pairs; i++)
if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
i40e_free_rx_resources(vsi->rx_rings[i]);
-#ifdef I40E_FCOE
- i40e_fcoe_free_ddp_resources(vsi);
-#endif
}
/**
tx_ctx.qlen = ring->count;
tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
I40E_FLAG_FD_ATR_ENABLED));
-#ifdef I40E_FCOE
- tx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE);
-#endif
tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
/* FDIR VSI tx ring can still use RS bit and writebacks */
if (vsi->type != I40E_VSI_FDIR)
ring->rx_buf_len = vsi->rx_buf_len;
- rx_ctx.dbuff = ring->rx_buf_len >> I40E_RXQ_CTX_DBUFF_SHIFT;
+ rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
+ BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
rx_ctx.base = (ring->dma / 128);
rx_ctx.qlen = ring->count;
rx_ctx.l2tsel = 1;
/* this controls whether VLAN is stripped from inner headers */
rx_ctx.showiv = 0;
-#ifdef I40E_FCOE
- rx_ctx.fc_ena = (vsi->type == I40E_VSI_FCOE);
-#endif
/* set the prefena field to 1 because the manual says to */
rx_ctx.prefena = 1;
int err = 0;
u16 i;
- if (vsi->netdev && (vsi->netdev->mtu > ETH_DATA_LEN))
- vsi->max_frame = vsi->netdev->mtu + ETH_HLEN
- + ETH_FCS_LEN + VLAN_HLEN;
- else
- vsi->max_frame = I40E_RXBUFFER_2048;
-
- vsi->rx_buf_len = I40E_RXBUFFER_2048;
-
-#ifdef I40E_FCOE
- /* setup rx buffer for FCoE */
- if ((vsi->type == I40E_VSI_FCOE) &&
- (vsi->back->flags & I40E_FLAG_FCOE_ENABLED)) {
- vsi->rx_buf_len = I40E_RXBUFFER_3072;
- vsi->max_frame = I40E_RXBUFFER_3072;
+ if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+ vsi->max_frame = I40E_MAX_RXBUFFER;
+ vsi->rx_buf_len = I40E_RXBUFFER_2048;
+#if (PAGE_SIZE < 8192)
+ } else if (vsi->netdev->mtu <= ETH_DATA_LEN) {
+ vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+ vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+#endif
+ } else {
+ vsi->max_frame = I40E_MAX_RXBUFFER;
+ vsi->rx_buf_len = I40E_RXBUFFER_2048;
}
-#endif /* I40E_FCOE */
- /* round up for the chip's needs */
- vsi->rx_buf_len = ALIGN(vsi->rx_buf_len,
- BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
-
/* set up individual rings */
for (i = 0; i < vsi->num_queue_pairs && !err; i++)
err = i40e_configure_rx_ring(vsi->rx_rings[i]);
if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
return;
+ /* Reset FDir counters as we're replaying all existing filters */
+ pf->fd_tcp4_filter_cnt = 0;
+ pf->fd_udp4_filter_cnt = 0;
+ pf->fd_sctp4_filter_cnt = 0;
+ pf->fd_ip4_filter_cnt = 0;
+
hlist_for_each_entry_safe(filter, node,
&pf->fdir_filter_list, fdir_node) {
i40e_add_del_fdir(vsi, filter, true);
* This is used by netconsole to send skbs without having to re-enable
* interrupts. It's not called while the normal interrupt routine is executing.
**/
-#ifdef I40E_FCOE
-void i40e_netpoll(struct net_device *netdev)
-#else
static void i40e_netpoll(struct net_device *netdev)
-#endif
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
}
}
- if (hw->revision_id == 0)
- mdelay(50);
return ret;
}
}
}
+ /* Due to HW errata, on Rx disable only, the register can indicate done
+ * before it really is. Needs 50ms to be sure
+ */
+ if (!enable)
+ mdelay(50);
+
return ret;
}
if (!vsi->netdev)
return;
- for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
- napi_enable(&vsi->q_vectors[q_idx]->napi);
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+ struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_enable(&q_vector->napi);
+ }
}
/**
if (!vsi->netdev)
return;
- for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
- napi_disable(&vsi->q_vectors[q_idx]->napi);
+ for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+ struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+ if (q_vector->rx.ring || q_vector->tx.ring)
+ napi_disable(&q_vector->napi);
+ }
}
/**
**/
static void i40e_vsi_close(struct i40e_vsi *vsi)
{
- bool reset = false;
-
+ struct i40e_pf *pf = vsi->back;
if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
i40e_down(vsi);
i40e_vsi_free_irq(vsi);
i40e_vsi_free_tx_resources(vsi);
i40e_vsi_free_rx_resources(vsi);
vsi->current_netdev_flags = 0;
- if (test_bit(__I40E_RESET_RECOVERY_PENDING, &vsi->back->state))
- reset = true;
- i40e_notify_client_of_netdev_close(vsi, reset);
+ pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state))
+ pf->flags |= I40E_FLAG_CLIENT_RESET;
}
/**
if (test_bit(__I40E_DOWN, &vsi->state))
return;
- /* No need to disable FCoE VSI when Tx suspended */
- if ((test_bit(__I40E_PORT_TX_SUSPENDED, &vsi->back->state)) &&
- vsi->type == I40E_VSI_FCOE) {
- dev_dbg(&vsi->back->pdev->dev,
- "VSI seid %d skipping FCoE VSI disable\n", vsi->seid);
- return;
- }
-
set_bit(__I40E_NEEDS_RESTART, &vsi->state);
if (vsi->netdev && netif_running(vsi->netdev))
vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
int v, ret = 0;
for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
- /* No need to wait for FCoE VSI queues */
- if (pf->vsi[v] && pf->vsi[v]->type != I40E_VSI_FCOE) {
+ if (pf->vsi[v]) {
ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
if (ret)
break;
continue;
/* - Enable all TCs for the LAN VSI
-#ifdef I40E_FCOE
- * - For FCoE VSI only enable the TC configured
- * as per the APP TLV
-#endif
* - For all others keep them at TC0 for now
*/
if (v == pf->lan_vsi)
tc_map = i40e_pf_get_tc_map(pf);
else
tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
-#ifdef I40E_FCOE
- if (pf->vsi[v]->type == I40E_VSI_FCOE)
- tc_map = i40e_get_fcoe_tc_map(pf);
-#endif /* #ifdef I40E_FCOE */
ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
if (ret) {
(hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
dev_info(&pf->pdev->dev,
"DCBX offload is not supported or is disabled for this PF.\n");
-
- if (pf->flags & I40E_FLAG_MFP_ENABLED)
- goto out;
-
} else {
/* When status is not DISABLED then DCBX in FW */
pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
/* replay FDIR SB filters */
if (vsi->type == I40E_VSI_FDIR) {
/* reset fd counters */
- pf->fd_add_err = pf->fd_atr_cnt = 0;
- if (pf->fd_tcp_rule > 0) {
- pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
- if (I40E_DEBUG_FD & pf->hw.debug_mask)
- dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n");
- pf->fd_tcp_rule = 0;
- }
+ pf->fd_add_err = 0;
+ pf->fd_atr_cnt = 0;
i40e_fdir_filter_restore(vsi);
}
i40e_clean_rx_ring(vsi->rx_rings[i]);
}
- i40e_notify_client_of_netdev_close(vsi, false);
-
}
/**
return ret;
}
-#ifdef I40E_FCOE
-int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
- struct tc_to_netdev *tc)
-#else
static int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
struct tc_to_netdev *tc)
-#endif
{
- if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+ if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
- return i40e_setup_tc(netdev, tc->tc);
+
+ tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+ return i40e_setup_tc(netdev, tc->mqprio->num_tc);
}
/**
static void i40e_fdir_filter_exit(struct i40e_pf *pf)
{
struct i40e_fdir_filter *filter;
+ struct i40e_flex_pit *pit_entry, *tmp;
struct hlist_node *node2;
hlist_for_each_entry_safe(filter, node2,
hlist_del(&filter->fdir_node);
kfree(filter);
}
+
+ list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) {
+ list_del(&pit_entry->list);
+ kfree(pit_entry);
+ }
+ INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+
+ list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) {
+ list_del(&pit_entry->list);
+ kfree(pit_entry);
+ }
+ INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
pf->fdir_pf_active_filters = 0;
+ pf->fd_tcp4_filter_cnt = 0;
+ pf->fd_udp4_filter_cnt = 0;
+ pf->fd_sctp4_filter_cnt = 0;
+ pf->fd_ip4_filter_cnt = 0;
+
+ /* Reprogram the default input set for TCP/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ /* Reprogram the default input set for UDP/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ /* Reprogram the default input set for SCTP/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+ I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+ /* Reprogram the default input set for Other/IPv4 */
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
}
/**
i40e_service_event_schedule(pf);
} else {
i40e_pf_unquiesce_all_vsi(pf);
- /* Notify the client for the DCB changes */
- i40e_notify_client_of_l2_param_changes(pf->vsi[pf->lan_vsi]);
+ pf->flags |= (I40E_FLAG_SERVICE_CLIENT_REQUESTED |
+ I40E_FLAG_CLIENT_L2_CHANGE);
}
exit:
(pf->fd_add_err == 0) ||
(i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt)) {
if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
- (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
- pf->auto_disable_flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ (pf->hw_disabled_flags & I40E_FLAG_FD_SB_ENABLED)) {
+ pf->hw_disabled_flags &= ~I40E_FLAG_FD_SB_ENABLED;
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
}
*/
if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) {
if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
- (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) &&
- (pf->fd_tcp_rule == 0)) {
- pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED) &&
+ (pf->fd_tcp4_filter_cnt == 0)) {
+ pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
}
}
pf->fd_flush_timestamp = jiffies;
- pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+ pf->hw_disabled_flags |= I40E_FLAG_FD_ATR_ENABLED;
/* flush all filters */
wr32(&pf->hw, I40E_PFQF_CTL_1,
I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
} else {
/* replay sideband filters */
i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
- if (!disable_atr)
- pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ if (!disable_atr && !pf->fd_tcp4_filter_cnt)
+ pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
switch (vsi->type) {
case I40E_VSI_MAIN:
-#ifdef I40E_FCOE
- case I40E_VSI_FCOE:
-#endif
if (!vsi->netdev || !vsi->netdev_registered)
break;
opcode);
break;
}
- } while (pending && (i++ < pf->adminq_work_limit));
+ } while (i++ < pf->adminq_work_limit);
+
+ if (i < pf->adminq_work_limit)
+ clear_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
- clear_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
/* re-enable Admin queue interrupt cause */
val = rd32(hw, I40E_PFINT_ICR0_ENA);
val |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
goto end_core_reset;
ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
- hw->func_caps.num_rx_qp,
- pf->fcoe_hmc_cntx_num, pf->fcoe_hmc_filt_num);
+ hw->func_caps.num_rx_qp, 0, 0);
if (ret) {
dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret);
goto end_core_reset;
/* Continue without DCB enabled */
}
#endif /* CONFIG_I40E_DCB */
-#ifdef I40E_FCOE
- i40e_init_pf_fcoe(pf);
-
-#endif
/* do basic switch setup */
ret = i40e_setup_pf_switch(pf, reinit);
if (ret)
{
struct i40e_hw *hw = &pf->hw;
i40e_status ret;
- __be16 port;
+ u16 port;
int i;
if (!(pf->flags & I40E_FLAG_UDP_FILTER_SYNC))
"%s %s port %d, index %d failed, err %s aq_err %s\n",
pf->udp_ports[i].type ? "vxlan" : "geneve",
port ? "add" : "delete",
- ntohs(port), i,
+ port, i,
i40e_stat_str(&pf->hw, ret),
i40e_aq_str(&pf->hw,
pf->hw.aq.asq_last_status));
i40e_vc_process_vflr_event(pf);
i40e_watchdog_subtask(pf);
i40e_fdir_reinit_subtask(pf);
- i40e_client_subtask(pf);
+ if (pf->flags & I40E_FLAG_CLIENT_RESET) {
+ /* Client subtask will reopen next time through. */
+ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], true);
+ pf->flags &= ~I40E_FLAG_CLIENT_RESET;
+ } else {
+ i40e_client_subtask(pf);
+ if (pf->flags & I40E_FLAG_CLIENT_L2_CHANGE) {
+ i40e_notify_client_of_l2_param_changes(
+ pf->vsi[pf->lan_vsi]);
+ pf->flags &= ~I40E_FLAG_CLIENT_L2_CHANGE;
+ }
+ }
i40e_sync_filters_subtask(pf);
i40e_sync_udp_filters_subtask(pf);
i40e_clean_adminq_subtask(pf);
I40E_REQ_DESCRIPTOR_MULTIPLE);
break;
-#ifdef I40E_FCOE
- case I40E_VSI_FCOE:
- vsi->alloc_queue_pairs = pf->num_fcoe_qps;
- vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
- I40E_REQ_DESCRIPTOR_MULTIPLE);
- vsi->num_q_vectors = pf->num_fcoe_msix;
- break;
-
-#endif /* I40E_FCOE */
default:
WARN_ON(1);
return -ENODATA;
static int i40e_init_msix(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
+ int cpus, extra_vectors;
int vectors_left;
int v_budget, i;
int v_actual;
* - assumes symmetric Tx/Rx pairing
* - The number of VMDq pairs
* - The CPU count within the NUMA node if iWARP is enabled
-#ifdef I40E_FCOE
- * - The number of FCOE qps.
-#endif
* Once we count this up, try the request.
*
* If we can't get what we want, we'll simplify to nearly nothing
vectors_left--;
}
- /* reserve vectors for the main PF traffic queues */
- pf->num_lan_msix = min_t(int, num_online_cpus(), vectors_left);
+ /* reserve some vectors for the main PF traffic queues. Initially we
+ * only reserve at most 50% of the available vectors, in the case that
+ * the number of online CPUs is large. This ensures that we can enable
+ * extra features as well. Once we've enabled the other features, we
+ * will use any remaining vectors to reach as close as we can to the
+ * number of online CPUs.
+ */
+ cpus = num_online_cpus();
+ pf->num_lan_msix = min_t(int, cpus, vectors_left / 2);
vectors_left -= pf->num_lan_msix;
- v_budget += pf->num_lan_msix;
/* reserve one vector for sideband flow director */
if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
}
}
-#ifdef I40E_FCOE
- /* can we reserve enough for FCoE? */
- if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
- if (!vectors_left)
- pf->num_fcoe_msix = 0;
- else if (vectors_left >= pf->num_fcoe_qps)
- pf->num_fcoe_msix = pf->num_fcoe_qps;
- else
- pf->num_fcoe_msix = 1;
- v_budget += pf->num_fcoe_msix;
- vectors_left -= pf->num_fcoe_msix;
- }
-
-#endif
/* can we reserve enough for iWARP? */
if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
iwarp_requested = pf->num_iwarp_msix;
}
}
+ /* On systems with a large number of SMP cores, we previously limited
+ * the number of vectors for num_lan_msix to be at most 50% of the
+ * available vectors, to allow for other features. Now, we add back
+ * the remaining vectors. However, we ensure that the total
+ * num_lan_msix will not exceed num_online_cpus(). To do this, we
+ * calculate the number of vectors we can add without going over the
+ * cap of CPUs. For systems with a small number of CPUs this will be
+ * zero.
+ */
+ extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left);
+ pf->num_lan_msix += extra_vectors;
+ vectors_left -= extra_vectors;
+
+ WARN(vectors_left < 0,
+ "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n");
+
+ v_budget += pf->num_lan_msix;
pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
GFP_KERNEL);
if (!pf->msix_entries)
pf->num_vmdq_msix = 1; /* force VMDqs to only one vector */
pf->num_vmdq_vsis = 1;
pf->num_vmdq_qps = 1;
-#ifdef I40E_FCOE
- pf->num_fcoe_qps = 0;
- pf->num_fcoe_msix = 0;
-#endif
/* partition out the remaining vectors */
switch (vec) {
} else {
pf->num_lan_msix = 2;
}
-#ifdef I40E_FCOE
- /* give one vector to FCoE */
- if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
- pf->num_lan_msix = 1;
- pf->num_fcoe_msix = 1;
- }
-#endif
break;
default:
if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
(vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
pf->num_lan_msix);
pf->num_lan_qps = pf->num_lan_msix;
-#ifdef I40E_FCOE
- /* give one vector to FCoE */
- if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
- pf->num_fcoe_msix = 1;
- vec--;
- }
-#endif
break;
}
}
dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
}
-#ifdef I40E_FCOE
-
- if ((pf->flags & I40E_FLAG_FCOE_ENABLED) && (pf->num_fcoe_msix == 0)) {
- dev_info(&pf->pdev->dev, "FCOE disabled, not enough MSI-X vectors\n");
- pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
- }
-#endif
i40e_debug(&pf->hw, I40E_DEBUG_INIT,
"MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
pf->num_lan_msix,
if (vectors < 0) {
pf->flags &= ~(I40E_FLAG_MSIX_ENABLED |
I40E_FLAG_IWARP_ENABLED |
-#ifdef I40E_FCOE
- I40E_FLAG_FCOE_ENABLED |
-#endif
I40E_FLAG_RSS_ENABLED |
I40E_FLAG_DCB_CAPABLE |
I40E_FLAG_DCB_ENABLED |
if (vsi->type == I40E_VSI_MAIN) {
for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
- i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i),
- seed_dw[i]);
+ wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
} else if (vsi->type == I40E_VSI_SRIOV) {
for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
- i40e_write_rx_ctl(hw,
- I40E_VFQF_HKEY1(i, vf_id),
- seed_dw[i]);
+ wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]);
} else {
dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n");
}
if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
return -EINVAL;
for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
- i40e_write_rx_ctl(hw,
- I40E_VFQF_HLUT1(i, vf_id),
- lut_dw[i]);
+ wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]);
} else {
dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
}
i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
/* Determine the RSS size of the VSI */
- if (!vsi->rss_size)
- vsi->rss_size = min_t(int, pf->alloc_rss_size,
- vsi->num_queue_pairs);
+ if (!vsi->rss_size) {
+ u16 qcount;
+
+ qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+ vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
+ }
if (!vsi->rss_size)
return -EINVAL;
new_rss_size = min_t(int, queue_count, pf->rss_size_max);
if (queue_count != vsi->num_queue_pairs) {
+ u16 qcount;
+
vsi->req_queue_pairs = queue_count;
i40e_prep_for_reset(pf);
}
/* Reset vsi->rss_size, as number of enabled queues changed */
- vsi->rss_size = min_t(int, pf->alloc_rss_size,
- vsi->num_queue_pairs);
+ qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+ vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
i40e_pf_config_rss(pf);
}
pf->num_iwarp_msix = (int)num_online_cpus() + 1;
}
-#ifdef I40E_FCOE
- i40e_init_pf_fcoe(pf);
-
-#endif /* I40E_FCOE */
#ifdef CONFIG_PCI_IOV
if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
(pf->hw.aq.api_min_ver > 4))) {
/* Supported in FW API version higher than 1.4 */
pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
- pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+ pf->hw_disabled_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
} else {
- pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
+ pf->hw_disabled_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
}
pf->eeprom_version = 0xDEAD;
i40e_fdir_filter_exit(pf);
}
pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
- pf->auto_disable_flags &= ~I40E_FLAG_FD_SB_ENABLED;
+ pf->hw_disabled_flags &= ~I40E_FLAG_FD_SB_ENABLED;
/* reset fd counters */
- pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0;
- pf->fdir_pf_active_filters = 0;
+ pf->fd_add_err = 0;
+ pf->fd_atr_cnt = 0;
/* if ATR was auto disabled it can be re-enabled. */
if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
- (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) {
- pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) {
+ pf->hw_disabled_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
}
*
* Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found
**/
-static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, __be16 port)
+static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port)
{
u8 i;
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- __be16 port = ti->port;
+ u16 port = ntohs(ti->port);
u8 next_idx;
u8 idx;
/* Check if port already exists */
if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
- netdev_info(netdev, "port %d already offloaded\n",
- ntohs(port));
+ netdev_info(netdev, "port %d already offloaded\n", port);
return;
}
if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n",
- ntohs(port));
+ port);
return;
}
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- __be16 port = ti->port;
+ u16 port = ntohs(ti->port);
u8 idx;
idx = i40e_get_udp_port_idx(pf, port);
return;
not_found:
netdev_warn(netdev, "UDP port %d was not found, not deleting\n",
- ntohs(port));
+ port);
}
static int i40e_get_phys_port_id(struct net_device *netdev,
.ndo_poll_controller = i40e_netpoll,
#endif
.ndo_setup_tc = __i40e_setup_tc,
-#ifdef I40E_FCOE
- .ndo_fcoe_enable = i40e_fcoe_enable,
- .ndo_fcoe_disable = i40e_fcoe_disable,
-#endif
.ndo_set_features = i40e_set_features,
.ndo_set_vf_mac = i40e_ndo_set_vf_mac,
.ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan,
if (vsi->type == I40E_VSI_MAIN) {
SET_NETDEV_DEV(netdev, &pf->pdev->dev);
ether_addr_copy(mac_addr, hw->mac.perm_addr);
- /* The following steps are necessary to prevent reception
- * of tagged packets - some older NVM configurations load a
- * default a MAC-VLAN filter that accepts any tagged packet
- * which must be replaced by a normal filter.
+ /* The following steps are necessary to properly keep track of
+ * MAC-VLAN filters loaded into firmware - first we remove
+ * filter that is automatically generated by firmware and then
+ * add new filter both to the driver hash table and firmware.
*/
i40e_rm_default_mac_filter(vsi, mac_addr);
spin_lock_bh(&vsi->mac_filter_hash_lock);
netdev->netdev_ops = &i40e_netdev_ops;
netdev->watchdog_timeo = 5 * HZ;
i40e_set_ethtool_ops(netdev);
-#ifdef I40E_FCOE
- i40e_fcoe_config_netdev(netdev, vsi);
-#endif
/* MTU range: 68 - 9706 */
netdev->min_mtu = ETH_MIN_MTU;
i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
break;
-#ifdef I40E_FCOE
- case I40E_VSI_FCOE:
- ret = i40e_fcoe_vsi_init(vsi, &ctxt);
- if (ret) {
- dev_info(&pf->pdev->dev, "failed to initialize FCoE VSI\n");
- return ret;
- }
- break;
-
-#endif /* I40E_FCOE */
case I40E_VSI_IWARP:
/* send down message to iWARP */
break;
}
}
case I40E_VSI_VMDQ2:
- case I40E_VSI_FCOE:
ret = i40e_config_netdev(vsi);
if (ret)
goto err_netdev;
int queues_left;
pf->num_lan_qps = 0;
-#ifdef I40E_FCOE
- pf->num_fcoe_qps = 0;
-#endif
/* Find the max queues to be put into basic use. We'll always be
* using TC0, whether or not DCB is running, and TC0 will get the
/* make sure all the fancies are disabled */
pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
I40E_FLAG_IWARP_ENABLED |
-#ifdef I40E_FCOE
- I40E_FLAG_FCOE_ENABLED |
-#endif
I40E_FLAG_FD_SB_ENABLED |
I40E_FLAG_FD_ATR_ENABLED |
I40E_FLAG_DCB_CAPABLE |
pf->flags &= ~(I40E_FLAG_RSS_ENABLED |
I40E_FLAG_IWARP_ENABLED |
-#ifdef I40E_FCOE
- I40E_FLAG_FCOE_ENABLED |
-#endif
I40E_FLAG_FD_SB_ENABLED |
I40E_FLAG_FD_ATR_ENABLED |
I40E_FLAG_DCB_ENABLED |
queues_left -= pf->num_lan_qps;
}
-#ifdef I40E_FCOE
- if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
- if (I40E_DEFAULT_FCOE <= queues_left) {
- pf->num_fcoe_qps = I40E_DEFAULT_FCOE;
- } else if (I40E_MINIMUM_FCOE <= queues_left) {
- pf->num_fcoe_qps = I40E_MINIMUM_FCOE;
- } else {
- pf->num_fcoe_qps = 0;
- pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
- dev_info(&pf->pdev->dev, "not enough queues for FCoE. FCoE feature will be disabled\n");
- }
-
- queues_left -= pf->num_fcoe_qps;
- }
-
-#endif
if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
if (queues_left > 1) {
queues_left -= 1; /* save 1 queue for FD */
pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
queues_left);
-#ifdef I40E_FCOE
- dev_dbg(&pf->pdev->dev, "fcoe queues = %d\n", pf->num_fcoe_qps);
-#endif
}
/**
i += snprintf(&buf[i], REMAIN(i), " Geneve");
if (pf->flags & I40E_FLAG_PTP)
i += snprintf(&buf[i], REMAIN(i), " PTP");
-#ifdef I40E_FCOE
- if (pf->flags & I40E_FLAG_FCOE_ENABLED)
- i += snprintf(&buf[i], REMAIN(i), " FCOE");
-#endif
if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
i += snprintf(&buf[i], REMAIN(i), " VEB");
else
hw->bus.bus_id = pdev->bus->number;
pf->instance = pfs_found;
+ INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+ INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
/* set up the locks for the AQ, do this only once in probe
* and destroy them only once in remove
*/
}
err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
- hw->func_caps.num_rx_qp,
- pf->fcoe_hmc_cntx_num, pf->fcoe_hmc_filt_num);
+ hw->func_caps.num_rx_qp, 0, 0);
if (err) {
dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err);
goto err_init_lan_hmc;
i40e_get_port_mac_addr(hw, hw->mac.port_addr);
if (is_valid_ether_addr(hw->mac.port_addr))
pf->flags |= I40E_FLAG_PORT_ID_VALID;
-#ifdef I40E_FCOE
- err = i40e_get_san_mac_addr(hw, hw->mac.san_addr);
- if (err)
- dev_info(&pdev->dev,
- "(non-fatal) SAN MAC retrieval failed: %d\n", err);
- if (!is_valid_ether_addr(hw->mac.san_addr)) {
- dev_warn(&pdev->dev, "invalid SAN MAC address %pM, falling back to LAN MAC\n",
- hw->mac.san_addr);
- ether_addr_copy(hw->mac.san_addr, hw->mac.addr);
- }
- dev_info(&pf->pdev->dev, "SAN MAC: %pM\n", hw->mac.san_addr);
-#endif /* I40E_FCOE */
pci_set_drvdata(pdev, pf);
pci_save_state(pdev);
dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
err);
-#ifdef I40E_FCOE
- /* create FCoE interface */
- i40e_fcoe_vsi_setup(pf);
-
-#endif
#define PCI_SPEED_SIZE 8
#define PCI_WIDTH_SIZE 8
/* Devices on the IOSF bus do not have this information
* Chris Telfer <chris.telfer@netronome.com>
*/
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/module.h>
#include <net/pkt_cls.h>
#include <net/vxlan.h>
+#include "nfpcore/nfp_nsp.h"
#include "nfp_net_ctrl.h"
#include "nfp_net.h"
put_unaligned_le32(reg, fw_ver);
}
-static dma_addr_t
-nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz,
- int direction)
+static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
{
- return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM,
- bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+ return dma_map_single(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
+ dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+ dp->rx_dma_dir);
}
-static void
-nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr,
- unsigned int bufsz, int direction)
+static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
{
- dma_unmap_single(&nn->pdev->dev, dma_addr,
- bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+ dma_unmap_single(dp->dev, dma_addr,
+ dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
+ dp->rx_dma_dir);
}
/* Firmware reconfig
nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
unsigned int n)
{
+ struct nfp_net_dp *dp = &nn->dp;
+
nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
- nn->num_r_vecs = nn->max_r_vecs;
+ dp->num_r_vecs = nn->max_r_vecs;
memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
- if (nn->num_rx_rings > nn->num_r_vecs ||
- nn->num_tx_rings > nn->num_r_vecs)
- nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n",
- nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs);
+ if (dp->num_rx_rings > dp->num_r_vecs ||
+ dp->num_tx_rings > dp->num_r_vecs)
+ dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
+ dp->num_rx_rings, dp->num_tx_rings,
+ dp->num_r_vecs);
- nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings);
- nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings);
- nn->num_stack_tx_rings = nn->num_tx_rings;
+ dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
+ dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
+ dp->num_stack_tx_rings = dp->num_tx_rings;
}
/**
return IRQ_HANDLED;
}
+bool nfp_net_link_changed_read_clear(struct nfp_net *nn)
+{
+ unsigned long flags;
+ bool ret;
+
+ spin_lock_irqsave(&nn->link_status_lock, flags);
+ ret = nn->link_changed;
+ nn->link_changed = false;
+ spin_unlock_irqrestore(&nn->link_status_lock, flags);
+
+ return ret;
+}
+
/**
* nfp_net_read_link_status() - Reread link status from control BAR
* @nn: NFP Network structure
goto out;
nn->link_up = link_up;
+ nn->link_changed = true;
if (nn->link_up) {
- netif_carrier_on(nn->netdev);
- netdev_info(nn->netdev, "NIC Link is Up\n");
+ netif_carrier_on(nn->dp.netdev);
+ netdev_info(nn->dp.netdev, "NIC Link is Up\n");
} else {
- netif_carrier_off(nn->netdev);
- netdev_info(nn->netdev, "NIC Link is Down\n");
+ netif_carrier_off(nn->dp.netdev);
+ netdev_info(nn->dp.netdev, "NIC Link is Down\n");
}
out:
spin_unlock_irqrestore(&nn->link_status_lock, flags);
rx_ring->r_vec = r_vec;
rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
- rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1);
-
rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
- rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx);
}
/**
entry = &nn->irq_entries[vector_idx];
- snprintf(name, name_sz, format, netdev_name(nn->netdev));
+ snprintf(name, name_sz, format, netdev_name(nn->dp.netdev));
err = request_irq(entry->vector, handler, 0, name, nn);
if (err) {
nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
/**
* nfp_net_tx_tso() - Set up Tx descriptor for LSO
- * @nn: NFP Net device
* @r_vec: per-ring structure
* @txbuf: Pointer to driver soft TX descriptor
* @txd: Pointer to HW TX descriptor
* Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
* Return error on packet header greater than maximum supported LSO header size.
*/
-static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
struct nfp_net_tx_buf *txbuf,
struct nfp_net_tx_desc *txd, struct sk_buff *skb)
{
/**
* nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
- * @nn: NFP Net device
+ * @dp: NFP Net data path struct
* @r_vec: per-ring structure
* @txbuf: Pointer to driver soft TX descriptor
* @txd: Pointer to TX descriptor
* This function sets the TX checksum flags in the TX descriptor based
* on the configuration and the protocol of the packet to be transmitted.
*/
-static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+static void nfp_net_tx_csum(struct nfp_net_dp *dp,
+ struct nfp_net_r_vector *r_vec,
struct nfp_net_tx_buf *txbuf,
struct nfp_net_tx_desc *txd, struct sk_buff *skb)
{
struct iphdr *iph;
u8 l4_hdr;
- if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
+ if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
return;
if (skb->ip_summed != CHECKSUM_PARTIAL)
} else if (ipv6h->version == 6) {
l4_hdr = ipv6h->nexthdr;
} else {
- nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n",
- iph->version);
+ nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
return;
}
txd->flags |= PCIE_DESC_TX_UDP_CSUM;
break;
default:
- nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n",
- l4_hdr);
+ nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
return;
}
{
struct nfp_net *nn = netdev_priv(netdev);
const struct skb_frag_struct *frag;
- struct nfp_net_r_vector *r_vec;
struct nfp_net_tx_desc *txd, txdg;
- struct nfp_net_tx_buf *txbuf;
struct nfp_net_tx_ring *tx_ring;
+ struct nfp_net_r_vector *r_vec;
+ struct nfp_net_tx_buf *txbuf;
struct netdev_queue *nd_q;
+ struct nfp_net_dp *dp;
dma_addr_t dma_addr;
unsigned int fsize;
int f, nr_frags;
int wr_idx;
u16 qidx;
+ dp = &nn->dp;
qidx = skb_get_queue_mapping(skb);
- tx_ring = &nn->tx_rings[qidx];
+ tx_ring = &dp->tx_rings[qidx];
r_vec = tx_ring->r_vec;
- nd_q = netdev_get_tx_queue(nn->netdev, qidx);
+ nd_q = netdev_get_tx_queue(dp->netdev, qidx);
nr_frags = skb_shinfo(skb)->nr_frags;
if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
- nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n",
- qidx, tx_ring->wr_p, tx_ring->rd_p);
+ nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
+ qidx, tx_ring->wr_p, tx_ring->rd_p);
netif_tx_stop_queue(nd_q);
+ nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_busy++;
u64_stats_update_end(&r_vec->tx_sync);
}
/* Start with the head skbuf */
- dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb),
+ dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
DMA_TO_DEVICE);
- if (dma_mapping_error(&nn->pdev->dev, dma_addr))
+ if (dma_mapping_error(dp->dev, dma_addr))
goto err_free;
wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
txd->mss = 0;
txd->l4_offset = 0;
- nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb);
+ nfp_net_tx_tso(r_vec, txbuf, txd, skb);
- nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb);
+ nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
- if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
+ if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
txd->flags |= PCIE_DESC_TX_VLAN;
txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
}
frag = &skb_shinfo(skb)->frags[f];
fsize = skb_frag_size(frag);
- dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0,
+ dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
fsize, DMA_TO_DEVICE);
- if (dma_mapping_error(&nn->pdev->dev, dma_addr))
+ if (dma_mapping_error(dp->dev, dma_addr))
goto err_unmap;
wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1);
--f;
while (f >= 0) {
frag = &skb_shinfo(skb)->frags[f];
- dma_unmap_page(&nn->pdev->dev,
- tx_ring->txbufs[wr_idx].dma_addr,
+ dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
tx_ring->txbufs[wr_idx].skb = NULL;
tx_ring->txbufs[wr_idx].dma_addr = 0;
if (wr_idx < 0)
wr_idx += tx_ring->cnt;
}
- dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr,
+ dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
tx_ring->txbufs[wr_idx].skb = NULL;
tx_ring->txbufs[wr_idx].dma_addr = 0;
tx_ring->txbufs[wr_idx].fidx = -2;
err_free:
- nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n");
+ nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
+ nfp_net_tx_xmit_more_flush(tx_ring);
u64_stats_update_begin(&r_vec->tx_sync);
r_vec->tx_errors++;
u64_stats_update_end(&r_vec->tx_sync);
static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
+ struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
const struct skb_frag_struct *frag;
struct netdev_queue *nd_q;
u32 done_pkts = 0, done_bytes = 0;
if (fidx == -1) {
/* unmap head */
- dma_unmap_single(&nn->pdev->dev,
- tx_ring->txbufs[idx].dma_addr,
+ dma_unmap_single(dp->dev, tx_ring->txbufs[idx].dma_addr,
skb_headlen(skb), DMA_TO_DEVICE);
done_pkts += tx_ring->txbufs[idx].pkt_cnt;
} else {
/* unmap fragment */
frag = &skb_shinfo(skb)->frags[fidx];
- dma_unmap_page(&nn->pdev->dev,
- tx_ring->txbufs[idx].dma_addr,
+ dma_unmap_page(dp->dev, tx_ring->txbufs[idx].dma_addr,
skb_frag_size(frag), DMA_TO_DEVICE);
}
r_vec->tx_pkts += done_pkts;
u64_stats_update_end(&r_vec->tx_sync);
- nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
+ nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
if (nfp_net_tx_ring_should_wake(tx_ring)) {
/* Make sure TX thread will see updated tx_ring->rd_p */
static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
+ struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
u32 done_pkts = 0, done_bytes = 0;
int idx, todo;
u32 qcp_rd_p;
if (!tx_ring->txbufs[idx].frag)
continue;
- nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr,
- nn->fl_bufsz, DMA_BIDIRECTIONAL);
+ nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[idx].dma_addr);
__free_page(virt_to_page(tx_ring->txbufs[idx].frag));
done_pkts++;
/**
* nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
- * @nn: NFP Net device
+ * @dp: NFP Net data path struct
* @tx_ring: TX ring structure
*
* Assumes that the device is stopped
*/
static void
-nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
+nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
const struct skb_frag_struct *frag;
- struct pci_dev *pdev = nn->pdev;
struct netdev_queue *nd_q;
while (tx_ring->rd_p != tx_ring->wr_p) {
tx_buf = &tx_ring->txbufs[idx];
if (tx_ring == r_vec->xdp_ring) {
- nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr,
- nn->fl_bufsz, DMA_BIDIRECTIONAL);
+ nfp_net_dma_unmap_rx(dp, tx_buf->dma_addr);
__free_page(virt_to_page(tx_ring->txbufs[idx].frag));
} else {
struct sk_buff *skb = tx_ring->txbufs[idx].skb;
if (tx_buf->fidx == -1) {
/* unmap head */
- dma_unmap_single(&pdev->dev, tx_buf->dma_addr,
+ dma_unmap_single(dp->dev, tx_buf->dma_addr,
skb_headlen(skb),
DMA_TO_DEVICE);
} else {
/* unmap fragment */
frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
- dma_unmap_page(&pdev->dev, tx_buf->dma_addr,
+ dma_unmap_page(dp->dev, tx_buf->dma_addr,
skb_frag_size(frag),
DMA_TO_DEVICE);
}
if (tx_ring == r_vec->xdp_ring)
return;
- nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
+ nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
netdev_tx_reset_queue(nd_q);
}
struct nfp_net *nn = netdev_priv(netdev);
int i;
- for (i = 0; i < nn->netdev->real_num_tx_queues; i++) {
+ for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) {
if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
continue;
nn_warn(nn, "TX timeout on ring: %d\n", i);
/* Receive processing
*/
static unsigned int
-nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu)
+nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
{
unsigned int fl_bufsz;
fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
- if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
+ fl_bufsz += dp->rx_dma_off;
+ if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
fl_bufsz += NFP_NET_MAX_PREPEND;
else
- fl_bufsz += nn->rx_offset;
- fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu;
+ fl_bufsz += dp->rx_offset;
+ fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
/**
* nfp_net_rx_alloc_one() - Allocate and map page frag for RX
+ * @dp: NFP Net data path struct
* @rx_ring: RX ring structure of the skb
* @dma_addr: Pointer to storage for DMA address (output param)
- * @fl_bufsz: size of freelist buffers
- * @xdp: Whether XDP is enabled
*
* This function will allcate a new page frag, map it for DMA.
*
* Return: allocated page frag or NULL on failure.
*/
static void *
-nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
- unsigned int fl_bufsz, bool xdp)
+nfp_net_rx_alloc_one(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
+ dma_addr_t *dma_addr)
{
- struct nfp_net *nn = rx_ring->r_vec->nfp_net;
- int direction;
void *frag;
- if (!xdp)
- frag = netdev_alloc_frag(fl_bufsz);
+ if (!dp->xdp_prog)
+ frag = netdev_alloc_frag(dp->fl_bufsz);
else
frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD));
if (!frag) {
- nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
+ nn_dp_warn(dp, "Failed to alloc receive page frag\n");
return NULL;
}
- direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-
- *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction);
- if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
- nfp_net_free_frag(frag, xdp);
- nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+ *dma_addr = nfp_net_dma_map_rx(dp, frag);
+ if (dma_mapping_error(dp->dev, *dma_addr)) {
+ nfp_net_free_frag(frag, dp->xdp_prog);
+ nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
return NULL;
}
return frag;
}
-static void *
-nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr)
+static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
void *frag;
- if (!nn->xdp_prog)
- frag = napi_alloc_frag(nn->fl_bufsz);
+ if (!dp->xdp_prog)
+ frag = napi_alloc_frag(dp->fl_bufsz);
else
frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD));
if (!frag) {
- nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
+ nn_dp_warn(dp, "Failed to alloc receive page frag\n");
return NULL;
}
- *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction);
- if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
- nfp_net_free_frag(frag, nn->xdp_prog);
- nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+ *dma_addr = nfp_net_dma_map_rx(dp, frag);
+ if (dma_mapping_error(dp->dev, *dma_addr)) {
+ nfp_net_free_frag(frag, dp->xdp_prog);
+ nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
return NULL;
}
/**
* nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
+ * @dp: NFP Net data path struct
* @rx_ring: RX ring structure
* @frag: page fragment buffer
* @dma_addr: DMA address of skb mapping
*/
-static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
+static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
+ struct nfp_net_rx_ring *rx_ring,
void *frag, dma_addr_t dma_addr)
{
unsigned int wr_idx;
/* Fill freelist descriptor */
rx_ring->rxds[wr_idx].fld.reserved = 0;
rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr);
+ nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
+ dma_addr + dp->rx_dma_off);
rx_ring->wr_p++;
rx_ring->wr_ptr_add++;
/**
* nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
- * @nn: NFP Net device
+ * @dp: NFP Net data path struct
* @rx_ring: RX ring to remove buffers from
- * @xdp: Whether XDP is enabled
*
* Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
* entries. After device is disabled nfp_net_rx_ring_reset() must be called
* to restore required ring geometry.
*/
static void
-nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
- bool xdp)
+nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
+ struct nfp_net_rx_ring *rx_ring)
{
- int direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
unsigned int i;
for (i = 0; i < rx_ring->cnt - 1; i++) {
if (!rx_ring->rxbufs[i].frag)
continue;
- nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr,
- rx_ring->bufsz, direction);
- nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp);
+ nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
+ nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
rx_ring->rxbufs[i].dma_addr = 0;
rx_ring->rxbufs[i].frag = NULL;
}
/**
* nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
- * @nn: NFP Net device
+ * @dp: NFP Net data path struct
* @rx_ring: RX ring to remove buffers from
- * @xdp: Whether XDP is enabled
*/
static int
-nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
- bool xdp)
+nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
+ struct nfp_net_rx_ring *rx_ring)
{
struct nfp_net_rx_buf *rxbufs;
unsigned int i;
for (i = 0; i < rx_ring->cnt - 1; i++) {
rxbufs[i].frag =
- nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
- rx_ring->bufsz, xdp);
+ nfp_net_rx_alloc_one(dp, rx_ring, &rxbufs[i].dma_addr);
if (!rxbufs[i].frag) {
- nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp);
+ nfp_net_rx_ring_bufs_free(dp, rx_ring);
return -ENOMEM;
}
}
/**
* nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
+ * @dp: NFP Net data path struct
* @rx_ring: RX ring to fill
*/
-static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
+static void
+nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
+ struct nfp_net_rx_ring *rx_ring)
{
unsigned int i;
for (i = 0; i < rx_ring->cnt - 1; i++)
- nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag,
+ nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
rx_ring->rxbufs[i].dma_addr);
}
/**
* nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
- * @nn: NFP Net device
+ * @dp: NFP Net data path struct
* @r_vec: per-ring structure
* @rxd: Pointer to RX descriptor
* @skb: Pointer to SKB
*/
-static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+static void nfp_net_rx_csum(struct nfp_net_dp *dp,
+ struct nfp_net_r_vector *r_vec,
struct nfp_net_rx_desc *rxd, struct sk_buff *skb)
{
skb_checksum_none_assert(skb);
- if (!(nn->netdev->features & NETIF_F_RXCSUM))
+ if (!(dp->netdev->features & NETIF_F_RXCSUM))
return;
if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
static void
nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
- struct nfp_net_rx_desc *rxd)
+ void *data, struct nfp_net_rx_desc *rxd)
{
- struct nfp_net_rx_hash *rx_hash;
+ struct nfp_net_rx_hash *rx_hash = data;
if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
return;
- rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
-
nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
&rx_hash->hash);
}
static void *
nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
- int meta_len)
+ void *data, int meta_len)
{
- u8 *data = skb->data - meta_len;
u32 meta_info;
meta_info = get_unaligned_be32(data);
}
static void
-nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
- struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb)
+nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
+ struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
+ struct sk_buff *skb)
{
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_drops++;
if (skb && rxbuf && skb->head == rxbuf->frag)
page_ref_inc(virt_to_head_page(rxbuf->frag));
if (rxbuf)
- nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr);
+ nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
if (skb)
dev_kfree_skb_any(skb);
}
static bool
-nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
struct nfp_net_tx_ring *tx_ring,
- struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off,
+ struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
unsigned int pkt_len)
{
struct nfp_net_tx_buf *txbuf;
int wr_idx;
if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
- nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+ nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
return false;
}
- new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr);
+ new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
if (unlikely(!new_frag)) {
- nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+ nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, NULL);
return false;
}
- nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+ nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
txbuf->pkt_cnt = 1;
txbuf->real_len = pkt_len;
- dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off,
+ dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
pkt_len, DMA_BIDIRECTIONAL);
/* Build TX descriptor */
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = PCIE_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(pkt_len);
- nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off);
+ nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
txd->data_len = cpu_to_le16(pkt_len);
txd->flags = 0;
return true;
}
-static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
+static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
+ unsigned int *off, unsigned int *len)
{
struct xdp_buff xdp;
+ void *orig_data;
+ int ret;
+
+ xdp.data_hard_start = hard_start;
+ xdp.data = data + *off;
+ xdp.data_end = data + *off + *len;
+
+ orig_data = xdp.data;
+ ret = bpf_prog_run_xdp(prog, &xdp);
- xdp.data = data;
- xdp.data_end = data + len;
+ *len -= xdp.data - orig_data;
+ *off += xdp.data - orig_data;
- return bpf_prog_run_xdp(prog, &xdp);
+ return ret;
}
/**
static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
+ struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
struct nfp_net_tx_ring *tx_ring;
struct bpf_prog *xdp_prog;
unsigned int true_bufsz;
struct sk_buff *skb;
int pkts_polled = 0;
- int rx_dma_map_dir;
int idx;
rcu_read_lock();
- xdp_prog = READ_ONCE(nn->xdp_prog);
- rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
- true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz;
+ xdp_prog = READ_ONCE(dp->xdp_prog);
+ true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
- unsigned int meta_len, data_len, data_off, pkt_len, pkt_off;
+ unsigned int meta_len, data_len, data_off, pkt_len;
+ u8 meta_prepend[NFP_NET_MAX_PREPEND];
struct nfp_net_rx_buf *rxbuf;
struct nfp_net_rx_desc *rxd;
dma_addr_t new_dma_addr;
void *new_frag;
+ u8 *meta;
idx = rx_ring->rd_p & (rx_ring->cnt - 1);
data_len = le16_to_cpu(rxd->rxd.data_len);
pkt_len = data_len - meta_len;
- if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
- pkt_off = meta_len;
+ if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
+ data_off = NFP_NET_RX_BUF_HEADROOM + meta_len;
else
- pkt_off = nn->rx_offset;
- data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off;
+ data_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_offset;
+ data_off += dp->rx_dma_off;
/* Stats update */
u64_stats_update_begin(&r_vec->rx_sync);
r_vec->rx_bytes += pkt_len;
u64_stats_update_end(&r_vec->rx_sync);
+ /* Pointer to start of metadata */
+ meta = rxbuf->frag + data_off - meta_len;
+
+ if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
+ (dp->rx_offset && meta_len > dp->rx_offset))) {
+ nn_dp_warn(dp, "oversized RX packet metadata %u\n",
+ meta_len);
+ nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
+ continue;
+ }
+
if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
- nn->bpf_offload_xdp)) {
+ dp->bpf_offload_xdp)) {
+ unsigned int dma_off;
+ void *hard_start;
int act;
- dma_sync_single_for_cpu(&nn->pdev->dev,
- rxbuf->dma_addr + pkt_off,
- pkt_len, DMA_BIDIRECTIONAL);
- act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off,
- pkt_len);
+ hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+ dma_off = data_off - NFP_NET_RX_BUF_HEADROOM;
+ dma_sync_single_for_cpu(dp->dev, rxbuf->dma_addr,
+ dma_off + pkt_len,
+ DMA_BIDIRECTIONAL);
+
+ /* Move prepend out of the way */
+ if (xdp_prog->xdp_adjust_head) {
+ memcpy(meta_prepend, meta, meta_len);
+ meta = meta_prepend;
+ }
+
+ act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
+ &data_off, &pkt_len);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
- if (unlikely(!nfp_net_tx_xdp_buf(nn, rx_ring,
+ dma_off = data_off - NFP_NET_RX_BUF_HEADROOM;
+ if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
tx_ring, rxbuf,
- pkt_off, pkt_len)))
- trace_xdp_exception(nn->netdev, xdp_prog, act);
+ dma_off,
+ pkt_len)))
+ trace_xdp_exception(dp->netdev,
+ xdp_prog, act);
continue;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
- trace_xdp_exception(nn->netdev, xdp_prog, act);
+ trace_xdp_exception(dp->netdev, xdp_prog, act);
case XDP_DROP:
- nfp_net_rx_give_one(rx_ring, rxbuf->frag,
+ nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
rxbuf->dma_addr);
continue;
}
skb = build_skb(rxbuf->frag, true_bufsz);
if (unlikely(!skb)) {
- nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL);
+ nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
continue;
}
- new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir,
- &new_dma_addr);
+ new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
if (unlikely(!new_frag)) {
- nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb);
+ nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
continue;
}
- nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz,
- rx_dma_map_dir);
+ nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
- nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+ nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
skb_reserve(skb, data_off);
skb_put(skb, pkt_len);
- if (nn->fw_ver.major <= 3) {
- nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+ if (!dp->chained_metadata_format) {
+ nfp_net_set_hash_desc(dp->netdev, skb, meta, rxd);
} else if (meta_len) {
void *end;
- end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
- if (unlikely(end != skb->data)) {
- nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
- nfp_net_rx_drop(r_vec, rx_ring, NULL, skb);
+ end = nfp_net_parse_meta(dp->netdev, skb, meta,
+ meta_len);
+ if (unlikely(end != meta + meta_len)) {
+ nn_dp_warn(dp, "invalid RX packet metadata\n");
+ nfp_net_rx_drop(dp, r_vec, rx_ring, NULL, skb);
continue;
}
}
skb_record_rx_queue(skb, rx_ring->idx);
- skb->protocol = eth_type_trans(skb, nn->netdev);
+ skb->protocol = eth_type_trans(skb, dp->netdev);
- nfp_net_rx_csum(nn, r_vec, rxd, skb);
+ nfp_net_rx_csum(dp, r_vec, rxd, skb);
if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
nfp_net_xdp_complete(r_vec->xdp_ring);
}
- if (pkts_polled < budget) {
- napi_complete_done(napi, pkts_polled);
- nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
- }
+ if (pkts_polled < budget)
+ if (napi_complete_done(napi, pkts_polled))
+ nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
return pkts_polled;
}
static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
- struct pci_dev *pdev = nn->pdev;
+ struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
kfree(tx_ring->txbufs);
if (tx_ring->txds)
- dma_free_coherent(&pdev->dev, tx_ring->size,
+ dma_free_coherent(dp->dev, tx_ring->size,
tx_ring->txds, tx_ring->dma);
tx_ring->cnt = 0;
/**
* nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
+ * @dp: NFP Net data path struct
* @tx_ring: TX Ring structure to allocate
- * @cnt: Ring buffer count
* @is_xdp: True if ring will be used for XDP
*
* Return: 0 on success, negative errno otherwise.
*/
static int
-nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp)
+nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring,
+ bool is_xdp)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
- struct pci_dev *pdev = nn->pdev;
int sz;
- tx_ring->cnt = cnt;
+ tx_ring->cnt = dp->txd_cnt;
tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
- tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
+ tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
&tx_ring->dma, GFP_KERNEL);
if (!tx_ring->txds)
goto err_alloc;
goto err_alloc;
if (!is_xdp)
- netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask,
+ netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
tx_ring->idx);
- nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n",
- tx_ring->idx, tx_ring->qcidx,
- tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds,
- is_xdp ? "XDP" : "");
-
return 0;
err_alloc:
return -ENOMEM;
}
-static struct nfp_net_tx_ring *
-nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
- unsigned int num_stack_tx_rings)
+static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
{
- struct nfp_net_tx_ring *rings;
unsigned int r;
- rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
- if (!rings)
- return NULL;
+ dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
+ GFP_KERNEL);
+ if (!dp->tx_rings)
+ return -ENOMEM;
- for (r = 0; r < s->n_rings; r++) {
+ for (r = 0; r < dp->num_tx_rings; r++) {
int bias = 0;
- if (r >= num_stack_tx_rings)
- bias = num_stack_tx_rings;
+ if (r >= dp->num_stack_tx_rings)
+ bias = dp->num_stack_tx_rings;
- nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r);
+ nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
+ r);
- if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias))
+ if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r], bias))
goto err_free_prev;
}
- return s->rings = rings;
+ return 0;
err_free_prev:
while (r--)
- nfp_net_tx_ring_free(&rings[r]);
- kfree(rings);
- return NULL;
-}
-
-static void
-nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
-{
- struct nfp_net_ring_set new = *s;
-
- s->dcnt = nn->txd_cnt;
- s->rings = nn->tx_rings;
- s->n_rings = nn->num_tx_rings;
-
- nn->txd_cnt = new.dcnt;
- nn->tx_rings = new.rings;
- nn->num_tx_rings = new.n_rings;
+ nfp_net_tx_ring_free(&dp->tx_rings[r]);
+ kfree(dp->tx_rings);
+ return -ENOMEM;
}
-static void
-nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s)
+static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
{
- struct nfp_net_tx_ring *rings = s->rings;
unsigned int r;
- for (r = 0; r < s->n_rings; r++)
- nfp_net_tx_ring_free(&rings[r]);
+ for (r = 0; r < dp->num_tx_rings; r++)
+ nfp_net_tx_ring_free(&dp->tx_rings[r]);
- kfree(rings);
+ kfree(dp->tx_rings);
}
/**
static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
{
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
- struct pci_dev *pdev = nn->pdev;
+ struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
kfree(rx_ring->rxbufs);
if (rx_ring->rxds)
- dma_free_coherent(&pdev->dev, rx_ring->size,
+ dma_free_coherent(dp->dev, rx_ring->size,
rx_ring->rxds, rx_ring->dma);
rx_ring->cnt = 0;
/**
* nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
+ * @dp: NFP Net data path struct
* @rx_ring: RX ring to allocate
- * @fl_bufsz: Size of buffers to allocate
- * @cnt: Ring buffer count
*
* Return: 0 on success, negative errno otherwise.
*/
static int
-nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz,
- u32 cnt)
+nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
- struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
- struct nfp_net *nn = r_vec->nfp_net;
- struct pci_dev *pdev = nn->pdev;
int sz;
- rx_ring->cnt = cnt;
- rx_ring->bufsz = fl_bufsz;
-
+ rx_ring->cnt = dp->rxd_cnt;
rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
- rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size,
+ rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
if (!rx_ring->rxds)
goto err_alloc;
if (!rx_ring->rxbufs)
goto err_alloc;
- nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n",
- rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx,
- rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds);
-
return 0;
err_alloc:
return -ENOMEM;
}
-static struct nfp_net_rx_ring *
-nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
- bool xdp)
+static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
{
- unsigned int fl_bufsz = nfp_net_calc_fl_bufsz(nn, s->mtu);
- struct nfp_net_rx_ring *rings;
unsigned int r;
- rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
- if (!rings)
- return NULL;
+ dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
+ GFP_KERNEL);
+ if (!dp->rx_rings)
+ return -ENOMEM;
- for (r = 0; r < s->n_rings; r++) {
- nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r);
+ for (r = 0; r < dp->num_rx_rings; r++) {
+ nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r);
- if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt))
+ if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r]))
goto err_free_prev;
- if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp))
+ if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r]))
goto err_free_ring;
}
- return s->rings = rings;
+ return 0;
err_free_prev:
while (r--) {
- nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
+ nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
err_free_ring:
- nfp_net_rx_ring_free(&rings[r]);
+ nfp_net_rx_ring_free(&dp->rx_rings[r]);
}
- kfree(rings);
- return NULL;
-}
-
-static void
-nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
-{
- struct nfp_net_ring_set new = *s;
-
- s->mtu = nn->netdev->mtu;
- s->dcnt = nn->rxd_cnt;
- s->rings = nn->rx_rings;
- s->n_rings = nn->num_rx_rings;
-
- nn->netdev->mtu = new.mtu;
- nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu);
- nn->rxd_cnt = new.dcnt;
- nn->rx_rings = new.rings;
- nn->num_rx_rings = new.n_rings;
+ kfree(dp->rx_rings);
+ return -ENOMEM;
}
-static void
-nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s,
- bool xdp)
+static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
{
- struct nfp_net_rx_ring *rings = s->rings;
unsigned int r;
- for (r = 0; r < s->n_rings; r++) {
- nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
- nfp_net_rx_ring_free(&rings[r]);
+ for (r = 0; r < dp->num_rx_rings; r++) {
+ nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
+ nfp_net_rx_ring_free(&dp->rx_rings[r]);
}
- kfree(rings);
+ kfree(dp->rx_rings);
}
static void
-nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
- int idx)
+nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
+ struct nfp_net_r_vector *r_vec, int idx)
{
- r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL;
+ r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
r_vec->tx_ring =
- idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL;
+ idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
- r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ?
- &nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL;
+ r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
+ &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
}
static int
int err;
/* Setup NAPI */
- netif_napi_add(nn->netdev, &r_vec->napi,
+ netif_napi_add(nn->dp.netdev, &r_vec->napi,
nfp_net_poll, NAPI_POLL_WEIGHT);
snprintf(r_vec->name, sizeof(r_vec->name),
- "%s-rxtx-%d", nn->netdev->name, idx);
+ "%s-rxtx-%d", nn->dp.netdev->name, idx);
err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
r_vec);
if (err) {
{
int i;
- for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4)
+ for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
get_unaligned_le32(nn->rss_key + i));
}
/* copy RX interrupt coalesce parameters */
value = (nn->rx_coalesce_max_frames << 16) |
(factor * nn->rx_coalesce_usecs);
- for (i = 0; i < nn->num_rx_rings; i++)
+ for (i = 0; i < nn->dp.num_rx_rings; i++)
nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
/* copy TX interrupt coalesce parameters */
value = (nn->tx_coalesce_max_frames << 16) |
(factor * nn->tx_coalesce_usecs);
- for (i = 0; i < nn->num_tx_rings; i++)
+ for (i = 0; i < nn->dp.num_tx_rings; i++)
nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
}
static void nfp_net_write_mac_addr(struct nfp_net *nn)
{
nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
- get_unaligned_be32(nn->netdev->dev_addr));
+ get_unaligned_be32(nn->dp.netdev->dev_addr));
nn_writew(nn, NFP_NET_CFG_MACADDR + 6,
- get_unaligned_be16(nn->netdev->dev_addr + 4));
+ get_unaligned_be16(nn->dp.netdev->dev_addr + 4));
}
static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
unsigned int r;
int err;
- new_ctrl = nn->ctrl;
+ new_ctrl = nn->dp.ctrl;
new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
update = NFP_NET_CFG_UPDATE_GEN;
update |= NFP_NET_CFG_UPDATE_MSIX;
if (err)
nn_err(nn, "Could not disable device: %d\n", err);
- for (r = 0; r < nn->num_rx_rings; r++)
- nfp_net_rx_ring_reset(&nn->rx_rings[r]);
- for (r = 0; r < nn->num_tx_rings; r++)
- nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]);
- for (r = 0; r < nn->num_r_vecs; r++)
+ for (r = 0; r < nn->dp.num_rx_rings; r++)
+ nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
+ for (r = 0; r < nn->dp.num_tx_rings; r++)
+ nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
+ for (r = 0; r < nn->dp.num_r_vecs; r++)
nfp_net_vec_clear_ring_data(nn, r);
- nn->ctrl = new_ctrl;
+ nn->dp.ctrl = new_ctrl;
}
static void
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
-static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
+/**
+ * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
+ * @nn: NFP Net device to reconfigure
+ */
+static int nfp_net_set_config_and_enable(struct nfp_net *nn)
{
u32 new_ctrl, update = 0;
unsigned int r;
int err;
- new_ctrl = nn->ctrl;
+ new_ctrl = nn->dp.ctrl;
if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
nfp_net_rss_write_key(nn);
update |= NFP_NET_CFG_UPDATE_IRQMOD;
}
- for (r = 0; r < nn->num_tx_rings; r++)
- nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r);
- for (r = 0; r < nn->num_rx_rings; r++)
- nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r);
+ for (r = 0; r < nn->dp.num_tx_rings; r++)
+ nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
+ for (r = 0; r < nn->dp.num_rx_rings; r++)
+ nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
- nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
- 0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
+ nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ?
+ 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1);
- nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
- 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
+ nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ?
+ 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1);
nfp_net_write_mac_addr(nn);
- nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
+ nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.netdev->mtu);
nn_writel(nn, NFP_NET_CFG_FLBUFSZ,
- nn->fl_bufsz - NFP_NET_RX_BUF_NON_DATA);
+ nn->dp.fl_bufsz - NFP_NET_RX_BUF_NON_DATA);
/* Enable device */
new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
err = nfp_net_reconfig(nn, update);
+ if (err) {
+ nfp_net_clear_config_and_disable(nn);
+ return err;
+ }
- nn->ctrl = new_ctrl;
+ nn->dp.ctrl = new_ctrl;
- for (r = 0; r < nn->num_rx_rings; r++)
- nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]);
+ for (r = 0; r < nn->dp.num_rx_rings; r++)
+ nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
/* Since reconfiguration requests while NFP is down are ignored we
* have to wipe the entire VXLAN configuration and reinitialize it.
*/
- if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
+ if (nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN) {
memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
- udp_tunnel_get_rx_info(nn->netdev);
+ udp_tunnel_get_rx_info(nn->dp.netdev);
}
- return err;
-}
-
-/**
- * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
- * @nn: NFP Net device to reconfigure
- */
-static int nfp_net_set_config_and_enable(struct nfp_net *nn)
-{
- int err;
-
- err = __nfp_net_set_config_and_enable(nn);
- if (err)
- nfp_net_clear_config_and_disable(nn);
-
- return err;
+ return 0;
}
/**
{
unsigned int r;
- for (r = 0; r < nn->num_r_vecs; r++) {
+ for (r = 0; r < nn->dp.num_r_vecs; r++) {
napi_enable(&nn->r_vecs[r].napi);
enable_irq(nn->r_vecs[r].irq_vector);
}
- netif_tx_wake_all_queues(nn->netdev);
+ netif_tx_wake_all_queues(nn->dp.netdev);
enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
nfp_net_read_link_status(nn);
static int nfp_net_netdev_open(struct net_device *netdev)
{
struct nfp_net *nn = netdev_priv(netdev);
- struct nfp_net_ring_set rx = {
- .n_rings = nn->num_rx_rings,
- .mtu = nn->netdev->mtu,
- .dcnt = nn->rxd_cnt,
- };
- struct nfp_net_ring_set tx = {
- .n_rings = nn->num_tx_rings,
- .dcnt = nn->txd_cnt,
- };
int err, r;
- if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
- nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl);
- return -EBUSY;
- }
-
/* Step 1: Allocate resources for rings and the like
* - Request interrupts
* - Allocate RX and TX ring resources
goto err_free_exn;
disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
- for (r = 0; r < nn->num_r_vecs; r++) {
+ for (r = 0; r < nn->dp.num_r_vecs; r++) {
err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
if (err)
goto err_cleanup_vec_p;
}
- nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog);
- if (!nn->rx_rings) {
- err = -ENOMEM;
+ err = nfp_net_rx_rings_prepare(nn, &nn->dp);
+ if (err)
goto err_cleanup_vec;
- }
- nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx,
- nn->num_stack_tx_rings);
- if (!nn->tx_rings) {
- err = -ENOMEM;
+ err = nfp_net_tx_rings_prepare(nn, &nn->dp);
+ if (err)
goto err_free_rx_rings;
- }
for (r = 0; r < nn->max_r_vecs; r++)
- nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
+ nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
- err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings);
+ err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
if (err)
goto err_free_rings;
- err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings);
+ err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
if (err)
goto err_free_rings;
return 0;
err_free_rings:
- nfp_net_tx_ring_set_free(nn, &tx);
+ nfp_net_tx_rings_free(&nn->dp);
err_free_rx_rings:
- nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog);
+ nfp_net_rx_rings_free(&nn->dp);
err_cleanup_vec:
- r = nn->num_r_vecs;
+ r = nn->dp.num_r_vecs;
err_cleanup_vec_p:
while (r--)
nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
unsigned int r;
disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
- netif_carrier_off(nn->netdev);
+ netif_carrier_off(nn->dp.netdev);
nn->link_up = false;
- for (r = 0; r < nn->num_r_vecs; r++) {
+ for (r = 0; r < nn->dp.num_r_vecs; r++) {
disable_irq(nn->r_vecs[r].irq_vector);
napi_disable(&nn->r_vecs[r].napi);
}
- netif_tx_disable(nn->netdev);
+ netif_tx_disable(nn->dp.netdev);
}
/**
{
unsigned int r;
- for (r = 0; r < nn->num_rx_rings; r++) {
- nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog);
- nfp_net_rx_ring_free(&nn->rx_rings[r]);
+ for (r = 0; r < nn->dp.num_rx_rings; r++) {
+ nfp_net_rx_ring_bufs_free(&nn->dp, &nn->dp.rx_rings[r]);
+ nfp_net_rx_ring_free(&nn->dp.rx_rings[r]);
}
- for (r = 0; r < nn->num_tx_rings; r++)
- nfp_net_tx_ring_free(&nn->tx_rings[r]);
- for (r = 0; r < nn->num_r_vecs; r++)
+ for (r = 0; r < nn->dp.num_tx_rings; r++)
+ nfp_net_tx_ring_free(&nn->dp.tx_rings[r]);
+ for (r = 0; r < nn->dp.num_r_vecs; r++)
nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
- kfree(nn->rx_rings);
- kfree(nn->tx_rings);
+ kfree(nn->dp.rx_rings);
+ kfree(nn->dp.tx_rings);
nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
{
struct nfp_net *nn = netdev_priv(netdev);
- if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
- nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
- return 0;
- }
-
/* Step 1: Disable RX and TX rings from the Linux kernel perspective
*/
nfp_net_close_stack(nn);
struct nfp_net *nn = netdev_priv(netdev);
u32 new_ctrl;
- new_ctrl = nn->ctrl;
+ new_ctrl = nn->dp.ctrl;
if (netdev->flags & IFF_PROMISC) {
if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
}
- if (new_ctrl == nn->ctrl)
+ if (new_ctrl == nn->dp.ctrl)
return;
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
- nn->ctrl = new_ctrl;
+ nn->dp.ctrl = new_ctrl;
}
static void nfp_net_rss_init_itbl(struct nfp_net *nn)
for (i = 0; i < sizeof(nn->rss_itbl); i++)
nn->rss_itbl[i] =
- ethtool_rxfh_indir_default(i, nn->num_rx_rings);
+ ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
}
-static int
-nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs,
- unsigned int *stack_tx_rings,
- struct bpf_prog **xdp_prog,
- struct nfp_net_ring_set *rx,
- struct nfp_net_ring_set *tx)
+static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
+{
+ struct nfp_net_dp new_dp = *dp;
+
+ *dp = nn->dp;
+ nn->dp = new_dp;
+
+ nn->dp.netdev->mtu = new_dp.mtu;
+
+ if (!netif_is_rxfh_configured(nn->dp.netdev))
+ nfp_net_rss_init_itbl(nn);
+}
+
+static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
{
unsigned int r;
int err;
- if (rx)
- nfp_net_rx_ring_set_swap(nn, rx);
- if (tx)
- nfp_net_tx_ring_set_swap(nn, tx);
-
- swap(*num_vecs, nn->num_r_vecs);
- swap(*stack_tx_rings, nn->num_stack_tx_rings);
- *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
+ nfp_net_dp_swap(nn, dp);
for (r = 0; r < nn->max_r_vecs; r++)
- nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
-
- if (!netif_is_rxfh_configured(nn->netdev))
- nfp_net_rss_init_itbl(nn);
+ nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
- err = netif_set_real_num_rx_queues(nn->netdev,
- nn->num_rx_rings);
+ err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
if (err)
return err;
- if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) {
- err = netif_set_real_num_tx_queues(nn->netdev,
- nn->num_stack_tx_rings);
+ if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
+ err = netif_set_real_num_tx_queues(nn->dp.netdev,
+ nn->dp.num_stack_tx_rings);
if (err)
return err;
}
- return __nfp_net_set_config_and_enable(nn);
+ return nfp_net_set_config_and_enable(nn);
}
-static int
-nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog,
- struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
+{
+ struct nfp_net_dp *new;
+
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ *new = nn->dp;
+
+ /* Clear things which need to be recomputed */
+ new->fl_bufsz = 0;
+ new->tx_rings = NULL;
+ new->rx_rings = NULL;
+ new->num_r_vecs = 0;
+ new->num_stack_tx_rings = 0;
+
+ return new;
+}
+
+static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp)
{
/* XDP-enabled tests */
- if (!xdp_prog)
+ if (!dp->xdp_prog)
return 0;
- if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) {
+ if (dp->fl_bufsz > PAGE_SIZE) {
nn_warn(nn, "MTU too large w/ XDP enabled\n");
return -EINVAL;
}
- if (tx && tx->n_rings > nn->max_tx_rings) {
+ if (dp->num_tx_rings > nn->max_tx_rings) {
nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n");
return -EINVAL;
}
return 0;
}
-static void
-nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog,
- struct nfp_net_ring_set *rx,
- struct nfp_net_ring_set *tx,
- unsigned int stack_tx_rings, unsigned int num_vecs)
-{
- nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu;
- nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu);
- nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt;
- nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt;
- nn->num_rx_rings = rx ? rx->n_rings : nn->num_rx_rings;
- nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
- nn->num_stack_tx_rings = stack_tx_rings;
- nn->num_r_vecs = num_vecs;
- *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
-
- if (!netif_is_rxfh_configured(nn->netdev))
- nfp_net_rss_init_itbl(nn);
-}
-
-int
-nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
- struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp)
{
- unsigned int stack_tx_rings, num_vecs, r;
- int err;
+ int r, err;
- stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
- if (*xdp_prog)
- stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings;
+ dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
- num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings);
+ dp->num_stack_tx_rings = dp->num_tx_rings;
+ if (dp->xdp_prog)
+ dp->num_stack_tx_rings -= dp->num_rx_rings;
- err = nfp_net_check_config(nn, *xdp_prog, rx, tx);
+ dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
+
+ err = nfp_net_check_config(nn, dp);
if (err)
- return err;
+ goto exit_free_dp;
- if (!netif_running(nn->netdev)) {
- nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx,
- stack_tx_rings, num_vecs);
- return 0;
+ if (!netif_running(dp->netdev)) {
+ nfp_net_dp_swap(nn, dp);
+ err = 0;
+ goto exit_free_dp;
}
/* Prepare new rings */
- for (r = nn->num_r_vecs; r < num_vecs; r++) {
+ for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
if (err) {
- num_vecs = r;
+ dp->num_r_vecs = r;
goto err_cleanup_vecs;
}
}
- if (rx) {
- if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) {
- err = -ENOMEM;
- goto err_cleanup_vecs;
- }
- }
- if (tx) {
- if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) {
- err = -ENOMEM;
- goto err_free_rx;
- }
- }
+
+ err = nfp_net_rx_rings_prepare(nn, dp);
+ if (err)
+ goto err_cleanup_vecs;
+
+ err = nfp_net_tx_rings_prepare(nn, dp);
+ if (err)
+ goto err_free_rx;
/* Stop device, swap in new rings, try to start the firmware */
nfp_net_close_stack(nn);
nfp_net_clear_config_and_disable(nn);
- err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
- xdp_prog, rx, tx);
+ err = nfp_net_dp_swap_enable(nn, dp);
if (err) {
int err2;
nfp_net_clear_config_and_disable(nn);
/* Try with old configuration and old rings */
- err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
- xdp_prog, rx, tx);
+ err2 = nfp_net_dp_swap_enable(nn, dp);
if (err2)
nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
err, err2);
}
- for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+ for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
- if (rx)
- nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
- if (tx)
- nfp_net_tx_ring_set_free(nn, tx);
+ nfp_net_rx_rings_free(dp);
+ nfp_net_tx_rings_free(dp);
nfp_net_open_stack(nn);
+exit_free_dp:
+ kfree(dp);
return err;
err_free_rx:
- if (rx)
- nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
+ nfp_net_rx_rings_free(dp);
err_cleanup_vecs:
- for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+ for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+ kfree(dp);
return err;
}
static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
{
struct nfp_net *nn = netdev_priv(netdev);
- struct nfp_net_ring_set rx = {
- .n_rings = nn->num_rx_rings,
- .mtu = new_mtu,
- .dcnt = nn->rxd_cnt,
- };
+ struct nfp_net_dp *dp;
+
+ dp = nfp_net_clone_dp(nn);
+ if (!dp)
+ return -ENOMEM;
- return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL);
+ dp->mtu = new_mtu;
+
+ return nfp_net_ring_reconfig(nn, dp);
}
static void nfp_net_stat64(struct net_device *netdev,
struct nfp_net *nn = netdev_priv(netdev);
int r;
- for (r = 0; r < nn->num_r_vecs; r++) {
+ for (r = 0; r < nn->dp.num_r_vecs; r++) {
struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
u64 data[3];
unsigned int start;
return -ENOTSUPP;
if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
- if (!nn->bpf_offload_xdp)
+ if (!nn->dp.bpf_offload_xdp)
return nfp_net_bpf_offload(nn, tc->cls_bpf);
else
return -EBUSY;
/* Assume this is not called with features we have not advertised */
- new_ctrl = nn->ctrl;
+ new_ctrl = nn->dp.ctrl;
if (changed & NETIF_F_RXCSUM) {
if (features & NETIF_F_RXCSUM)
new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
}
- if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+ if (changed & NETIF_F_HW_TC && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
nn_err(nn, "Cannot disable HW TC offload while in use\n");
return -EBUSY;
}
nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
netdev->features, features, changed);
- if (new_ctrl == nn->ctrl)
+ if (new_ctrl == nn->dp.ctrl)
return 0;
- nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl);
+ nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
if (err)
return err;
- nn->ctrl = new_ctrl;
+ nn->dp.ctrl = new_ctrl;
return 0;
}
return features;
}
+static int
+nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+ int err;
+
+ if (!nn->eth_port)
+ return -EOPNOTSUPP;
+
+ if (!nn->eth_port->is_split)
+ err = snprintf(name, len, "p%d", nn->eth_port->label_port);
+ else
+ err = snprintf(name, len, "p%ds%d", nn->eth_port->label_port,
+ nn->eth_port->label_subport);
+ if (err >= len)
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
* @nn: NFP Net device to reconfigure
nn->vxlan_ports[idx] = port;
- if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN))
+ if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN))
return;
BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
if (!nfp_net_ebpf_capable(nn))
return -EINVAL;
- if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
- if (!nn->bpf_offload_xdp)
+ if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
+ if (!nn->dp.bpf_offload_xdp)
return prog ? -EBUSY : 0;
cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
} else {
/* Stop offload if replace not possible */
if (ret && cmd.command == TC_CLSBPF_REPLACE)
nfp_net_xdp_offload(nn, NULL);
- nn->bpf_offload_xdp = prog && !ret;
+ nn->dp.bpf_offload_xdp = prog && !ret;
return ret;
}
static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
{
- struct nfp_net_ring_set rx = {
- .n_rings = nn->num_rx_rings,
- .mtu = nn->netdev->mtu,
- .dcnt = nn->rxd_cnt,
- };
- struct nfp_net_ring_set tx = {
- .n_rings = nn->num_tx_rings,
- .dcnt = nn->txd_cnt,
- };
+ struct bpf_prog *old_prog = nn->dp.xdp_prog;
+ struct nfp_net_dp *dp;
int err;
- if (prog && prog->xdp_adjust_head) {
- nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
- return -EOPNOTSUPP;
- }
- if (!prog && !nn->xdp_prog)
+ if (!prog && !nn->dp.xdp_prog)
return 0;
- if (prog && nn->xdp_prog) {
- prog = xchg(&nn->xdp_prog, prog);
+ if (prog && nn->dp.xdp_prog) {
+ prog = xchg(&nn->dp.xdp_prog, prog);
bpf_prog_put(prog);
- nfp_net_xdp_offload(nn, nn->xdp_prog);
+ nfp_net_xdp_offload(nn, nn->dp.xdp_prog);
return 0;
}
- tx.n_rings += prog ? nn->num_rx_rings : -nn->num_rx_rings;
+ dp = nfp_net_clone_dp(nn);
+ if (!dp)
+ return -ENOMEM;
+
+ dp->xdp_prog = prog;
+ dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
+ dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+ if (prog)
+ dp->rx_dma_off = XDP_PACKET_HEADROOM -
+ (nn->dp.rx_offset ?: NFP_NET_MAX_PREPEND);
+ else
+ dp->rx_dma_off = 0;
/* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
- err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx);
+ err = nfp_net_ring_reconfig(nn, dp);
if (err)
return err;
- /* @prog got swapped and is now the old one */
- if (prog)
- bpf_prog_put(prog);
+ if (old_prog)
+ bpf_prog_put(old_prog);
- nfp_net_xdp_offload(nn, nn->xdp_prog);
+ nfp_net_xdp_offload(nn, nn->dp.xdp_prog);
return 0;
}
case XDP_SETUP_PROG:
return nfp_net_xdp_setup(nn, xdp->prog);
case XDP_QUERY_PROG:
- xdp->prog_attached = !!nn->xdp_prog;
+ xdp->prog_attached = !!nn->dp.xdp_prog;
return 0;
default:
return -EINVAL;
.ndo_set_mac_address = eth_mac_addr,
.ndo_set_features = nfp_net_set_features,
.ndo_features_check = nfp_net_features_check,
+ .ndo_get_phys_port_name = nfp_net_get_phys_port_name,
.ndo_udp_tunnel_add = nfp_net_add_vxlan_port,
.ndo_udp_tunnel_del = nfp_net_del_vxlan_port,
.ndo_xdp = nfp_net_xdp,
void nfp_net_info(struct nfp_net *nn)
{
nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
- nn->is_vf ? "VF " : "",
- nn->num_tx_rings, nn->max_tx_rings,
- nn->num_rx_rings, nn->max_rx_rings);
+ nn->dp.is_vf ? "VF " : "",
+ nn->dp.num_tx_rings, nn->max_tx_rings,
+ nn->dp.num_rx_rings, nn->max_rx_rings);
nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
SET_NETDEV_DEV(netdev, &pdev->dev);
nn = netdev_priv(netdev);
- nn->netdev = netdev;
+ nn->dp.netdev = netdev;
+ nn->dp.dev = &pdev->dev;
nn->pdev = pdev;
nn->max_tx_rings = max_tx_rings;
nn->max_rx_rings = max_rx_rings;
- nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus());
- nn->num_rx_rings = min_t(unsigned int, max_rx_rings,
+ nn->dp.num_tx_rings = min_t(unsigned int,
+ max_tx_rings, num_online_cpus());
+ nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
netif_get_num_default_rss_queues());
- nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings);
- nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus());
+ nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
+ nn->dp.num_r_vecs = min_t(unsigned int,
+ nn->dp.num_r_vecs, num_online_cpus());
- nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
- nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
+ nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
+ nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->rx_filter_lock);
*/
void nfp_net_netdev_free(struct nfp_net *nn)
{
- free_netdev(nn->netdev);
+ free_netdev(nn->dp.netdev);
+}
+
+/**
+ * nfp_net_rss_key_sz() - Get current size of the RSS key
+ * @nn: NFP Net device instance
+ *
+ * Return: size of the RSS key for currently selected hash function.
+ */
+unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
+{
+ switch (nn->rss_hfunc) {
+ case ETH_RSS_HASH_TOP:
+ return NFP_NET_CFG_RSS_KEY_SZ;
+ case ETH_RSS_HASH_XOR:
+ return 0;
+ case ETH_RSS_HASH_CRC32:
+ return 4;
+ }
+
+ nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
+ return 0;
}
/**
*/
static void nfp_net_rss_init(struct nfp_net *nn)
{
- netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
+ unsigned long func_bit, rss_cap_hfunc;
+ u32 reg;
+
+ /* Read the RSS function capability and select first supported func */
+ reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
+ rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
+ if (!rss_cap_hfunc)
+ rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
+ NFP_NET_CFG_RSS_TOEPLITZ);
+
+ func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
+ if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
+ dev_warn(nn->dp.dev,
+ "Bad RSS config, defaulting to Toeplitz hash\n");
+ func_bit = ETH_RSS_HASH_TOP_BIT;
+ }
+ nn->rss_hfunc = 1 << func_bit;
+
+ netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
nfp_net_rss_init_itbl(nn);
/* Enable IPv4/IPv6 TCP by default */
nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
NFP_NET_CFG_RSS_IPV6_TCP |
- NFP_NET_CFG_RSS_TOEPLITZ |
+ FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
NFP_NET_CFG_RSS_MASK;
}
struct nfp_net *nn = netdev_priv(netdev);
int err;
+ /* XDP calls for 256 byte packet headroom which wouldn't fit in a u8.
+ * We, however, reuse the metadata prepend space for XDP buffers which
+ * is at least 1 byte long and as long as XDP headroom doesn't increase
+ * above 256 the *extra* XDP headroom will fit on 8 bits.
+ */
+ BUILD_BUG_ON(XDP_PACKET_HEADROOM > 256);
+
+ nn->dp.chained_metadata_format = nn->fw_ver.major > 3;
+
+ nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
+
/* Get some of the read-only fields from the BAR */
nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
nfp_net_write_mac_addr(nn);
/* Determine RX packet/metadata boundary offset */
- if (nn->fw_ver.major >= 2)
- nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
- else
- nn->rx_offset = NFP_NET_RX_OFFSET;
+ if (nn->fw_ver.major >= 2) {
+ u32 reg;
+
+ reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
+ if (reg > NFP_NET_MAX_PREPEND) {
+ nn_err(nn, "Invalid rx offset: %d\n", reg);
+ return -EINVAL;
+ }
+ nn->dp.rx_offset = reg;
+ } else {
+ nn->dp.rx_offset = NFP_NET_RX_OFFSET;
+ }
/* Set default MTU and Freelist buffer size */
if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
netdev->mtu = nn->max_mtu;
else
netdev->mtu = NFP_NET_DEFAULT_MTU;
- nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu);
+ nn->dp.mtu = netdev->mtu;
+ nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
/* Advertise/enable offloads based on capabilities
*
netdev->hw_features = NETIF_F_HIGHDMA;
if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) {
netdev->hw_features |= NETIF_F_RXCSUM;
- nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
}
if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
- nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
}
if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
netdev->hw_features |= NETIF_F_SG;
- nn->ctrl |= NFP_NET_CFG_CTRL_GATHER;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
}
if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) {
netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
- nn->ctrl |= NFP_NET_CFG_CTRL_LSO;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_LSO;
}
if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
netdev->hw_features |= NETIF_F_RXHASH;
nfp_net_rss_init(nn);
- nn->ctrl |= NFP_NET_CFG_CTRL_RSS;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_RSS;
}
if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
if (nn->cap & NFP_NET_CFG_CTRL_LSO)
netdev->hw_features |= NETIF_F_GSO_GRE |
NETIF_F_GSO_UDP_TUNNEL;
- nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
netdev->hw_enc_features = netdev->hw_features;
}
if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
- nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
}
if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
- nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
}
netdev->features = netdev->hw_features;
/* Allow L2 Broadcast and Multicast through by default, if supported */
if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
- nn->ctrl |= NFP_NET_CFG_CTRL_L2BC;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
if (nn->cap & NFP_NET_CFG_CTRL_L2MC)
- nn->ctrl |= NFP_NET_CFG_CTRL_L2MC;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2MC;
/* Allow IRQ moderation, if supported */
if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
nfp_net_irqmod_init(nn);
- nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
+ nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
}
/* Stash the re-configuration queue away. First odd queue in TX Bar */
{
struct nfp_net *nn = netdev_priv(netdev);
- unregister_netdev(nn->netdev);
++ unregister_netdev(nn->dp.netdev);
+
- if (nn->xdp_prog)
- bpf_prog_put(nn->xdp_prog);
- if (nn->bpf_offload_xdp)
+ if (nn->dp.xdp_prog)
+ bpf_prog_put(nn->dp.xdp_prog);
+ if (nn->dp.bpf_offload_xdp)
nfp_net_xdp_offload(nn, NULL);
- unregister_netdev(nn->dp.netdev);
}
return "1Gbps";
case SPEED_2500:
return "2.5Gbps";
+ case SPEED_5000:
+ return "5Gbps";
case SPEED_10000:
return "10Gbps";
+ case SPEED_20000:
+ return "20Gbps";
+ case SPEED_25000:
+ return "25Gbps";
+ case SPEED_40000:
+ return "40Gbps";
+ case SPEED_50000:
+ return "50Gbps";
+ case SPEED_56000:
+ return "56Gbps";
+ case SPEED_100000:
+ return "100Gbps";
case SPEED_UNKNOWN:
return "Unknown";
default:
cancel_delayed_work_sync(&phydev->state_queue);
mutex_lock(&phydev->lock);
- if (phydev->state > PHY_UP)
+ if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
phydev->state = PHY_UP;
mutex_unlock(&phydev->lock);
}
}
EXPORT_SYMBOL(phy_mac_interrupt);
-static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
- int addr)
-{
- /* Write the desired MMD Devad */
- bus->write(bus, addr, MII_MMD_CTRL, devad);
-
- /* Write the desired MMD register address */
- bus->write(bus, addr, MII_MMD_DATA, prtad);
-
- /* Select the Function : DATA with no post increment */
- bus->write(bus, addr, MII_MMD_CTRL, (devad | MII_MMD_CTRL_NOINCR));
-}
-
-/**
- * phy_read_mmd_indirect - reads data from the MMD registers
- * @phydev: The PHY device bus
- * @prtad: MMD Address
- * @devad: MMD DEVAD
- *
- * Description: it reads data from the MMD registers (clause 22 to access to
- * clause 45) of the specified phy address.
- * To read these register we have:
- * 1) Write reg 13 // DEVAD
- * 2) Write reg 14 // MMD Address
- * 3) Write reg 13 // MMD Data Command for MMD DEVAD
- * 3) Read reg 14 // Read MMD data
- */
-int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad)
-{
- struct phy_driver *phydrv = phydev->drv;
- int addr = phydev->mdio.addr;
- int value = -1;
-
- if (!phydrv->read_mmd_indirect) {
- struct mii_bus *bus = phydev->mdio.bus;
-
- mutex_lock(&bus->mdio_lock);
- mmd_phy_indirect(bus, prtad, devad, addr);
-
- /* Read the content of the MMD's selected register */
- value = bus->read(bus, addr, MII_MMD_DATA);
- mutex_unlock(&bus->mdio_lock);
- } else {
- value = phydrv->read_mmd_indirect(phydev, prtad, devad, addr);
- }
- return value;
-}
-EXPORT_SYMBOL(phy_read_mmd_indirect);
-
-/**
- * phy_write_mmd_indirect - writes data to the MMD registers
- * @phydev: The PHY device
- * @prtad: MMD Address
- * @devad: MMD DEVAD
- * @data: data to write in the MMD register
- *
- * Description: Write data from the MMD registers of the specified
- * phy address.
- * To write these register we have:
- * 1) Write reg 13 // DEVAD
- * 2) Write reg 14 // MMD Address
- * 3) Write reg 13 // MMD Data Command for MMD DEVAD
- * 3) Write reg 14 // Write MMD data
- */
-void phy_write_mmd_indirect(struct phy_device *phydev, int prtad,
- int devad, u32 data)
-{
- struct phy_driver *phydrv = phydev->drv;
- int addr = phydev->mdio.addr;
-
- if (!phydrv->write_mmd_indirect) {
- struct mii_bus *bus = phydev->mdio.bus;
-
- mutex_lock(&bus->mdio_lock);
- mmd_phy_indirect(bus, prtad, devad, addr);
-
- /* Write the data into MMD's selected register */
- bus->write(bus, addr, MII_MMD_DATA, data);
- mutex_unlock(&bus->mdio_lock);
- } else {
- phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data);
- }
-}
-EXPORT_SYMBOL(phy_write_mmd_indirect);
-
/**
* phy_init_eee - init and check the EEE feature
* @phydev: target phy_device struct
return -EIO;
/* According to 802.3az,the EEE is supported only in full duplex-mode.
- * Also EEE feature is active when core is operating with MII, GMII
- * or RGMII (all kinds). Internal PHYs are also allowed to proceed and
- * should return an error if they do not support EEE.
*/
- if ((phydev->duplex == DUPLEX_FULL) &&
- ((phydev->interface == PHY_INTERFACE_MODE_MII) ||
- (phydev->interface == PHY_INTERFACE_MODE_GMII) ||
- phy_interface_is_rgmii(phydev) ||
- phy_is_internal(phydev))) {
+ if (phydev->duplex == DUPLEX_FULL) {
int eee_lp, eee_cap, eee_adv;
u32 lp, cap, adv;
int status;
return status;
/* First check if the EEE ability is supported */
- eee_cap = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE,
- MDIO_MMD_PCS);
+ eee_cap = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
if (eee_cap <= 0)
goto eee_exit_err;
/* Check which link settings negotiated and verify it in
* the EEE advertising registers.
*/
- eee_lp = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE,
- MDIO_MMD_AN);
+ eee_lp = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE);
if (eee_lp <= 0)
goto eee_exit_err;
- eee_adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV,
- MDIO_MMD_AN);
+ eee_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
if (eee_adv <= 0)
goto eee_exit_err;
/* Configure the PHY to stop receiving xMII
* clock while it is signaling LPI.
*/
- int val = phy_read_mmd_indirect(phydev, MDIO_CTRL1,
- MDIO_MMD_PCS);
+ int val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1);
if (val < 0)
return val;
val |= MDIO_PCS_CTRL1_CLKSTOP_EN;
- phy_write_mmd_indirect(phydev, MDIO_CTRL1,
- MDIO_MMD_PCS, val);
+ phy_write_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1, val);
}
return 0; /* EEE supported */
if (!phydev->drv)
return -EIO;
- return phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_WK_ERR, MDIO_MMD_PCS);
+ return phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_WK_ERR);
}
EXPORT_SYMBOL(phy_get_eee_err);
return -EIO;
/* Get Supported EEE */
- val = phy_read_mmd_indirect(phydev, MDIO_PCS_EEE_ABLE, MDIO_MMD_PCS);
+ val = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
if (val < 0)
return val;
data->supported = mmd_eee_cap_to_ethtool_sup_t(val);
/* Get advertisement EEE */
- val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN);
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
if (val < 0)
return val;
data->advertised = mmd_eee_adv_to_ethtool_adv_t(val);
/* Get LP advertisement EEE */
- val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_LPABLE, MDIO_MMD_AN);
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE);
if (val < 0)
return val;
data->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(val);
*/
int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
{
- int val = ethtool_adv_to_mmd_eee_adv_t(data->advertised);
+ int cap, old_adv, adv, ret;
if (!phydev->drv)
return -EIO;
+ /* Get Supported EEE */
+ cap = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE);
+ if (cap < 0)
+ return cap;
+
+ old_adv = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV);
+ if (old_adv < 0)
+ return old_adv;
+
+ adv = ethtool_adv_to_mmd_eee_adv_t(data->advertised) & cap;
+
/* Mask prohibited EEE modes */
- val &= ~phydev->eee_broken_modes;
+ adv &= ~phydev->eee_broken_modes;
+
+ if (old_adv != adv) {
+ ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, adv);
+ if (ret < 0)
+ return ret;
- phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, val);
+ /* Restart autonegotiation so the new modes get sent to the
+ * link partner.
+ */
+ ret = genphy_restart_aneg(phydev);
+ if (ret < 0)
+ return ret;
+ }
return 0;
}
/* Define these values to match your device */
#define VENDOR_ID_REALTEK 0x0bda
+ #define VENDOR_ID_MICROSOFT 0x045e
#define VENDOR_ID_SAMSUNG 0x04e8
#define VENDOR_ID_LENOVO 0x17ef
#define VENDOR_ID_NVIDIA 0x0955
}
} else {
if (netif_carrier_ok(tp->netdev)) {
+ netif_stop_queue(tp->netdev);
set_bit(RTL8152_LINK_CHG, &tp->flags);
schedule_delayed_work(&tp->schedule, 0);
}
unsigned long flags;
struct list_head *cursor, *next, rx_queue;
int ret = 0, work_done = 0;
+ struct napi_struct *napi = &tp->napi;
if (!skb_queue_empty(&tp->rx_queue)) {
while (work_done < budget) {
break;
pkt_len = skb->len;
- napi_gro_receive(&tp->napi, skb);
+ napi_gro_receive(napi, skb);
work_done++;
stats->rx_packets++;
stats->rx_bytes += pkt_len;
pkt_len -= CRC_SIZE;
rx_data += sizeof(struct rx_desc);
- skb = napi_alloc_skb(&tp->napi, pkt_len);
+ skb = napi_alloc_skb(napi, pkt_len);
if (!skb) {
stats->rx_dropped++;
goto find_next_rx;
skb->protocol = eth_type_trans(skb, netdev);
rtl_rx_vlan_tag(rx_desc, skb);
if (work_done < budget) {
- napi_gro_receive(&tp->napi, skb);
+ napi_gro_receive(napi, skb);
work_done++;
stats->rx_packets++;
stats->rx_bytes += pkt_len;
static void set_carrier(struct r8152 *tp)
{
struct net_device *netdev = tp->netdev;
+ struct napi_struct *napi = &tp->napi;
u8 speed;
speed = rtl8152_get_speed(tp);
tp->rtl_ops.enable(tp);
set_bit(RTL8152_SET_RX_MODE, &tp->flags);
netif_stop_queue(netdev);
- napi_disable(&tp->napi);
+ napi_disable(napi);
netif_carrier_on(netdev);
rtl_start_rx(tp);
napi_enable(&tp->napi);
netif_wake_queue(netdev);
netif_info(tp, link, netdev, "carrier on\n");
+ } else if (netif_queue_stopped(netdev) &&
+ skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
+ netif_wake_queue(netdev);
}
} else {
if (netif_carrier_ok(netdev)) {
netif_carrier_off(netdev);
- napi_disable(&tp->napi);
+ napi_disable(napi);
tp->rtl_ops.disable(tp);
- napi_enable(&tp->napi);
+ napi_enable(napi);
netif_info(tp, link, netdev, "carrier off\n");
}
}
tp->rtl_ops.autosuspend_en(tp, true);
if (netif_carrier_ok(netdev)) {
- napi_disable(&tp->napi);
+ struct napi_struct *napi = &tp->napi;
+
+ napi_disable(napi);
rtl_stop_rx(tp);
rxdy_gated_en(tp, false);
ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, rcr);
- napi_enable(&tp->napi);
+ napi_enable(napi);
}
}
netif_device_detach(netdev);
if (netif_running(netdev) && test_bit(WORK_ENABLE, &tp->flags)) {
+ struct napi_struct *napi = &tp->napi;
+
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
- napi_disable(&tp->napi);
+ napi_disable(napi);
cancel_delayed_work_sync(&tp->schedule);
tp->rtl_ops.down(tp);
- napi_enable(&tp->napi);
+ napi_enable(napi);
}
return ret;
static int rtl8152_resume(struct usb_interface *intf)
{
struct r8152 *tp = usb_get_intfdata(intf);
+ struct net_device *netdev = tp->netdev;
mutex_lock(&tp->control);
if (!test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
tp->rtl_ops.init(tp);
queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0);
- netif_device_attach(tp->netdev);
+ netif_device_attach(netdev);
}
- if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) {
+ if (netif_running(netdev) && netdev->flags & IFF_UP) {
if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+ struct napi_struct *napi = &tp->napi;
+
tp->rtl_ops.autosuspend_en(tp, false);
- napi_disable(&tp->napi);
+ napi_disable(napi);
set_bit(WORK_ENABLE, &tp->flags);
- if (netif_carrier_ok(netdev))
- rtl_start_rx(tp);
-
- if (netif_carrier_ok(tp->netdev)) {
++ if (netif_carrier_ok(netdev)) {
+ if (rtl8152_get_speed(tp) & LINK_STATUS) {
+ rtl_start_rx(tp);
+ } else {
- netif_carrier_off(tp->netdev);
++ netif_carrier_off(netdev);
+ tp->rtl_ops.disable(tp);
- netif_info(tp, link, tp->netdev,
++ netif_info(tp, link, netdev,
+ "linking down\n");
+ }
+ }
-
- napi_enable(&tp->napi);
+ napi_enable(napi);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
smp_mb__after_atomic();
if (!list_empty(&tp->rx_done))
napi_schedule(&tp->napi);
} else {
tp->rtl_ops.up(tp);
- netif_carrier_off(tp->netdev);
+ netif_carrier_off(netdev);
set_bit(WORK_ENABLE, &tp->flags);
}
usb_submit_urb(tp->intr_urb, GFP_KERNEL);
} else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
- if (tp->netdev->flags & IFF_UP)
+ if (netdev->flags & IFF_UP)
tp->rtl_ops.autosuspend_en(tp, false);
clear_bit(SELECTIVE_SUSPEND, &tp->flags);
}
}
static
-int rtl8152_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
+int rtl8152_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
{
struct r8152 *tp = netdev_priv(netdev);
int ret;
mutex_lock(&tp->control);
- ret = mii_ethtool_gset(&tp->mii, cmd);
+ ret = mii_ethtool_get_link_ksettings(&tp->mii, cmd);
mutex_unlock(&tp->control);
return ret;
}
-static int rtl8152_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int rtl8152_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct r8152 *tp = netdev_priv(dev);
int ret;
mutex_lock(&tp->control);
- ret = rtl8152_set_speed(tp, cmd->autoneg, cmd->speed, cmd->duplex);
+ ret = rtl8152_set_speed(tp, cmd->base.autoneg, cmd->base.speed,
+ cmd->base.duplex);
if (!ret) {
- tp->autoneg = cmd->autoneg;
- tp->speed = cmd->speed;
- tp->duplex = cmd->duplex;
+ tp->autoneg = cmd->base.autoneg;
+ tp->speed = cmd->base.speed;
+ tp->duplex = cmd->base.duplex;
}
mutex_unlock(&tp->control);
static const struct ethtool_ops ops = {
.get_drvinfo = rtl8152_get_drvinfo,
- .get_settings = rtl8152_get_settings,
- .set_settings = rtl8152_set_settings,
.get_link = ethtool_op_get_link,
.nway_reset = rtl8152_nway_reset,
.get_msglevel = rtl8152_get_msglevel,
.set_coalesce = rtl8152_set_coalesce,
.get_eee = rtl_ethtool_get_eee,
.set_eee = rtl_ethtool_set_eee,
+ .get_link_ksettings = rtl8152_get_link_ksettings,
+ .set_link_ksettings = rtl8152_set_link_ksettings,
};
static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd)
.ndo_features_check = rtl8152_features_check,
};
-static void r8152b_get_version(struct r8152 *tp)
-{
- u32 ocp_data;
- u16 version;
-
- ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_TCR1);
- version = (u16)(ocp_data & VERSION_MASK);
-
- switch (version) {
- case 0x4c00:
- tp->version = RTL_VER_01;
- break;
- case 0x4c10:
- tp->version = RTL_VER_02;
- break;
- case 0x5c00:
- tp->version = RTL_VER_03;
- tp->mii.supports_gmii = 1;
- break;
- case 0x5c10:
- tp->version = RTL_VER_04;
- tp->mii.supports_gmii = 1;
- break;
- case 0x5c20:
- tp->version = RTL_VER_05;
- tp->mii.supports_gmii = 1;
- break;
- case 0x5c30:
- tp->version = RTL_VER_06;
- tp->mii.supports_gmii = 1;
- break;
- default:
- netif_info(tp, probe, tp->netdev,
- "Unknown version 0x%04x\n", version);
- break;
- }
-}
-
static void rtl8152_unload(struct r8152 *tp)
{
if (test_bit(RTL8152_UNPLUG, &tp->flags))
return ret;
}
+static u8 rtl_get_version(struct usb_interface *intf)
+{
+ struct usb_device *udev = interface_to_usbdev(intf);
+ u32 ocp_data = 0;
+ __le32 *tmp;
+ u8 version;
+ int ret;
+
+ tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp)
+ return 0;
+
+ ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+ RTL8152_REQ_GET_REGS, RTL8152_REQT_READ,
+ PLA_TCR0, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500);
+ if (ret > 0)
+ ocp_data = (__le32_to_cpu(*tmp) >> 16) & VERSION_MASK;
+
+ kfree(tmp);
+
+ switch (ocp_data) {
+ case 0x4c00:
+ version = RTL_VER_01;
+ break;
+ case 0x4c10:
+ version = RTL_VER_02;
+ break;
+ case 0x5c00:
+ version = RTL_VER_03;
+ break;
+ case 0x5c10:
+ version = RTL_VER_04;
+ break;
+ case 0x5c20:
+ version = RTL_VER_05;
+ break;
+ case 0x5c30:
+ version = RTL_VER_06;
+ break;
+ default:
+ version = RTL_VER_UNKNOWN;
+ dev_info(&intf->dev, "Unknown version 0x%04x\n", ocp_data);
+ break;
+ }
+
+ return version;
+}
+
static int rtl8152_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
struct usb_device *udev = interface_to_usbdev(intf);
+ u8 version = rtl_get_version(intf);
struct r8152 *tp;
struct net_device *netdev;
int ret;
+ if (version == RTL_VER_UNKNOWN)
+ return -ENODEV;
+
if (udev->actconfig->desc.bConfigurationValue != 1) {
usb_driver_set_configuration(udev, 1);
return -ENODEV;
tp->udev = udev;
tp->netdev = netdev;
tp->intf = intf;
+ tp->version = version;
+
+ switch (version) {
+ case RTL_VER_01:
+ case RTL_VER_02:
+ tp->mii.supports_gmii = 0;
+ break;
+ default:
+ tp->mii.supports_gmii = 1;
+ break;
+ }
- r8152b_get_version(tp);
ret = rtl_ops_init(tp);
if (ret)
goto out;
static struct usb_device_id rtl8152_table[] = {
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)},
{REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f)},
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062)},
* link up channels based on their CPU affinity.
*/
struct list_head percpu_list;
+
+ /*
+ * Defer freeing channel until after all cpu's have
+ * gone through grace period.
+ */
+ struct rcu_head rcu;
+
/*
* For performance critical channels (storage, networking
* etc,), Hyper-V has a mechanism to enhance the throughput
const int *srv_version, int srv_vercnt,
int *nego_fw_version, int *nego_srv_version);
- void hv_event_tasklet_disable(struct vmbus_channel *channel);
- void hv_event_tasklet_enable(struct vmbus_channel *channel);
-
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
void vmbus_setevent(struct vmbus_channel *channel);
return;
}
-static inline void
-init_cached_read_index(struct vmbus_channel *channel)
-{
- struct hv_ring_buffer_info *rbi = &channel->inbound;
-
- rbi->cached_read_index = rbi->ring_buffer->read_index;
-}
-
/*
* Mask off host interrupt callback notifications
*/
/*
* An API to support in-place processing of incoming VMBUS packets.
*/
-#define VMBUS_PKT_TRAILER 8
-static inline struct vmpacket_descriptor *
-get_next_pkt_raw(struct vmbus_channel *channel)
+/* Get data payload associated with descriptor */
+static inline void *hv_pkt_data(const struct vmpacket_descriptor *desc)
{
- struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 priv_read_loc = ring_info->priv_read_index;
- void *ring_buffer = hv_get_ring_buffer(ring_info);
- u32 dsize = ring_info->ring_datasize;
- /*
- * delta is the difference between what is available to read and
- * what was already consumed in place. We commit read index after
- * the whole batch is processed.
- */
- u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
- priv_read_loc - ring_info->ring_buffer->read_index :
- (dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
- u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
-
- if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
- return NULL;
-
- return ring_buffer + priv_read_loc;
+ return (void *)((unsigned long)desc + (desc->offset8 << 3));
}
-/*
- * A helper function to step through packets "in-place"
- * This API is to be called after each successful call
- * get_next_pkt_raw().
- */
-static inline void put_pkt_raw(struct vmbus_channel *channel,
- struct vmpacket_descriptor *desc)
+/* Get data size associated with descriptor */
+static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc)
{
- struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 packetlen = desc->len8 << 3;
- u32 dsize = ring_info->ring_datasize;
-
- /*
- * Include the packet trailer.
- */
- ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
- ring_info->priv_read_index %= dsize;
+ return (desc->len8 << 3) - (desc->offset8 << 3);
}
+
+struct vmpacket_descriptor *
+hv_pkt_iter_first(struct vmbus_channel *channel);
+
+struct vmpacket_descriptor *
+__hv_pkt_iter_next(struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *pkt);
+
+void hv_pkt_iter_close(struct vmbus_channel *channel);
+
/*
- * This call commits the read index and potentially signals the host.
- * Here is the pattern for using the "in-place" consumption APIs:
- *
- * init_cached_read_index();
- *
- * while (get_next_pkt_raw() {
- * process the packet "in-place";
- * put_pkt_raw();
- * }
- * if (packets processed in place)
- * commit_rd_index();
+ * Get next packet descriptor from iterator
+ * If at end of list, return NULL and update host.
*/
-static inline void commit_rd_index(struct vmbus_channel *channel)
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next(struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *pkt)
{
- struct hv_ring_buffer_info *ring_info = &channel->inbound;
- /*
- * Make sure all reads are done before we update the read index since
- * the writer may start writing to the read area once the read index
- * is updated.
- */
- virt_rmb();
- ring_info->ring_buffer->read_index = ring_info->priv_read_index;
+ struct vmpacket_descriptor *nxt;
+
+ nxt = __hv_pkt_iter_next(channel, pkt);
+ if (!nxt)
+ hv_pkt_iter_close(channel);
- hv_signal_on_read(channel);
+ return nxt;
}
+#define foreach_vmbus_pkt(pkt, channel) \
+ for (pkt = hv_pkt_iter_first(channel); pkt; \
+ pkt = hv_pkt_iter_next(channel, pkt))
#endif /* _HYPERV_H */
__u64 hb_nonce;
} sctp_sender_hb_info_t;
- struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp);
+ int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp);
+ int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp);
void sctp_stream_free(struct sctp_stream *stream);
void sctp_stream_clear(struct sctp_stream *stream);
/* Did the messenge fail to send? */
int send_error;
u8 send_failed:1,
- force_delay:1,
can_delay; /* should this message be Nagle delayed */
};
void sctp_transport_burst_limited(struct sctp_transport *);
void sctp_transport_burst_reset(struct sctp_transport *);
unsigned long sctp_transport_timeout(struct sctp_transport *);
- void sctp_transport_reset(struct sctp_transport *);
- void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
+ void sctp_transport_reset(struct sctp_transport *t);
+ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
void sctp_transport_immediate_rtx(struct sctp_transport *);
void sctp_transport_dst_release(struct sctp_transport *t);
void sctp_transport_dst_confirm(struct sctp_transport *t);
struct sctp_stream_out {
__u16 ssn;
__u8 state;
+ __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+ __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
};
struct sctp_stream_in {
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
temp:1, /* Is it a temporary association? */
+ force_delay:1,
prsctp_enable:1,
reconf_enable:1;
__u32 sctp_association_get_next_tsn(struct sctp_association *);
- void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
+ void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
void sctp_assoc_set_primary(struct sctp_association *,
#define BPF_COMPLEXITY_LIMIT_INSNS 65536
#define BPF_COMPLEXITY_LIMIT_STACK 1024
+#define BPF_MAP_PTR_POISON ((void *)0xeB9F + POISON_POINTER_DELTA)
+
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
}
}
- static int check_ptr_alignment(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, int off, int size)
+ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
{
- if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
- if (off % size != 0) {
- verbose("misaligned access off %d size %d\n",
- off, size);
- return -EACCES;
- } else {
- return 0;
- }
- }
-
- if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
- /* misaligned access to packet is ok on x86,arm,arm64 */
- return 0;
-
if (reg->id && size != 1) {
- verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+ verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
return -EACCES;
}
/* skb->data is NET_IP_ALIGN-ed */
- if (reg->type == PTR_TO_PACKET &&
- (NET_IP_ALIGN + reg->off + off) % size != 0) {
+ if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
NET_IP_ALIGN, reg->off, off, size);
return -EACCES;
}
+
return 0;
}
+ static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
+ int size)
+ {
+ if (size != 1) {
+ verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+ return -EACCES;
+ }
+
+ return 0;
+ }
+
+ static int check_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
+ {
+ switch (reg->type) {
+ case PTR_TO_PACKET:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_pkt_ptr_alignment(reg, off, size);
+ case PTR_TO_MAP_VALUE_ADJ:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_val_ptr_alignment(reg, size);
+ default:
+ if (off % size != 0) {
+ verbose("misaligned access off %d size %d\n",
+ off, size);
+ return -EACCES;
+ }
+
+ return 0;
+ }
+ }
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
if (size < 0)
return size;
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(reg, off, size);
if (err)
return err;
func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ if (func_id != BPF_FUNC_map_lookup_elem)
+ goto error;
default:
break;
}
}
}
-static int check_call(struct bpf_verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
struct bpf_verifier_state *state = &env->cur_state;
const struct bpf_func_proto *fn = NULL;
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
+ struct bpf_insn_aux_data *insn_aux;
+
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
/* remember map_ptr, so that check_map_access()
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
regs[BPF_REG_0].id = ++env->id_gen;
+ insn_aux = &env->insn_aux_data[insn_idx];
+ if (!insn_aux->map_ptr)
+ insn_aux->map_ptr = meta.map_ptr;
+ else if (insn_aux->map_ptr != meta.map_ptr)
+ insn_aux->map_ptr = BPF_MAP_PTR_POISON;
} else {
verbose("unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
* register as unknown.
*/
if (env->allow_ptr_leaks &&
+ BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
(dst_reg->type == PTR_TO_MAP_VALUE ||
dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
- regs[i].range = dst_reg->off;
+ /* keep the maximum range already checked */
+ regs[i].range = max(regs[i].range, dst_reg->off);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
- reg->range = dst_reg->off;
+ reg->range = max(reg->range, dst_reg->off);
}
}
struct bpf_reg_state *reg = ®s[regno];
if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
- reg->type = type;
+ if (type == UNKNOWN_VALUE) {
+ __mark_reg_unknown_value(regs, regno);
+ } else if (reg->map_ptr->inner_map_meta) {
+ reg->type = CONST_PTR_TO_MAP;
+ reg->map_ptr = reg->map_ptr->inner_map_meta;
+ } else {
+ reg->type = type;
+ }
/* We don't need id from this point onwards anymore, thus we
* should better reset it, so that state pruning has chances
* to take effect.
*/
reg->id = 0;
- if (type == UNKNOWN_VALUE)
- __mark_reg_unknown_value(regs, regno);
}
}
return -EINVAL;
}
- err = check_call(env, insn->imm);
+ err = check_call(env, insn->imm, insn_idx);
if (err)
return err;
return 0;
}
+static int check_map_prealloc(struct bpf_map *map)
+{
+ return (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
+ map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
+ !(map->map_flags & BPF_F_NO_PREALLOC);
+}
+
static int check_map_prog_compatibility(struct bpf_map *map,
struct bpf_prog *prog)
{
- if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
- (map->map_type == BPF_MAP_TYPE_HASH ||
- map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
- (map->map_flags & BPF_F_NO_PREALLOC)) {
- verbose("perf_event programs can only use preallocated hash map\n");
- return -EINVAL;
+ /* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
+ * preallocated hash maps, since doing memory allocation
+ * in overflow_handler can crash depending on where nmi got
+ * triggered.
+ */
+ if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
+ if (!check_map_prealloc(map)) {
+ verbose("perf_event programs can only use preallocated hash map\n");
+ return -EINVAL;
+ }
+ if (map->inner_map_meta &&
+ !check_map_prealloc(map->inner_map_meta)) {
+ verbose("perf_event programs can only use preallocated inner hash map\n");
+ return -EINVAL;
+ }
}
return 0;
}
insn->src_reg = 0;
}
+/* single env->prog->insni[off] instruction was replaced with the range
+ * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
+ * [0, off) and [off, end) to new locations, so the patched range stays zero
+ */
+static int adjust_insn_aux_data(struct bpf_verifier_env *env, u32 prog_len,
+ u32 off, u32 cnt)
+{
+ struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
+
+ if (cnt == 1)
+ return 0;
+ new_data = vzalloc(sizeof(struct bpf_insn_aux_data) * prog_len);
+ if (!new_data)
+ return -ENOMEM;
+ memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
+ memcpy(new_data + off + cnt - 1, old_data + off,
+ sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
+ env->insn_aux_data = new_data;
+ vfree(old_data);
+ return 0;
+}
+
+static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
+ const struct bpf_insn *patch, u32 len)
+{
+ struct bpf_prog *new_prog;
+
+ new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
+ if (!new_prog)
+ return NULL;
+ if (adjust_insn_aux_data(env, new_prog->len, off, len))
+ return NULL;
+ return new_prog;
+}
+
/* convert load instructions that access fields of 'struct __sk_buff'
* into sequence of instructions that access fields of 'struct sk_buff'
*/
verbose("bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
- new_prog = bpf_patch_insn_single(env->prog, 0,
- insn_buf, cnt);
+ new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
+
env->prog = new_prog;
delta += cnt - 1;
}
else
continue;
- if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
+ if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
return -EINVAL;
}
- new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
- cnt);
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
return 0;
}
+/* fixup insn->imm field of bpf_call instructions
+ * and inline eligible helpers as explicit sequence of BPF instructions
+ *
+ * this function is called after eBPF program passed verification
+ */
+static int fixup_bpf_calls(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_insn *insn = prog->insnsi;
+ const struct bpf_func_proto *fn;
+ const int insn_cnt = prog->len;
+ struct bpf_insn insn_buf[16];
+ struct bpf_prog *new_prog;
+ struct bpf_map *map_ptr;
+ int i, cnt, delta = 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ if (insn->code != (BPF_JMP | BPF_CALL))
+ continue;
+
+ if (insn->imm == BPF_FUNC_get_route_realm)
+ prog->dst_needed = 1;
+ if (insn->imm == BPF_FUNC_get_prandom_u32)
+ bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_xdp_adjust_head)
+ prog->xdp_adjust_head = 1;
+ if (insn->imm == BPF_FUNC_tail_call) {
+ /* mark bpf_tail_call as different opcode to avoid
+ * conditional branch in the interpeter for every normal
+ * call and to prevent accidental JITing by JIT compiler
+ * that doesn't support bpf_tail_call yet
+ */
+ insn->imm = 0;
+ insn->code |= BPF_X;
+ continue;
+ }
+
+ if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
+ map_ptr = env->insn_aux_data[i + delta].map_ptr;
+ if (map_ptr == BPF_MAP_PTR_POISON ||
+ !map_ptr->ops->map_gen_lookup)
+ goto patch_call_imm;
+
+ cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
+ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ verbose("bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
+ cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+
+ /* keep walking new program and skip insns we just inserted */
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
+patch_call_imm:
+ fn = prog->aux->ops->get_func_proto(insn->imm);
+ /* all functions that have prototype and verifier allowed
+ * programs to call them, must be real in-kernel functions
+ */
+ if (!fn->func) {
+ verbose("kernel subsystem misconfigured func %s#%d\n",
+ func_id_name(insn->imm), insn->imm);
+ return -EFAULT;
+ }
+ insn->imm = fn->func - __bpf_call_base;
+ }
+
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
/* program is valid, convert *(u32*)(ctx + off) accesses */
ret = convert_ctx_accesses(env);
+ if (ret == 0)
+ ret = fixup_bpf_calls(env);
+
if (log_level && log_len >= log_size - 1) {
BUG_ON(log_len >= log_size);
/* verifier log exceeded user supplied buffer */
}
EXPORT_SYMBOL(__skb_flow_get_ports);
- struct arphdr *_arp;
+enum flow_dissect_ret {
+ FLOW_DISSECT_RET_OUT_GOOD,
+ FLOW_DISSECT_RET_OUT_BAD,
+ FLOW_DISSECT_RET_OUT_PROTO_AGAIN,
+};
+
+static enum flow_dissect_ret
+__skb_flow_dissect_mpls(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct mpls_label *hdr, _hdr[2];
+
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
+ hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
+ MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
+ target_container);
+ key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+ }
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_arp(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data, int nhoff, int hlen)
+{
+ struct flow_dissector_key_arp *key_arp;
+ struct {
+ unsigned char ar_sha[ETH_ALEN];
+ unsigned char ar_sip[4];
+ unsigned char ar_tha[ETH_ALEN];
+ unsigned char ar_tip[4];
+ } *arp_eth, _arp_eth;
+ const struct arphdr *arp;
++ struct arphdr _arp;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ARP))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
+ hlen, &_arp);
+ if (!arp)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_hln != ETH_ALEN ||
+ arp->ar_pln != 4 ||
+ (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST)))
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
+ sizeof(_arp_eth), data,
+ hlen, &_arp_eth);
+ if (!arp_eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ key_arp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ARP,
+ target_container);
+
+ memcpy(&key_arp->sip, arp_eth->ar_sip, sizeof(key_arp->sip));
+ memcpy(&key_arp->tip, arp_eth->ar_tip, sizeof(key_arp->tip));
+
+ /* Only store the lower byte of the opcode;
+ * this covers ARPOP_REPLY and ARPOP_REQUEST.
+ */
+ key_arp->op = ntohs(arp->ar_op) & 0xff;
+
+ ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
+ ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
+
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
+__skb_flow_dissect_gre(const struct sk_buff *skb,
+ struct flow_dissector_key_control *key_control,
+ struct flow_dissector *flow_dissector,
+ void *target_container, void *data,
+ __be16 *p_proto, int *p_nhoff, int *p_hlen,
+ unsigned int flags)
+{
+ struct flow_dissector_key_keyid *key_keyid;
+ struct gre_base_hdr *hdr, _hdr;
+ int offset = 0;
+ u16 gre_ver;
+
+ hdr = __skb_header_pointer(skb, *p_nhoff, sizeof(_hdr),
+ data, *p_hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ /* Only look inside GRE without routing */
+ if (hdr->flags & GRE_ROUTING)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ /* Only look inside GRE for version 0 and 1 */
+ gre_ver = ntohs(hdr->flags & GRE_VERSION);
+ if (gre_ver > 1)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ *p_proto = hdr->protocol;
+ if (gre_ver) {
+ /* Version1 must be PPTP, and check the flags */
+ if (!(*p_proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+ }
+
+ offset += sizeof(struct gre_base_hdr);
+
+ if (hdr->flags & GRE_CSUM)
+ offset += sizeof(((struct gre_full_hdr *) 0)->csum) +
+ sizeof(((struct gre_full_hdr *) 0)->reserved1);
+
+ if (hdr->flags & GRE_KEY) {
+ const __be32 *keyid;
+ __be32 _keyid;
+
+ keyid = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_keyid),
+ data, *p_hlen, &_keyid);
+ if (!keyid)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID)) {
+ key_keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_GRE_KEYID,
+ target_container);
+ if (gre_ver == 0)
+ key_keyid->keyid = *keyid;
+ else
+ key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
+ }
+ offset += sizeof(((struct gre_full_hdr *) 0)->key);
+ }
+
+ if (hdr->flags & GRE_SEQ)
+ offset += sizeof(((struct pptp_gre_header *) 0)->seq);
+
+ if (gre_ver == 0) {
+ if (*p_proto == htons(ETH_P_TEB)) {
+ const struct ethhdr *eth;
+ struct ethhdr _eth;
+
+ eth = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_eth),
+ data, *p_hlen, &_eth);
+ if (!eth)
+ return FLOW_DISSECT_RET_OUT_BAD;
+ *p_proto = eth->h_proto;
+ offset += sizeof(*eth);
+
+ /* Cap headers that we access via pointers at the
+ * end of the Ethernet header as our maximum alignment
+ * at that point is only 2 bytes.
+ */
+ if (NET_IP_ALIGN)
+ *p_hlen = *p_nhoff + offset;
+ }
+ } else { /* version 1, must be PPTP */
+ u8 _ppp_hdr[PPP_HDRLEN];
+ u8 *ppp_hdr;
+
+ if (hdr->flags & GRE_ACK)
+ offset += sizeof(((struct pptp_gre_header *) 0)->ack);
+
+ ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
+ sizeof(_ppp_hdr),
+ data, *p_hlen, _ppp_hdr);
+ if (!ppp_hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ switch (PPP_PROTOCOL(ppp_hdr)) {
+ case PPP_IP:
+ *p_proto = htons(ETH_P_IP);
+ break;
+ case PPP_IPV6:
+ *p_proto = htons(ETH_P_IPV6);
+ break;
+ default:
+ /* Could probably catch some more like MPLS */
+ break;
+ }
+
+ offset += PPP_HDRLEN;
+ }
+
+ *p_nhoff += offset;
+ key_control->flags |= FLOW_DIS_ENCAPSULATION;
+ if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ return FLOW_DISSECT_RET_OUT_PROTO_AGAIN;
+}
+
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
struct flow_dissector_key_control *key_control;
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
- struct flow_dissector_key_arp *key_arp;
struct flow_dissector_key_ports *key_ports;
struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
- struct flow_dissector_key_keyid *key_keyid;
bool skip_vlan = false;
u8 ip_proto = 0;
bool ret;
memcpy(key_eth_addrs, ð->h_dest, sizeof(*key_eth_addrs));
}
-again:
+proto_again:
switch (proto) {
case htons(ETH_P_IP): {
const struct iphdr *iph;
proto = vlan->h_vlan_encapsulated_proto;
nhoff += sizeof(*vlan);
if (skip_vlan)
- goto again;
+ goto proto_again;
}
skip_vlan = true;
}
}
- goto again;
+ goto proto_again;
}
case htons(ETH_P_PPP_SES): {
struct {
}
case htons(ETH_P_MPLS_UC):
- case htons(ETH_P_MPLS_MC): {
- struct mpls_label *hdr, _hdr[2];
+ case htons(ETH_P_MPLS_MC):
mpls:
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
- hlen, &_hdr);
- if (!hdr)
- goto out_bad;
-
- if ((ntohl(hdr[0].entry) & MPLS_LS_LABEL_MASK) >>
- MPLS_LS_LABEL_SHIFT == MPLS_LABEL_ENTROPY) {
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
- target_container);
- key_keyid->keyid = hdr[1].entry &
- htonl(MPLS_LS_LABEL_MASK);
- }
-
+ switch (__skb_flow_dissect_mpls(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
+ goto out_bad;
}
-
- goto out_good;
- }
-
case htons(ETH_P_FCOE):
if ((hlen - nhoff) < FCOE_HEADER_LEN)
goto out_bad;
goto out_good;
case htons(ETH_P_ARP):
- case htons(ETH_P_RARP): {
- struct {
- unsigned char ar_sha[ETH_ALEN];
- unsigned char ar_sip[4];
- unsigned char ar_tha[ETH_ALEN];
- unsigned char ar_tip[4];
- } *arp_eth, _arp_eth;
- const struct arphdr *arp;
- struct arphdr _arp;
-
- arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
- hlen, &_arp);
- if (!arp)
- goto out_bad;
-
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_hln != ETH_ALEN ||
- arp->ar_pln != 4 ||
- (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST)))
- goto out_bad;
-
- arp_eth = __skb_header_pointer(skb, nhoff + sizeof(_arp),
- sizeof(_arp_eth), data,
- hlen,
- &_arp_eth);
- if (!arp_eth)
+ case htons(ETH_P_RARP):
+ switch (__skb_flow_dissect_arp(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
+ default:
goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP)) {
-
- key_arp = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_ARP,
- target_container);
-
- memcpy(&key_arp->sip, arp_eth->ar_sip,
- sizeof(key_arp->sip));
- memcpy(&key_arp->tip, arp_eth->ar_tip,
- sizeof(key_arp->tip));
-
- /* Only store the lower byte of the opcode;
- * this covers ARPOP_REPLY and ARPOP_REQUEST.
- */
- key_arp->op = ntohs(arp->ar_op) & 0xff;
-
- ether_addr_copy(key_arp->sha, arp_eth->ar_sha);
- ether_addr_copy(key_arp->tha, arp_eth->ar_tha);
}
-
- goto out_good;
- }
-
default:
goto out_bad;
}
ip_proto_again:
switch (ip_proto) {
- case IPPROTO_GRE: {
- struct gre_base_hdr *hdr, _hdr;
- u16 gre_ver;
- int offset = 0;
-
- hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
- if (!hdr)
+ case IPPROTO_GRE:
+ switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector,
+ target_container, data,
+ &proto, &nhoff, &hlen, flags)) {
+ case FLOW_DISSECT_RET_OUT_GOOD:
+ goto out_good;
+ case FLOW_DISSECT_RET_OUT_BAD:
goto out_bad;
-
- /* Only look inside GRE without routing */
- if (hdr->flags & GRE_ROUTING)
- break;
-
- /* Only look inside GRE for version 0 and 1 */
- gre_ver = ntohs(hdr->flags & GRE_VERSION);
- if (gre_ver > 1)
- break;
-
- proto = hdr->protocol;
- if (gre_ver) {
- /* Version1 must be PPTP, and check the flags */
- if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
- break;
+ case FLOW_DISSECT_RET_OUT_PROTO_AGAIN:
+ goto proto_again;
}
-
- offset += sizeof(struct gre_base_hdr);
-
- if (hdr->flags & GRE_CSUM)
- offset += sizeof(((struct gre_full_hdr *)0)->csum) +
- sizeof(((struct gre_full_hdr *)0)->reserved1);
-
- if (hdr->flags & GRE_KEY) {
- const __be32 *keyid;
- __be32 _keyid;
-
- keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
- data, hlen, &_keyid);
- if (!keyid)
- goto out_bad;
-
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID)) {
- key_keyid = skb_flow_dissector_target(flow_dissector,
- FLOW_DISSECTOR_KEY_GRE_KEYID,
- target_container);
- if (gre_ver == 0)
- key_keyid->keyid = *keyid;
- else
- key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
- }
- offset += sizeof(((struct gre_full_hdr *)0)->key);
- }
-
- if (hdr->flags & GRE_SEQ)
- offset += sizeof(((struct pptp_gre_header *)0)->seq);
-
- if (gre_ver == 0) {
- if (proto == htons(ETH_P_TEB)) {
- const struct ethhdr *eth;
- struct ethhdr _eth;
-
- eth = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_eth),
- data, hlen, &_eth);
- if (!eth)
- goto out_bad;
- proto = eth->h_proto;
- offset += sizeof(*eth);
-
- /* Cap headers that we access via pointers at the
- * end of the Ethernet header as our maximum alignment
- * at that point is only 2 bytes.
- */
- if (NET_IP_ALIGN)
- hlen = (nhoff + offset);
- }
- } else { /* version 1, must be PPTP */
- u8 _ppp_hdr[PPP_HDRLEN];
- u8 *ppp_hdr;
-
- if (hdr->flags & GRE_ACK)
- offset += sizeof(((struct pptp_gre_header *)0)->ack);
-
- ppp_hdr = __skb_header_pointer(skb, nhoff + offset,
- sizeof(_ppp_hdr),
- data, hlen, _ppp_hdr);
- if (!ppp_hdr)
- goto out_bad;
-
- switch (PPP_PROTOCOL(ppp_hdr)) {
- case PPP_IP:
- proto = htons(ETH_P_IP);
- break;
- case PPP_IPV6:
- proto = htons(ETH_P_IPV6);
- break;
- default:
- /* Could probably catch some more like MPLS */
- break;
- }
-
- offset += PPP_HDRLEN;
- }
-
- nhoff += offset;
- key_control->flags |= FLOW_DIS_ENCAPSULATION;
- if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
- goto out_good;
-
- goto again;
- }
case NEXTHDR_HOP:
case NEXTHDR_ROUTING:
case NEXTHDR_DEST: {
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(unsigned long arg);
-static void __neigh_notify(struct neighbour *n, int type, int flags);
-static void neigh_update_notify(struct neighbour *neigh);
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid);
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
#ifdef CONFIG_PROC_FS
if (neigh->parms->neigh_cleanup)
neigh->parms->neigh_cleanup(neigh);
- __neigh_notify(neigh, RTM_DELNEIGH, 0);
+ __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
if (skb)
skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
+ if (neigh->ops->solicit)
+ neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
kfree_skb(skb);
}
}
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, 0);
neigh_release(neigh);
}
*/
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags)
+ u32 flags, u32 nlmsg_pid)
{
u8 old;
int err;
write_unlock_bh(&neigh->lock);
if (notify)
- neigh_update_notify(neigh);
+ neigh_update_notify(neigh, nlmsg_pid);
return err;
}
lladdr || !dev->addr_len);
if (neigh)
neigh_update(neigh, lladdr, NUD_STALE,
- NEIGH_UPDATE_F_OVERRIDE);
+ NEIGH_UPDATE_F_OVERRIDE, 0);
return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN);
+ NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid);
neigh_release(neigh);
out:
return -EMSGSIZE;
}
-static void neigh_update_notify(struct neighbour *neigh)
+static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0);
+ __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
+ nla_total_size(4); /* NDA_PROBES */
}
-static void __neigh_notify(struct neighbour *n, int type, int flags)
+static void __neigh_notify(struct neighbour *n, int type, int flags,
+ u32 pid)
{
struct net *net = dev_net(n->dev);
struct sk_buff *skb;
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, 0, 0, type, flags);
+ err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
+ __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
#include <net/tcp.h>
static siphash_key_t net_secret __read_mostly;
+ static siphash_key_t ts_secret __read_mostly;
static __always_inline void net_secret_init(void)
{
+ net_get_random_once(&ts_secret, sizeof(ts_secret));
net_get_random_once(&net_secret, sizeof(net_secret));
}
#endif
#endif
#if IS_ENABLED(CONFIG_IPV6)
-u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+ static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+ {
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ };
+
+ if (sysctl_tcp_timestamps != 1)
+ return 0;
+
+ return siphash(&combined, offsetofend(typeof(combined), daddr),
+ &ts_secret);
+ }
+
+u32 secure_tcpv6_seq_and_tsoff(const __be32 *saddr, const __be32 *daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
const struct {
struct in6_addr saddr;
net_secret_init();
hash = siphash(&combined, offsetofend(typeof(combined), dport),
&net_secret);
- *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ *tsoff = secure_tcpv6_ts_off(saddr, daddr);
return seq_scale(hash);
}
-EXPORT_SYMBOL(secure_tcpv6_sequence_number);
+EXPORT_SYMBOL(secure_tcpv6_seq_and_tsoff);
u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
#endif
#ifdef CONFIG_INET
+ static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+ {
+ if (sysctl_tcp_timestamps != 1)
+ return 0;
+
+ return siphash_2u32((__force u32)saddr, (__force u32)daddr,
+ &ts_secret);
+ }
-/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
+/* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
* it would be easy enough to have the former function use siphash_4u32, passing
* the arguments as separate u32.
*/
-
-u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport, u32 *tsoff)
+u32 secure_tcp_seq_and_tsoff(__be32 saddr, __be32 daddr,
+ __be16 sport, __be16 dport, u32 *tsoff)
{
u64 hash;
net_secret_init();
hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
(__force u32)sport << 16 | (__force u32)dport,
&net_secret);
- *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ *tsoff = secure_tcp_ts_off(saddr, daddr);
return seq_scale(hash);
}
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/arp.h>
+#include <net/dsa.h>
#include <net/ip.h>
#include <net/ipconfig.h>
#include <net/route.h>
while ((d = next)) {
next = d->next;
dev = d->dev;
- if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
+ if (d != ic_dev && !netdev_uses_dsa(dev)) {
pr_debug("IP-Config: Downing %s\n", dev->name);
dev_change_flags(dev, d->flags);
}
*
*****************************************************************************/
-static void hex_dump(const unsigned char *buf, size_t len)
-{
- size_t i;
-
- for (i = 0; i < len; i++) {
- if (i && !(i % 16))
- printk("\n");
- printk("%02x ", *(buf + i));
- }
- printk("\n");
-}
-
/*
* Parse and mangle SNMP message according to mapping.
* (And this is the fucking 'basic' method).
struct snmp_object *obj;
if (debug > 1)
- hex_dump(msg, len);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 1,
+ msg, len, 0);
asn1_open(&ctx, msg, len);
.timeout = 180,
};
- static struct nf_conntrack_helper snmp_helper __read_mostly = {
- .me = THIS_MODULE,
- .help = help,
- .expect_policy = &snmp_exp_policy,
- .name = "snmp",
- .tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
- .tuple.dst.protonum = IPPROTO_UDP,
- };
-
static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.me = THIS_MODULE,
.help = help,
static int __init nf_nat_snmp_basic_init(void)
{
- int ret = 0;
-
BUG_ON(nf_nat_snmp_hook != NULL);
RCU_INIT_POINTER(nf_nat_snmp_hook, help);
- ret = nf_conntrack_helper_register(&snmp_trap_helper);
- if (ret < 0) {
- nf_conntrack_helper_unregister(&snmp_helper);
- return ret;
- }
- return ret;
+ return nf_conntrack_helper_register(&snmp_trap_helper);
}
static void __exit nf_nat_snmp_basic_fini(void)
{
RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ synchronize_rcu();
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
#define REXMIT_LOST 1 /* retransmit packets marked lost */
#define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */
- static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
+ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
+ unsigned int len)
{
static bool __once __read_mostly;
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
- pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
- dev ? dev->name : "Unknown driver");
+ if (!dev || len >= dev->mtu)
+ pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+ dev ? dev->name : "Unknown driver");
rcu_read_unlock();
}
}
if (len >= icsk->icsk_ack.rcv_mss) {
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
- if (unlikely(icsk->icsk_ack.rcv_mss != len))
- tcp_gro_dev_warn(sk, skb);
+ /* Account for possibly-removed options */
+ if (unlikely(len > icsk->icsk_ack.rcv_mss +
+ MAX_TCP_OPTION_SPACE))
+ tcp_gro_dev_warn(sk, skb, len);
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
const int ts)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (metric > tp->reordering) {
- int mib_idx;
+ int mib_idx;
+ if (metric > tp->reordering) {
tp->reordering = min(sysctl_tcp_max_reordering, metric);
- /* This exciting event is worth to be remembered. 8) */
- if (ts)
- mib_idx = LINUX_MIB_TCPTSREORDER;
- else if (tcp_is_reno(tp))
- mib_idx = LINUX_MIB_TCPRENOREORDER;
- else if (tcp_is_fack(tp))
- mib_idx = LINUX_MIB_TCPFACKREORDER;
- else
- mib_idx = LINUX_MIB_TCPSACKREORDER;
-
- NET_INC_STATS(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
}
tp->rack.reord = 1;
+
+ /* This exciting event is worth to be remembered. 8) */
+ if (ts)
+ mib_idx = LINUX_MIB_TCPTSREORDER;
+ else if (tcp_is_reno(tp))
+ mib_idx = LINUX_MIB_TCPRENOREORDER;
+ else if (tcp_is_fack(tp))
+ mib_idx = LINUX_MIB_TCPFACKREORDER;
+ else
+ mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+ NET_INC_STATS(sock_net(sk), mib_idx);
}
/* This must be called before lost_out is incremented */
!estab && sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
- net_info_ratelimited("%s: Illegal window scaling value %d >14 received\n",
+ if (snd_wscale > TCP_MAX_WSCALE) {
+ net_info_ratelimited("%s: Illegal window scaling value %d > %u received\n",
__func__,
- snd_wscale);
- snd_wscale = 14;
+ snd_wscale,
+ TCP_MAX_WSCALE);
+ snd_wscale = TCP_MAX_WSCALE;
}
opt_rx->snd_wscale = snd_wscale;
}
goto drop_and_free;
if (isn && tmp_opt.tstamp_ok)
- af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
if (!want_cookie && !isn) {
- /* VJ's idea. We save last timestamp seen
- * from the destination in peer table, when entering
- * state TIME-WAIT, and check against it before
- * accepting new connection request.
- *
- * If "isn" is not zero, this request hit alive
- * timewait bucket, so that all the necessary checks
- * are made in the function processing timewait state.
- */
- if (net->ipv4.tcp_death_row.sysctl_tw_recycle) {
- bool strict;
-
- dst = af_ops->route_req(sk, &fl, req, &strict);
-
- if (dst && strict &&
- !tcp_peer_is_proven(req, dst, true,
- tmp_opt.saw_tstamp)) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
- goto drop_and_release;
- }
- }
/* Kill the following clause, if you dislike this way. */
- else if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
- !tcp_peer_is_proven(req, dst, false,
- tmp_opt.saw_tstamp)) {
+ if (!net->ipv4.sysctl_tcp_syncookies &&
+ (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ !tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
* proven to be alive.
goto drop_and_release;
}
- isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+ isn = af_ops->init_seq_tsoff(skb, &tcp_rsk(req)->ts_off);
}
if (!dst) {
- dst = af_ops->route_req(sk, &fl, req, NULL);
+ dst = af_ops->route_req(sk, &fl, req);
if (!dst)
goto drop_and_free;
}
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
- if (!atomic_inc_not_zero(&exp->use))
+ if (!refcount_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
RCU_INIT_POINTER(nfnl_ct_hook, NULL);
#endif
+ synchronize_rcu();
}
module_init(ctnetlink_init);
strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME]));
timeout->l3num = l3num;
timeout->l4proto = l4proto;
- atomic_set(&timeout->refcnt, 1);
+ refcount_set(&timeout->refcnt, 1);
list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->l3num)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, timeout->l4proto->l4proto) ||
nla_put_be32(skb, CTA_TIMEOUT_USE,
- htonl(atomic_read(&timeout->refcnt))))
+ htonl(refcount_read(&timeout->refcnt))))
goto nla_put_failure;
if (likely(l4proto->ctnl_timeout.obj_to_nlattr)) {
/* We want to avoid races with ctnl_timeout_put. So only when the
* current refcnt is 1, we decrease it to 0.
*/
- if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
+ if (refcount_dec_if_one(&timeout->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
if (!try_module_get(THIS_MODULE))
goto err;
- if (!atomic_inc_not_zero(&timeout->refcnt)) {
+ if (!refcount_inc_not_zero(&timeout->refcnt)) {
module_put(THIS_MODULE);
goto err;
}
static void ctnl_timeout_put(struct ctnl_timeout *timeout)
{
- if (atomic_dec_and_test(&timeout->refcnt))
+ if (refcount_dec_and_test(&timeout->refcnt))
kfree_rcu(timeout, rcu_head);
module_put(THIS_MODULE);
list_del_rcu(&cur->head);
nf_ct_l4proto_put(cur->l4proto);
- if (atomic_dec_and_test(&cur->refcnt))
+ if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
}
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+ synchronize_rcu();
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- rcu_barrier();
}
module_init(cttimeout_init);
struct sctp_chunk *chk, *temp;
list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
+ struct sctp_stream_out *streamout;
+
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
sctp_insert_list(&asoc->outqueue.abandoned,
&chk->transmitted_list);
+ streamout = &asoc->stream->out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
q->out_qlen -= chk->skb->len;
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ if (chk->sinfo.sinfo_stream < asoc->stream->outcnt) {
+ struct sctp_stream_out *streamout =
+ &asoc->stream->out[chk->sinfo.sinfo_stream];
+
+ streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ }
msg_len -= SCTP_DATA_SNDSIZE(chk) +
sizeof(struct sk_buff) +
/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
* stream identifier.
*/
- if (chunk->sinfo.sinfo_stream >=
- asoc->c.sinit_num_ostreams) {
+ if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) {
/* Mark as failed send. */
sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
else if (param.p->type == SCTP_PARAM_RESET_IN_REQUEST)
reply = sctp_process_strreset_inreq(
(struct sctp_association *)asoc, param, &ev);
- /* More handles for other types will be added here, by now it
- * just ignores other types.
- */
+ else if (param.p->type == SCTP_PARAM_RESET_TSN_REQUEST)
+ reply = sctp_process_strreset_tsnreq(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS)
+ reply = sctp_process_strreset_addstrm_out(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_ADD_IN_STREAMS)
+ reply = sctp_process_strreset_addstrm_in(
+ (struct sctp_association *)asoc, param, &ev);
+ else if (param.p->type == SCTP_PARAM_RESET_RESPONSE)
+ reply = sctp_process_strreset_resp(
+ (struct sctp_association *)asoc, param, &ev);
if (ev)
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
/* Silently discard the chunk if stream-id is not valid */
sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+ if (ntohs(skip->stream) >= asoc->stream->incnt)
goto discard_noforce;
}
/* Silently discard the chunk if stream-id is not valid */
sctp_walk_fwdtsn(skip, chunk) {
- if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+ if (ntohs(skip->stream) >= asoc->stream->incnt)
goto gen_shutdown;
}
* and discard the DATA chunk.
*/
sid = ntohs(data_hdr->stream);
- if (sid >= asoc->c.sinit_max_instreams) {
+ if (sid >= asoc->stream->incnt) {
/* Mark tsn as received even though we drop it */
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
}
if (asoc->pmtu_pending)
- sctp_assoc_pending_pmtu(sk, asoc);
+ sctp_assoc_pending_pmtu(asoc);
/* If fragmentation is disabled and the message length exceeds the
* association fragmentation point, return EMSGSIZE. The I-D
}
/* Check for invalid stream. */
- if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
+ if (sinfo->sinfo_stream >= asoc->stream->outcnt) {
err = -EINVAL;
goto out_free;
}
err = PTR_ERR(datamsg);
goto out_free;
}
- datamsg->force_delay = !!(msg->msg_flags & MSG_MORE);
+ asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
/* Now send the (possibly) fragmented message. */
list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
if (trans) {
trans->pathmtu = params->spp_pathmtu;
- sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+ sctp_assoc_sync_pmtu(asoc);
} else if (asoc) {
asoc->pathmtu = params->spp_pathmtu;
} else {
(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
if (update) {
sctp_transport_pmtu(trans, sctp_opt2sk(sp));
- sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+ sctp_assoc_sync_pmtu(asoc);
}
} else if (asoc) {
asoc->param_flags =
return retval;
}
+static int sctp_setsockopt_reconfig_supported(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EINVAL;
+
+ if (optlen != sizeof(params))
+ goto out;
+
+ if (copy_from_user(¶ms, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ asoc->reconf_enable = !!params.assoc_value;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ sp->ep->reconf_enable = !!params.assoc_value;
+ } else {
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_setsockopt_enable_strreset(struct sock *sk,
char __user *optval,
unsigned int optlen)
case SCTP_DEFAULT_PRINFO:
retval = sctp_setsockopt_default_prinfo(sk, optval, optlen);
break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_setsockopt_reconfig_supported(sk, optval, optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_setsockopt_enable_strreset(sk, optval, optlen);
break;
info->sctpi_rwnd = asoc->a_rwnd;
info->sctpi_unackdata = asoc->unack_data;
info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
- info->sctpi_instrms = asoc->c.sinit_max_instreams;
- info->sctpi_outstrms = asoc->c.sinit_num_ostreams;
+ info->sctpi_instrms = asoc->stream->incnt;
+ info->sctpi_outstrms = asoc->stream->outcnt;
list_for_each(pos, &asoc->base.inqueue.in_chunk_list)
info->sctpi_inqueue++;
list_for_each(pos, &asoc->outqueue.out_chunk_list)
status.sstat_unackdata = asoc->unack_data;
status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
- status.sstat_instrms = asoc->c.sinit_max_instreams;
- status.sstat_outstrms = asoc->c.sinit_num_ostreams;
+ status.sstat_instrms = asoc->stream->incnt;
+ status.sstat_outstrms = asoc->stream->outcnt;
status.sstat_fragmentation_point = asoc->frag_point;
status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,
return retval;
}
+static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_out *streamout;
+ struct sctp_association *asoc;
+ struct sctp_prstatus params;
+ int retval = -EINVAL;
+ int policy;
+
+ if (len < sizeof(params))
+ goto out;
+
+ len = sizeof(params);
+ if (copy_from_user(¶ms, optval, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ policy = params.sprstat_policy;
+ if (policy & ~SCTP_PR_SCTP_MASK)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
+ if (!asoc || params.sprstat_sid >= asoc->stream->outcnt)
+ goto out;
+
+ streamout = &asoc->stream->out[params.sprstat_sid];
+ if (policy == SCTP_PR_SCTP_NONE) {
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
+ params.sprstat_abandoned_unsent +=
+ streamout->abandoned_unsent[policy];
+ params.sprstat_abandoned_sent +=
+ streamout->abandoned_sent[policy];
+ }
+ } else {
+ params.sprstat_abandoned_unsent =
+ streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ params.sprstat_abandoned_sent =
+ streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ }
+
+ if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_reconfig_supported(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(¶ms, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (asoc) {
+ params.assoc_value = asoc->reconf_enable;
+ } else if (!params.assoc_id) {
+ struct sctp_sock *sp = sctp_sk(sk);
+
+ params.assoc_value = sp->ep->reconf_enable;
+ } else {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, ¶ms, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt_enable_strreset(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
retval = sctp_getsockopt_pr_assocstatus(sk, len, optval,
optlen);
break;
+ case SCTP_PR_STREAM_STATUS:
+ retval = sctp_getsockopt_pr_streamstatus(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_RECONFIG_SUPPORTED:
+ retval = sctp_getsockopt_reconfig_supported(sk, len, optval,
+ optlen);
+ break;
case SCTP_ENABLE_STREAM_RESET:
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
- if (sk_can_busy_loop(sk) &&
- sk_busy_loop(sk, noblock))
- continue;
+ if (sk_can_busy_loop(sk)) {
+ sk_busy_loop(sk, noblock);
+
+ if (!skb_queue_empty(&sk->sk_receive_queue))
+ continue;
+ }
/* User doesn't want to wait. */
error = -EAGAIN;
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
- struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp)
+ int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
{
struct sctp_stream *stream;
int i;
stream = kzalloc(sizeof(*stream), gfp);
if (!stream)
- return NULL;
+ return -ENOMEM;
- stream->outcnt = outcnt;
+ stream->outcnt = asoc->c.sinit_num_ostreams;
stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
if (!stream->out) {
kfree(stream);
- return NULL;
+ return -ENOMEM;
}
for (i = 0; i < stream->outcnt; i++)
stream->out[i].state = SCTP_STREAM_OPEN;
- stream->incnt = incnt;
+ asoc->stream = stream;
+
+ return 0;
+ }
+
+ int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
+ {
+ struct sctp_stream *stream = asoc->stream;
+ int i;
+
+ /* Initial stream->out size may be very big, so free it and alloc
+ * a new one with new outcnt to save memory.
+ */
+ kfree(stream->out);
+ stream->outcnt = asoc->c.sinit_num_ostreams;
+ stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+ if (!stream->out)
+ goto nomem;
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ stream->incnt = asoc->c.sinit_max_instreams;
stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
if (!stream->in) {
kfree(stream->out);
- kfree(stream);
- return NULL;
+ goto nomem;
}
- return stream;
+ return 0;
+
+ nomem:
+ asoc->stream = NULL;
+ kfree(stream);
+
+ return -ENOMEM;
}
void sctp_stream_free(struct sctp_stream *stream)
stream->out = streamout;
}
- if (in) {
- struct sctp_stream_in *streamin;
-
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_KERNEL);
- if (!streamin)
- goto out;
-
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
- }
-
chunk = sctp_make_strreset_addstrm(asoc, out, in);
if (!chunk)
goto out;
}
static sctp_paramhdr_t *sctp_chunk_lookup_strreset_param(
- struct sctp_association *asoc, __u32 resp_seq)
+ struct sctp_association *asoc, __u32 resp_seq,
+ __be16 type)
{
struct sctp_chunk *chunk = asoc->strreset_chunk;
struct sctp_reconf_chunk *hdr;
union sctp_params param;
- if (ntohl(resp_seq) != asoc->strreset_outseq || !chunk)
+ if (!chunk)
return NULL;
hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr;
*/
struct sctp_strreset_tsnreq *req = param.v;
- if (req->request_seq == resp_seq)
+ if ((!resp_seq || req->request_seq == resp_seq) &&
+ (!type || type == req->param_hdr.type))
return param.v;
}
goto out;
if (asoc->strreset_chunk) {
- sctp_paramhdr_t *param_hdr;
- struct sctp_transport *t;
-
- param_hdr = sctp_chunk_lookup_strreset_param(
- asoc, outreq->response_seq);
- if (!param_hdr || param_hdr->type !=
- SCTP_PARAM_RESET_IN_REQUEST) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, outreq->response_seq,
+ SCTP_PARAM_RESET_IN_REQUEST)) {
/* same process with outstanding isn't 0 */
result = SCTP_STRRESET_ERR_IN_PROGRESS;
goto out;
asoc->strreset_outseq++;
if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
t = asoc->strreset_chunk->transport;
if (del_timer(&t->reconf_timer))
sctp_transport_put(t);
return chunk;
}
+
+struct sctp_chunk *sctp_process_strreset_tsnreq(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ __u32 init_tsn = 0, next_tsn = 0, max_tsn_seen;
+ struct sctp_strreset_tsnreq *tsnreq = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ __u32 request_seq;
+ __u16 i;
+
+ request_seq = ntohl(tsnreq->request_seq);
+ if (request_seq > asoc->strreset_inseq) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto out;
+ } else if (request_seq == asoc->strreset_inseq) {
+ asoc->strreset_inseq++;
+ }
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_RESET_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ /* G3: The same processing as though a SACK chunk with no gap report
+ * and a cumulative TSN ACK of the Sender's Next TSN minus 1 were
+ * received MUST be performed.
+ */
+ max_tsn_seen = sctp_tsnmap_get_max_tsn_seen(&asoc->peer.tsn_map);
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, max_tsn_seen);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ /* G1: Compute an appropriate value for the Receiver's Next TSN -- the
+ * TSN that the peer should use to send the next DATA chunk. The
+ * value SHOULD be the smallest TSN not acknowledged by the
+ * receiver of the request plus 2^31.
+ */
+ init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31);
+ sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL,
+ init_tsn, GFP_ATOMIC);
+
+ /* G4: The same processing as though a FWD-TSN chunk (as defined in
+ * [RFC3758]) with all streams affected and a new cumulative TSN
+ * ACK of the Receiver's Next TSN minus 1 were received MUST be
+ * performed.
+ */
+ sctp_outq_free(&asoc->outqueue);
+
+ /* G2: Compute an appropriate value for the local endpoint's next TSN,
+ * i.e., the next TSN assigned by the receiver of the SSN/TSN reset
+ * chunk. The value SHOULD be the highest TSN sent by the receiver
+ * of the request plus 1.
+ */
+ next_tsn = asoc->next_tsn;
+ asoc->ctsn_ack_point = next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ /* G5: The next expected and outgoing SSNs MUST be reset to 0 for all
+ * incoming and outgoing streams.
+ */
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, 0, init_tsn,
+ next_tsn, GFP_ATOMIC);
+
+out:
+ return sctp_make_strreset_tsnresp(asoc, result, request_seq,
+ next_tsn, init_tsn);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_out(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_in *streamin;
+ __u32 request_seq, incnt;
+ __u16 in;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (request_seq > asoc->strreset_inseq) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto out;
+ } else if (request_seq == asoc->strreset_inseq) {
+ asoc->strreset_inseq++;
+ }
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_chunk) {
+ if (!sctp_chunk_lookup_strreset_param(
+ asoc, 0, SCTP_PARAM_RESET_ADD_IN_STREAMS)) {
+ /* same process with outstanding isn't 0 */
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ if (!asoc->strreset_outstanding) {
+ struct sctp_transport *t;
+
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+ }
+
+ in = ntohs(addstrm->number_of_streams);
+ incnt = stream->incnt + in;
+ if (!in || incnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamin = krealloc(stream->in, incnt * sizeof(*streamin),
+ GFP_ATOMIC);
+ if (!streamin)
+ goto out;
+
+ memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
+ stream->in = streamin;
+ stream->incnt = incnt;
+
+ result = SCTP_STRRESET_PERFORMED;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, ntohs(addstrm->number_of_streams), 0, GFP_ATOMIC);
+
+out:
+ return sctp_make_strreset_resp(asoc, result, request_seq);
+}
+
+struct sctp_chunk *sctp_process_strreset_addstrm_in(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_addstrm *addstrm = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ __u32 result = SCTP_STRRESET_DENIED;
+ struct sctp_stream_out *streamout;
+ struct sctp_chunk *chunk = NULL;
+ __u32 request_seq, outcnt;
+ __u16 out;
+
+ request_seq = ntohl(addstrm->request_seq);
+ if (request_seq > asoc->strreset_inseq) {
+ result = SCTP_STRRESET_ERR_BAD_SEQNO;
+ goto out;
+ } else if (request_seq == asoc->strreset_inseq) {
+ asoc->strreset_inseq++;
+ }
+
+ if (!(asoc->strreset_enable & SCTP_ENABLE_CHANGE_ASSOC_REQ))
+ goto out;
+
+ if (asoc->strreset_outstanding) {
+ result = SCTP_STRRESET_ERR_IN_PROGRESS;
+ goto out;
+ }
+
+ out = ntohs(addstrm->number_of_streams);
+ outcnt = stream->outcnt + out;
+ if (!out || outcnt > SCTP_MAX_STREAM)
+ goto out;
+
+ streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
+ GFP_ATOMIC);
+ if (!streamout)
+ goto out;
+
+ memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
+ stream->out = streamout;
+
+ chunk = sctp_make_strreset_addstrm(asoc, out, 0);
+ if (!chunk)
+ goto out;
+
+ asoc->strreset_chunk = chunk;
+ asoc->strreset_outstanding = 1;
+ sctp_chunk_hold(asoc->strreset_chunk);
+
+ stream->outcnt = outcnt;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc,
+ 0, 0, ntohs(addstrm->number_of_streams), GFP_ATOMIC);
+
+out:
+ if (!chunk)
+ chunk = sctp_make_strreset_resp(asoc, result, request_seq);
+
+ return chunk;
+}
+
+struct sctp_chunk *sctp_process_strreset_resp(
+ struct sctp_association *asoc,
+ union sctp_params param,
+ struct sctp_ulpevent **evp)
+{
+ struct sctp_strreset_resp *resp = param.v;
+ struct sctp_stream *stream = asoc->stream;
+ struct sctp_transport *t;
+ __u16 i, nums, flags = 0;
+ sctp_paramhdr_t *req;
+ __u32 result;
+
+ req = sctp_chunk_lookup_strreset_param(asoc, resp->response_seq, 0);
+ if (!req)
+ return NULL;
+
+ result = ntohl(resp->result);
+ if (result != SCTP_STRRESET_PERFORMED) {
+ /* if in progress, do nothing but retransmit */
+ if (result == SCTP_STRRESET_IN_PROGRESS)
+ return NULL;
+ else if (result == SCTP_STRRESET_DENIED)
+ flags = SCTP_STREAM_RESET_DENIED;
+ else
+ flags = SCTP_STREAM_RESET_FAILED;
+ }
+
+ if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) {
+ struct sctp_strreset_outreq *outreq;
+ __u16 *str_p = NULL;
+
+ outreq = (struct sctp_strreset_outreq *)req;
+ nums = (ntohs(outreq->param_hdr.length) - sizeof(*outreq)) / 2;
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ if (nums) {
+ str_p = outreq->list_of_streams;
+ for (i = 0; i < nums; i++)
+ stream->out[ntohs(str_p[i])].ssn = 0;
+ } else {
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ }
+
+ flags = SCTP_STREAM_RESET_OUTGOING_SSN;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) {
+ struct sctp_strreset_inreq *inreq;
+ __u16 *str_p = NULL;
+
+ /* if the result is performed, it's impossible for inreq */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ inreq = (struct sctp_strreset_inreq *)req;
+ nums = (ntohs(inreq->param_hdr.length) - sizeof(*inreq)) / 2;
+
+ str_p = inreq->list_of_streams;
+ *evp = sctp_ulpevent_make_stream_reset_event(asoc, flags,
+ nums, str_p, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_TSN_REQUEST) {
+ struct sctp_strreset_resptsn *resptsn;
+ __u32 stsn, rtsn;
+
+ /* check for resptsn, as sctp_verify_reconf didn't do it*/
+ if (ntohs(param.p->length) != sizeof(*resptsn))
+ return NULL;
+
+ resptsn = (struct sctp_strreset_resptsn *)resp;
+ stsn = ntohl(resptsn->senders_next_tsn);
+ rtsn = ntohl(resptsn->receivers_next_tsn);
+
+ if (result == SCTP_STRRESET_PERFORMED) {
+ __u32 mtsn = sctp_tsnmap_get_max_tsn_seen(
+ &asoc->peer.tsn_map);
+
+ sctp_ulpq_reasm_flushtsn(&asoc->ulpq, mtsn);
+ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC);
+
+ sctp_tsnmap_init(&asoc->peer.tsn_map,
+ SCTP_TSN_MAP_INITIAL,
+ stsn, GFP_ATOMIC);
+
+ sctp_outq_free(&asoc->outqueue);
+
+ asoc->next_tsn = rtsn;
+ asoc->ctsn_ack_point = asoc->next_tsn - 1;
+ asoc->adv_peer_ack_point = asoc->ctsn_ack_point;
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].ssn = 0;
+ for (i = 0; i < stream->incnt; i++)
+ stream->in[i].ssn = 0;
+ }
+
+ for (i = 0; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+
+ *evp = sctp_ulpevent_make_assoc_reset_event(asoc, flags,
+ stsn, rtsn, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_OUT_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+ __u16 number;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+ number = stream->outcnt - nums;
+
+ if (result == SCTP_STRRESET_PERFORMED)
+ for (i = number; i < stream->outcnt; i++)
+ stream->out[i].state = SCTP_STREAM_OPEN;
+ else
+ stream->outcnt = number;
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ 0, nums, GFP_ATOMIC);
+ } else if (req->type == SCTP_PARAM_RESET_ADD_IN_STREAMS) {
+ struct sctp_strreset_addstrm *addstrm;
+
+ /* if the result is performed, it's impossible for addstrm in
+ * request.
+ */
+ if (result == SCTP_STRRESET_PERFORMED)
+ return NULL;
+
+ addstrm = (struct sctp_strreset_addstrm *)req;
+ nums = ntohs(addstrm->number_of_streams);
+
+ *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+ nums, 0, GFP_ATOMIC);
+ }
+
+ asoc->strreset_outstanding--;
+ asoc->strreset_outseq++;
+
+ /* remove everything for this reconf request */
+ if (!asoc->strreset_outstanding) {
+ t = asoc->strreset_chunk->transport;
+ if (del_timer(&t->reconf_timer))
+ sctp_transport_put(t);
+
+ sctp_chunk_put(asoc->strreset_chunk);
+ asoc->strreset_chunk = NULL;
+ }
+
+ return NULL;
+}
LIBDIR := ../../../lib
BPFDIR := $(LIBDIR)/bpf
+ APIDIR := ../../../include/uapi
+ GENDIR := ../../../../include/generated
+ GENHDR := $(GENDIR)/autoconf.h
- CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR) -I../../../include
+ ifneq ($(wildcard $(GENHDR)),)
+ GENFLAGS := -DHAVE_GENHDR
+ endif
+
-CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS)
-LDLIBS += -lcap
++CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDLIBS += -lcap -lelf
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs
+
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o
TEST_PROGS := test_kmod.sh
include ../lib.mk
-BPFOBJ := $(OUTPUT)/bpf.o
+BPFOBJ := $(OUTPUT)/libbpf.a
$(TEST_GEN_PROGS): $(BPFOBJ)
$(BPFOBJ): force
$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
+
+CLANG ?= clang
+
+%.o: %.c
+ $(CLANG) -I../../../include/uapi -I../../../../samples/bpf/ \
+ -D__x86_64__ -Wno-compare-distinct-pointer-types \
+ -O2 -target bpf -c $< -o $@
#include <bpf/bpf.h>
+ #ifdef HAVE_GENHDR
+ # include "autoconf.h"
+ #else
+ # if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+ # define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+ # endif
+ #endif
+
#include "../../../include/linux/filter.h"
#ifndef ARRAY_SIZE
#define MAX_INSNS 512
#define MAX_FIXUPS 8
+#define MAX_NR_MAPS 4
+ #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
+
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
int fixup_map1[MAX_FIXUPS];
int fixup_map2[MAX_FIXUPS];
int fixup_prog[MAX_FIXUPS];
+ int fixup_map_in_map[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
enum {
REJECT
} result, result_unpriv;
enum bpf_prog_type prog_type;
+ uint8_t flags;
};
/* Note we want this to be 64 bit aligned so that the end of our array is
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
+ {
+ "direct packet access: test15 (spill with xadd)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 4096),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 invalid mem access 'inv'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
{
"helper access to packet: test1, valid packet_ptr range",
.insns = {
.errstr_unpriv = "R0 pointer arithmetic prohibited",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"valid map access into an array with a variable",
.errstr_unpriv = "R0 pointer arithmetic prohibited",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"valid map access into an array with a signed variable",
.errstr_unpriv = "R0 pointer arithmetic prohibited",
.result_unpriv = REJECT,
.result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"invalid map access into an array with a constant",
.errstr = "R0 min value is outside of the array range",
.result_unpriv = REJECT,
.result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"invalid map access into an array with a variable",
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result_unpriv = REJECT,
.result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"invalid map access into an array with no floor check",
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result_unpriv = REJECT,
.result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"invalid map access into an array with a invalid max check",
.errstr = "invalid access to map value, value_size=48 off=44 size=8",
.result_unpriv = REJECT,
.result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"invalid map access into an array with a invalid max check",
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result_unpriv = REJECT,
.result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"multiple registers share map_lookup_elem result",
.result = REJECT,
.errstr_unpriv = "R0 pointer arithmetic prohibited",
.result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"constant register |= constant should keep constant type",
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_LWT_XMIT,
},
+ {
+ "overlapping checks for direct packet access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
{
"invalid access of tc_classid for LWT_IN",
.insns = {
.result_unpriv = REJECT,
},
{
- "map element value (adjusted) is preserved across register spilling",
+ "map element value or null is marked on register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value store of cleared call register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R1 !read_ok",
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value with unaligned store",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value with unaligned load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value illegal alu op, 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value illegal alu op, 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value illegal alu op, 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value illegal alu op, 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value illegal alu op, 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_IMM(BPF_REG_3, 4096),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 invalid mem access 'inv'",
+ .errstr = "R0 invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value is preserved across register spilling",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
.errstr_unpriv = "R0 pointer arithmetic prohibited",
.result = ACCEPT,
.result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result = REJECT,
.result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"invalid range check",
.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
.result = REJECT,
.result_unpriv = REJECT,
- },
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map in map access",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "invalid inner map pointer",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .errstr = "R1 type=inv expected=map_ptr",
+ .errstr_unpriv = "R1 pointer arithmetic prohibited",
+ .result = REJECT,
+ },
+ {
+ "forgot null checking on the inner map pointer",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .errstr = "R1 type=map_value_or_null expected=map_ptr",
+ .result = REJECT,
+ }
};
static int probe_filter_length(const struct bpf_insn *fp)
return fd;
}
+static int create_map_in_map(void)
+{
+ int inner_map_fd, outer_map_fd;
+
+ inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(int), 1, 0);
+ if (inner_map_fd < 0) {
+ printf("Failed to create array '%s'!\n", strerror(errno));
+ return inner_map_fd;
+ }
+
+ outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ sizeof(int), inner_map_fd, 1, 0);
+ if (outer_map_fd < 0)
+ printf("Failed to create array of maps '%s'!\n",
+ strerror(errno));
+
+ close(inner_map_fd);
+
+ return outer_map_fd;
+}
+
static char bpf_vlog[32768];
static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
- int *fd_f1, int *fd_f2, int *fd_f3)
+ int *map_fds)
{
int *fixup_map1 = test->fixup_map1;
int *fixup_map2 = test->fixup_map2;
int *fixup_prog = test->fixup_prog;
+ int *fixup_map_in_map = test->fixup_map_in_map;
/* Allocating HTs with 1 elem is fine here, since we only test
* for verifier and not do a runtime lookup, so the only thing
* that really matters is value size in this case.
*/
if (*fixup_map1) {
- *fd_f1 = create_map(sizeof(long long), 1);
+ map_fds[0] = create_map(sizeof(long long), 1);
do {
- prog[*fixup_map1].imm = *fd_f1;
+ prog[*fixup_map1].imm = map_fds[0];
fixup_map1++;
} while (*fixup_map1);
}
if (*fixup_map2) {
- *fd_f2 = create_map(sizeof(struct test_val), 1);
+ map_fds[1] = create_map(sizeof(struct test_val), 1);
do {
- prog[*fixup_map2].imm = *fd_f2;
+ prog[*fixup_map2].imm = map_fds[1];
fixup_map2++;
} while (*fixup_map2);
}
if (*fixup_prog) {
- *fd_f3 = create_prog_array();
+ map_fds[2] = create_prog_array();
do {
- prog[*fixup_prog].imm = *fd_f3;
+ prog[*fixup_prog].imm = map_fds[2];
fixup_prog++;
} while (*fixup_prog);
}
+
+ if (*fixup_map_in_map) {
+ map_fds[3] = create_map_in_map();
+ do {
+ prog[*fixup_map_in_map].imm = map_fds[3];
+ fixup_map_in_map++;
+ } while (*fixup_map_in_map);
+ }
}
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
+ int fd_prog, expected_ret, reject_from_alignment;
struct bpf_insn *prog = test->insns;
int prog_len = probe_filter_length(prog);
int prog_type = test->prog_type;
++<<<<<<< HEAD
+ int map_fds[MAX_NR_MAPS];
+ int fd_prog, expected_ret;
++=======
+ int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1;
++>>>>>>> ea6b1720ce25f92f7a17b2e0c2b653d20773d10a
const char *expected_err;
+ int i;
+
+ for (i = 0; i < MAX_NR_MAPS; i++)
+ map_fds[i] = -1;
- do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3);
+ do_test_fixup(test, prog, map_fds);
fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
prog, prog_len, "GPL", 0, bpf_vlog,
test->result_unpriv : test->result;
expected_err = unpriv && test->errstr_unpriv ?
test->errstr_unpriv : test->errstr;
+
+ reject_from_alignment = fd_prog < 0 &&
+ (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
+ strstr(bpf_vlog, "Unknown alignment.");
+ #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (reject_from_alignment) {
+ printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n",
+ strerror(errno));
+ goto fail_log;
+ }
+ #endif
if (expected_ret == ACCEPT) {
- if (fd_prog < 0) {
+ if (fd_prog < 0 && !reject_from_alignment) {
printf("FAIL\nFailed to load prog '%s'!\n",
strerror(errno));
goto fail_log;
printf("FAIL\nUnexpected success to load!\n");
goto fail_log;
}
- if (!strstr(bpf_vlog, expected_err)) {
+ if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
printf("FAIL\nUnexpected error message!\n");
goto fail_log;
}
}
(*passes)++;
- printf("OK\n");
+ printf("OK%s\n", reject_from_alignment ?
+ " (NOTE: reject due to unknown alignment)" : "");
close_fds:
close(fd_prog);
- close(fd_f1);
- close(fd_f2);
- close(fd_f3);
+ for (i = 0; i < MAX_NR_MAPS; i++)
+ close(map_fds[i]);
sched_yield();
return;
fail_log: