git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Feb 2019 17:28:26 +0000 (09:28 -0800)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Feb 2019 17:28:26 +0000 (09:28 -0800)
Pull networking fixes from David Miller:
 "Hopefully the last pull request for this release. Fingers crossed:

   1) Only refcount ESP stats on full sockets, from Martin Willi.

   2) Missing barriers in AF_UNIX, from Al Viro.

   3) RCU protection fixes in ipv6 route code, from Paolo Abeni.

   4) Avoid false positives in untrusted GSO validation, from Willem de
      Bruijn.

   5) Forwarded mesh packets in mac80211 need more tailroom allocated,
      from Felix Fietkau.

   6) Use operstate consistently for linkup in team driver, from George
      Wilkie.

   7) ThunderX bug fixes from Vadim Lomovtsev. Mostly races between VF
      and PF code paths.

   8) Purge ipv6 exceptions during netdevice removal, from Paolo Abeni.

   9) nfp eBPF code gen fixes from Jiong Wang.

  10) bnxt_en firmware timeout fix from Michael Chan.

  11) Use after free in udp/udpv6 error handlers, from Paolo Abeni.

  12) Fix a race in x25_bind triggerable by syzbot, from Eric Dumazet"

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (65 commits)
  net: phy: realtek: Dummy IRQ calls for RTL8366RB
  tcp: repaired skbs must init their tso_segs
  net/x25: fix a race in x25_bind()
  net: dsa: Remove documentation for port_fdb_prepare
  Revert "bridge: do not add port to router list when receives query with source 0.0.0.0"
  selftests: fib_tests: sleep after changing carrier. again.
  net: set static variable an initial value in atl2_probe()
  net: phy: marvell10g: Fix Multi-G advertisement to only advertise 10G
  bpf, doc: add bpf list as secondary entry to maintainers file
  udp: fix possible user after free in error handler
  udpv6: fix possible user after free in error handler
  fou6: fix proto error handler argument type
  udpv6: add the required annotation to mib type
  mdio_bus: Fix use-after-free on device_register fails
  net: Set rtm_table to RT_TABLE_COMPAT for ipv6 for tables > 255
  bnxt_en: Wait longer for the firmware message response to complete.
  bnxt_en: Fix typo in firmware message timeout logic.
  nfp: bpf: fix ALU32 high bits clearance bug
  nfp: bpf: fix code-gen bug on BPF_ALU | BPF_XOR | BPF_K
  Documentation: networking: switchdev: Update port parent ID section
  ...

64 files changed:
Documentation/networking/dsa/dsa.txt
Documentation/networking/switchdev.txt
MAINTAINERS
drivers/net/bonding/bond_main.c
drivers/net/ethernet/atheros/atlx/atl2.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/netronome/nfp/bpf/jit.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/phy/marvell10g.c
drivers/net/phy/mdio_bus.c
drivers/net/phy/realtek.c
drivers/net/team/team.c
drivers/net/usb/r8152.c
drivers/net/vrf.c
drivers/net/wireless/mac80211_hwsim.c
include/linux/phy.h
include/linux/virtio_net.h
include/net/phonet/pep.h
include/net/xfrm.h
kernel/bpf/lpm_trie.c
net/bpf/test_run.c
net/bridge/br_multicast.c
net/compat.c
net/dsa/port.c
net/ipv4/esp4.c
net/ipv4/ip_gre.c
net/ipv4/tcp_output.c
net/ipv4/udp.c
net/ipv6/esp6.c
net/ipv6/fou6.c
net/ipv6/ip6_gre.c
net/ipv6/route.c
net/ipv6/udp.c
net/ipv6/xfrm6_tunnel.c
net/key/af_key.c
net/mac80211/main.c
net/mac80211/rx.c
net/phonet/pep.c
net/sctp/transport.c
net/smc/smc.h
net/tipc/socket.c
net/unix/af_unix.c
net/unix/diag.c
net/x25/af_x25.c
net/xdp/xsk.c
net/xfrm/xfrm_interface.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
security/lsm_audit.c
tools/testing/selftests/bpf/test_lpm_map.c
tools/testing/selftests/net/fib_tests.sh

index 25170ad7d25b4b58774f939743ddc3ef9d58d3d9..101f2b2c69ad14d003d674ed1f6d0de1995bcf93 100644 (file)
@@ -533,16 +533,12 @@ Bridge VLAN filtering
   function that the driver has to call for each VLAN the given port is a member
   of. A switchdev object is used to carry the VID and bridge flags.
 
-- port_fdb_prepare: bridge layer function invoked when the bridge prepares the
-  installation of a Forwarding Database entry. If the operation is not
-  supported, this function should return -EOPNOTSUPP to inform the bridge code
-  to fallback to a software implementation. No hardware setup must be done in
-  this function. See port_fdb_add for this and details.
-
 - port_fdb_add: bridge layer function invoked when the bridge wants to install a
   Forwarding Database entry, the switch hardware should be programmed with the
   specified address in the specified VLAN Id in the forwarding database
-  associated with this VLAN ID
+  associated with this VLAN ID. If the operation is not supported, this
+  function should return -EOPNOTSUPP to inform the bridge code to fallback to
+  a software implementation.
 
 Note: VLAN ID 0 corresponds to the port private database, which, in the context
 of DSA, would be the its port-based VLAN, used by the associated bridge device.
index 82236a17b5e65198be004d2cdd6a7c5bd8a9b7d4..97b7ca8b9b86bfd192753e71795ee6f8e9a4ef24 100644 (file)
@@ -92,11 +92,11 @@ device.
 Switch ID
 ^^^^^^^^^
 
-The switchdev driver must implement the switchdev op switchdev_port_attr_get
-for SWITCHDEV_ATTR_ID_PORT_PARENT_ID for each port netdev, returning the same
-physical ID for each port of a switch.  The ID must be unique between switches
-on the same system.  The ID does not need to be unique between switches on
-different systems.
+The switchdev driver must implement the net_device operation
+ndo_get_port_parent_id for each port netdev, returning the same physical ID for
+each port of a switch. The ID must be unique between switches on the same
+system. The ID does not need to be unique between switches on different
+systems.
 
 The switch ID is used to locate ports on a switch and to know if aggregated
 ports belong to the same switch.
index e6e17d8c5aae9483dbb94f722d764438d64f26b5..dce5c099f43c39079a873a48737b7862f3de8c98 100644 (file)
@@ -2851,7 +2851,7 @@ R:        Martin KaFai Lau <kafai@fb.com>
 R:     Song Liu <songliubraving@fb.com>
 R:     Yonghong Song <yhs@fb.com>
 L:     netdev@vger.kernel.org
-L:     linux-kernel@vger.kernel.org
+L:     bpf@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
 Q:     https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
@@ -2881,6 +2881,7 @@ N:        bpf
 BPF JIT for ARM
 M:     Shubham Bansal <illusionist.neo@gmail.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     arch/arm/net/
 
@@ -2889,18 +2890,21 @@ M:      Daniel Borkmann <daniel@iogearbox.net>
 M:     Alexei Starovoitov <ast@kernel.org>
 M:     Zi Shen Lim <zlim.lnx@gmail.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Supported
 F:     arch/arm64/net/
 
 BPF JIT for MIPS (32-BIT AND 64-BIT)
 M:     Paul Burton <paul.burton@mips.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     arch/mips/net/
 
 BPF JIT for NFP NICs
 M:     Jakub Kicinski <jakub.kicinski@netronome.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/netronome/nfp/bpf/
 
@@ -2908,6 +2912,7 @@ BPF JIT for POWERPC (32-BIT AND 64-BIT)
 M:     Naveen N. Rao <naveen.n.rao@linux.ibm.com>
 M:     Sandipan Das <sandipan@linux.ibm.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     arch/powerpc/net/
 
@@ -2915,6 +2920,7 @@ BPF JIT for S390
 M:     Martin Schwidefsky <schwidefsky@de.ibm.com>
 M:     Heiko Carstens <heiko.carstens@de.ibm.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     arch/s390/net/
 X:     arch/s390/net/pnet.c
@@ -2922,12 +2928,14 @@ X:      arch/s390/net/pnet.c
 BPF JIT for SPARC (32-BIT AND 64-BIT)
 M:     David S. Miller <davem@davemloft.net>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     arch/sparc/net/
 
 BPF JIT for X86 32-BIT
 M:     Wang YanQing <udknight@gmail.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     arch/x86/net/bpf_jit_comp32.c
 
@@ -2935,6 +2943,7 @@ BPF JIT for X86 64-BIT
 M:     Alexei Starovoitov <ast@kernel.org>
 M:     Daniel Borkmann <daniel@iogearbox.net>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Supported
 F:     arch/x86/net/
 X:     arch/x86/net/bpf_jit_comp32.c
@@ -3389,9 +3398,8 @@ F:        Documentation/media/v4l-drivers/cafe_ccic*
 F:     drivers/media/platform/marvell-ccic/
 
 CAIF NETWORK LAYER
-M:     Dmitry Tarnyagin <dmitry.tarnyagin@lockless.no>
 L:     netdev@vger.kernel.org
-S:     Supported
+S:     Orphan
 F:     Documentation/networking/caif/
 F:     drivers/net/caif/
 F:     include/uapi/linux/caif/
@@ -8486,6 +8494,7 @@ L7 BPF FRAMEWORK
 M:     John Fastabend <john.fastabend@gmail.com>
 M:     Daniel Borkmann <daniel@iogearbox.net>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     include/linux/skmsg.h
 F:     net/core/skmsg.c
@@ -16713,6 +16722,7 @@ M:      Jesper Dangaard Brouer <hawk@kernel.org>
 M:     John Fastabend <john.fastabend@gmail.com>
 L:     netdev@vger.kernel.org
 L:     xdp-newbies@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Supported
 F:     net/core/xdp.c
 F:     include/net/xdp.h
@@ -16726,6 +16736,7 @@ XDP SOCKETS (AF_XDP)
 M:     Björn Töpel <bjorn.topel@intel.com>
 M:     Magnus Karlsson <magnus.karlsson@intel.com>
 L:     netdev@vger.kernel.org
+L:     bpf@vger.kernel.org
 S:     Maintained
 F:     kernel/bpf/xskmap.c
 F:     net/xdp/
index 485462d3087fcadcdbdb6fefc4b0846dbbb56838..537c90c8eb0acf953ae1928806fbf0eb540cb47c 100644 (file)
@@ -1183,29 +1183,22 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
                }
        }
 
-       /* Link-local multicast packets should be passed to the
-        * stack on the link they arrive as well as pass them to the
-        * bond-master device. These packets are mostly usable when
-        * stack receives it with the link on which they arrive
-        * (e.g. LLDP) they also must be available on master. Some of
-        * the use cases include (but are not limited to): LLDP agents
-        * that must be able to operate both on enslaved interfaces as
-        * well as on bonds themselves; linux bridges that must be able
-        * to process/pass BPDUs from attached bonds when any kind of
-        * STP version is enabled on the network.
+       /*
+        * For packets determined by bond_should_deliver_exact_match() call to
+        * be suppressed we want to make an exception for link-local packets.
+        * This is necessary for e.g. LLDP daemons to be able to monitor
+        * inactive slave links without being forced to bind to them
+        * explicitly.
+        *
+        * At the same time, packets that are passed to the bonding master
+        * (including link-local ones) can have their originating interface
+        * determined via PACKET_ORIGDEV socket option.
         */
-       if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) {
-               struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-               if (nskb) {
-                       nskb->dev = bond->dev;
-                       nskb->queue_mapping = 0;
-                       netif_rx(nskb);
-               }
-               return RX_HANDLER_PASS;
-       }
-       if (bond_should_deliver_exact_match(skb, slave, bond))
+       if (bond_should_deliver_exact_match(skb, slave, bond)) {
+               if (is_link_local_ether_addr(eth_hdr(skb)->h_dest))
+                       return RX_HANDLER_PASS;
                return RX_HANDLER_EXACT;
+       }
 
        skb->dev = bond->dev;
 
index bb41becb66099389216192c761541ad1fd51790d..31ff1e0d1baacc1fba3a95329b9de8159dfbadd6 100644 (file)
@@ -1335,13 +1335,11 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct net_device *netdev;
        struct atl2_adapter *adapter;
-       static int cards_found;
+       static int cards_found = 0;
        unsigned long mmio_start;
        int mmio_len;
        int err;
 
-       cards_found = 0;
-
        err = pci_enable_device(pdev);
        if (err)
                return err;
index 8bc7e495b027083942e6963c56bed62a25fd6f5d..d95730c6e0f20a9b1bd5987796bf790e1ea8ac32 100644 (file)
@@ -3903,7 +3903,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
                        if (len)
                                break;
                        /* on first few passes, just barely sleep */
-                       if (i < DFLT_HWRM_CMD_TIMEOUT)
+                       if (i < HWRM_SHORT_TIMEOUT_COUNTER)
                                usleep_range(HWRM_SHORT_MIN_TIMEOUT,
                                             HWRM_SHORT_MAX_TIMEOUT);
                        else
@@ -3926,7 +3926,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len,
                        dma_rmb();
                        if (*valid)
                                break;
-                       udelay(1);
+                       usleep_range(1, 5);
                }
 
                if (j >= HWRM_VALID_BIT_DELAY_USEC) {
index a451796deefe50890c198aba294134e8c6453b62..2fb653e0048da1370e71bc2ff03d3c1352cc8c74 100644 (file)
@@ -582,7 +582,7 @@ struct nqe_cn {
        (HWRM_SHORT_TIMEOUT_COUNTER * HWRM_SHORT_MIN_TIMEOUT +          \
         ((n) - HWRM_SHORT_TIMEOUT_COUNTER) * HWRM_MIN_TIMEOUT))
 
-#define HWRM_VALID_BIT_DELAY_USEC      20
+#define HWRM_VALID_BIT_DELAY_USEC      150
 
 #define BNXT_HWRM_CHNL_CHIMP   0
 #define BNXT_HWRM_CHNL_KONG    1
index f4d81765221ea583b327f97879bc8ae3c705259a..62636c1ed14194977e1f5c548413088558045de2 100644 (file)
@@ -271,7 +271,7 @@ struct xcast_addr_list {
 };
 
 struct nicvf_work {
-       struct delayed_work    work;
+       struct work_struct     work;
        u8                     mode;
        struct xcast_addr_list *mc;
 };
@@ -327,7 +327,11 @@ struct nicvf {
        struct nicvf_work       rx_mode_work;
        /* spinlock to protect workqueue arguments from concurrent access */
        spinlock_t              rx_mode_wq_lock;
-
+       /* workqueue for handling kernel ndo_set_rx_mode() calls */
+       struct workqueue_struct *nicvf_rx_mode_wq;
+       /* mutex to protect VF's mailbox contents from concurrent access */
+       struct mutex            rx_mode_mtx;
+       struct delayed_work     link_change_work;
        /* PTP timestamp */
        struct cavium_ptp       *ptp_clock;
        /* Inbound timestamping is on */
@@ -575,10 +579,8 @@ struct set_ptp {
 
 struct xcast {
        u8    msg;
-       union {
-               u8    mode;
-               u64   mac;
-       } data;
+       u8    mode;
+       u64   mac:48;
 };
 
 /* 128 bit shared memory between PF and each VF */
index 6c8dcb65ff031d230303604c2071797027bf11a4..c90252829ed3402a86727f909eeeffe07c90c3fe 100644 (file)
@@ -57,14 +57,8 @@ struct nicpf {
 #define        NIC_GET_BGX_FROM_VF_LMAC_MAP(map)       ((map >> 4) & 0xF)
 #define        NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)      (map & 0xF)
        u8                      *vf_lmac_map;
-       struct delayed_work     dwork;
-       struct workqueue_struct *check_link;
-       u8                      *link;
-       u8                      *duplex;
-       u32                     *speed;
        u16                     cpi_base[MAX_NUM_VFS_SUPPORTED];
        u16                     rssi_base[MAX_NUM_VFS_SUPPORTED];
-       bool                    mbx_lock[MAX_NUM_VFS_SUPPORTED];
 
        /* MSI-X */
        u8                      num_vec;
@@ -929,6 +923,35 @@ static void nic_config_timestamp(struct nicpf *nic, int vf, struct set_ptp *ptp)
        nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG | (pkind_idx << 3), pkind_val);
 }
 
+/* Get BGX LMAC link status and update corresponding VF
+ * if there is a change, valid only if internal L2 switch
+ * is not present otherwise VF link is always treated as up
+ */
+static void nic_link_status_get(struct nicpf *nic, u8 vf)
+{
+       union nic_mbx mbx = {};
+       struct bgx_link_status link;
+       u8 bgx, lmac;
+
+       mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+
+       /* Get BGX, LMAC indices for the VF */
+       bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+       lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+       /* Get interface link status */
+       bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
+
+       /* Send a mbox message to VF with current link status */
+       mbx.link_status.link_up = link.link_up;
+       mbx.link_status.duplex = link.duplex;
+       mbx.link_status.speed = link.speed;
+       mbx.link_status.mac_type = link.mac_type;
+
+       /* reply with link status */
+       nic_send_msg_to_vf(nic, vf, &mbx);
+}
+
 /* Interrupt handler to handle mailbox messages from VFs */
 static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 {
@@ -941,8 +964,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        int i;
        int ret = 0;
 
-       nic->mbx_lock[vf] = true;
-
        mbx_addr = nic_get_mbx_addr(vf);
        mbx_data = (u64 *)&mbx;
 
@@ -957,12 +978,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        switch (mbx.msg.msg) {
        case NIC_MBOX_MSG_READY:
                nic_mbx_send_ready(nic, vf);
-               if (vf < nic->num_vf_en) {
-                       nic->link[vf] = 0;
-                       nic->duplex[vf] = 0;
-                       nic->speed[vf] = 0;
-               }
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_QS_CFG:
                reg_addr = NIC_PF_QSET_0_127_CFG |
                           (mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -1031,7 +1047,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                break;
        case NIC_MBOX_MSG_RSS_SIZE:
                nic_send_rss_size(nic, vf);
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_RSS_CFG:
        case NIC_MBOX_MSG_RSS_CFG_CONT:
                nic_config_rss(nic, &mbx.rss_cfg);
@@ -1039,7 +1055,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        case NIC_MBOX_MSG_CFG_DONE:
                /* Last message of VF config msg sequence */
                nic_enable_vf(nic, vf, true);
-               goto unlock;
+               break;
        case NIC_MBOX_MSG_SHUTDOWN:
                /* First msg in VF teardown sequence */
                if (vf >= nic->num_vf_en)
@@ -1049,19 +1065,19 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                break;
        case NIC_MBOX_MSG_ALLOC_SQS:
                nic_alloc_sqs(nic, &mbx.sqs_alloc);
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_NICVF_PTR:
                nic->nicvf[vf] = mbx.nicvf.nicvf;
                break;
        case NIC_MBOX_MSG_PNICVF_PTR:
                nic_send_pnicvf(nic, vf);
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_SNICVF_PTR:
                nic_send_snicvf(nic, &mbx.nicvf);
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_BGX_STATS:
                nic_get_bgx_stats(nic, &mbx.bgx_stats);
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_LOOPBACK:
                ret = nic_config_loopback(nic, &mbx.lbk);
                break;
@@ -1070,7 +1086,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                break;
        case NIC_MBOX_MSG_PFC:
                nic_pause_frame(nic, vf, &mbx.pfc);
-               goto unlock;
+               return;
        case NIC_MBOX_MSG_PTP_CFG:
                nic_config_timestamp(nic, vf, &mbx.ptp);
                break;
@@ -1094,7 +1110,7 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
                lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
                bgx_set_dmac_cam_filter(nic->node, bgx, lmac,
-                                       mbx.xcast.data.mac,
+                                       mbx.xcast.mac,
                                        vf < NIC_VF_PER_MBX_REG ? vf :
                                        vf - NIC_VF_PER_MBX_REG);
                break;
@@ -1106,8 +1122,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                }
                bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
                lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-               bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.data.mode);
+               bgx_set_xcast_mode(nic->node, bgx, lmac, mbx.xcast.mode);
                break;
+       case NIC_MBOX_MSG_BGX_LINK_CHANGE:
+               if (vf >= nic->num_vf_en) {
+                       ret = -1; /* NACK */
+                       break;
+               }
+               nic_link_status_get(nic, vf);
+               return;
        default:
                dev_err(&nic->pdev->dev,
                        "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1121,8 +1144,6 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
                        mbx.msg.msg, vf);
                nic_mbx_send_nack(nic, vf);
        }
-unlock:
-       nic->mbx_lock[vf] = false;
 }
 
 static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
@@ -1270,52 +1291,6 @@ static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic)
        return 0;
 }
 
-/* Poll for BGX LMAC link status and update corresponding VF
- * if there is a change, valid only if internal L2 switch
- * is not present otherwise VF link is always treated as up
- */
-static void nic_poll_for_link(struct work_struct *work)
-{
-       union nic_mbx mbx = {};
-       struct nicpf *nic;
-       struct bgx_link_status link;
-       u8 vf, bgx, lmac;
-
-       nic = container_of(work, struct nicpf, dwork.work);
-
-       mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
-
-       for (vf = 0; vf < nic->num_vf_en; vf++) {
-               /* Poll only if VF is UP */
-               if (!nic->vf_enabled[vf])
-                       continue;
-
-               /* Get BGX, LMAC indices for the VF */
-               bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-               lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
-               /* Get interface link status */
-               bgx_get_lmac_link_state(nic->node, bgx, lmac, &link);
-
-               /* Inform VF only if link status changed */
-               if (nic->link[vf] == link.link_up)
-                       continue;
-
-               if (!nic->mbx_lock[vf]) {
-                       nic->link[vf] = link.link_up;
-                       nic->duplex[vf] = link.duplex;
-                       nic->speed[vf] = link.speed;
-
-                       /* Send a mbox message to VF with current link status */
-                       mbx.link_status.link_up = link.link_up;
-                       mbx.link_status.duplex = link.duplex;
-                       mbx.link_status.speed = link.speed;
-                       mbx.link_status.mac_type = link.mac_type;
-                       nic_send_msg_to_vf(nic, vf, &mbx);
-               }
-       }
-       queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2);
-}
-
 static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct device *dev = &pdev->dev;
@@ -1384,18 +1359,6 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (!nic->vf_lmac_map)
                goto err_release_regions;
 
-       nic->link = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
-       if (!nic->link)
-               goto err_release_regions;
-
-       nic->duplex = devm_kmalloc_array(dev, max_lmac, sizeof(u8), GFP_KERNEL);
-       if (!nic->duplex)
-               goto err_release_regions;
-
-       nic->speed = devm_kmalloc_array(dev, max_lmac, sizeof(u32), GFP_KERNEL);
-       if (!nic->speed)
-               goto err_release_regions;
-
        /* Initialize hardware */
        nic_init_hw(nic);
 
@@ -1411,22 +1374,8 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                goto err_unregister_interrupts;
 
-       /* Register a physical link status poll fn() */
-       nic->check_link = alloc_workqueue("check_link_status",
-                                         WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
-       if (!nic->check_link) {
-               err = -ENOMEM;
-               goto err_disable_sriov;
-       }
-
-       INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link);
-       queue_delayed_work(nic->check_link, &nic->dwork, 0);
-
        return 0;
 
-err_disable_sriov:
-       if (nic->flags & NIC_SRIOV_ENABLED)
-               pci_disable_sriov(pdev);
 err_unregister_interrupts:
        nic_unregister_interrupts(nic);
 err_release_regions:
@@ -1447,12 +1396,6 @@ static void nic_remove(struct pci_dev *pdev)
        if (nic->flags & NIC_SRIOV_ENABLED)
                pci_disable_sriov(pdev);
 
-       if (nic->check_link) {
-               /* Destroy work Queue */
-               cancel_delayed_work_sync(&nic->dwork);
-               destroy_workqueue(nic->check_link);
-       }
-
        nic_unregister_interrupts(nic);
        pci_release_regions(pdev);
 
index 88f8a8fa93cdcef2162f1867b46ed9525ef4fbf1..503cfadff4ace4c0c7858bba02a0322656550e82 100644 (file)
@@ -68,9 +68,6 @@ module_param(cpi_alg, int, 0444);
 MODULE_PARM_DESC(cpi_alg,
                 "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)");
 
-/* workqueue for handling kernel ndo_set_rx_mode() calls */
-static struct workqueue_struct *nicvf_rx_mode_wq;
-
 static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
 {
        if (nic->sqs_mode)
@@ -127,6 +124,9 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
 {
        int timeout = NIC_MBOX_MSG_TIMEOUT;
        int sleep = 10;
+       int ret = 0;
+
+       mutex_lock(&nic->rx_mode_mtx);
 
        nic->pf_acked = false;
        nic->pf_nacked = false;
@@ -139,7 +139,8 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
                        netdev_err(nic->netdev,
                                   "PF NACK to mbox msg 0x%02x from VF%d\n",
                                   (mbx->msg.msg & 0xFF), nic->vf_id);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       break;
                }
                msleep(sleep);
                if (nic->pf_acked)
@@ -149,10 +150,12 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
                        netdev_err(nic->netdev,
                                   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
                                   (mbx->msg.msg & 0xFF), nic->vf_id);
-                       return -EBUSY;
+                       ret = -EBUSY;
+                       break;
                }
        }
-       return 0;
+       mutex_unlock(&nic->rx_mode_mtx);
+       return ret;
 }
 
 /* Checks if VF is able to comminicate with PF
@@ -172,6 +175,17 @@ static int nicvf_check_pf_ready(struct nicvf *nic)
        return 1;
 }
 
+static void nicvf_send_cfg_done(struct nicvf *nic)
+{
+       union nic_mbx mbx = {};
+
+       mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+       if (nicvf_send_msg_to_pf(nic, &mbx)) {
+               netdev_err(nic->netdev,
+                          "PF didn't respond to CFG DONE msg\n");
+       }
+}
+
 static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
 {
        if (bgx->rx)
@@ -228,21 +242,24 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
                break;
        case NIC_MBOX_MSG_BGX_LINK_CHANGE:
                nic->pf_acked = true;
-               nic->link_up = mbx.link_status.link_up;
-               nic->duplex = mbx.link_status.duplex;
-               nic->speed = mbx.link_status.speed;
-               nic->mac_type = mbx.link_status.mac_type;
-               if (nic->link_up) {
-                       netdev_info(nic->netdev, "Link is Up %d Mbps %s duplex\n",
-                                   nic->speed,
-                                   nic->duplex == DUPLEX_FULL ?
-                                   "Full" : "Half");
-                       netif_carrier_on(nic->netdev);
-                       netif_tx_start_all_queues(nic->netdev);
-               } else {
-                       netdev_info(nic->netdev, "Link is Down\n");
-                       netif_carrier_off(nic->netdev);
-                       netif_tx_stop_all_queues(nic->netdev);
+               if (nic->link_up != mbx.link_status.link_up) {
+                       nic->link_up = mbx.link_status.link_up;
+                       nic->duplex = mbx.link_status.duplex;
+                       nic->speed = mbx.link_status.speed;
+                       nic->mac_type = mbx.link_status.mac_type;
+                       if (nic->link_up) {
+                               netdev_info(nic->netdev,
+                                           "Link is Up %d Mbps %s duplex\n",
+                                           nic->speed,
+                                           nic->duplex == DUPLEX_FULL ?
+                                           "Full" : "Half");
+                               netif_carrier_on(nic->netdev);
+                               netif_tx_start_all_queues(nic->netdev);
+                       } else {
+                               netdev_info(nic->netdev, "Link is Down\n");
+                               netif_carrier_off(nic->netdev);
+                               netif_tx_stop_all_queues(nic->netdev);
+                       }
                }
                break;
        case NIC_MBOX_MSG_ALLOC_SQS:
@@ -1311,6 +1328,11 @@ int nicvf_stop(struct net_device *netdev)
        struct nicvf_cq_poll *cq_poll = NULL;
        union nic_mbx mbx = {};
 
+       cancel_delayed_work_sync(&nic->link_change_work);
+
+       /* wait till all queued set_rx_mode tasks completes */
+       drain_workqueue(nic->nicvf_rx_mode_wq);
+
        mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
        nicvf_send_msg_to_pf(nic, &mbx);
 
@@ -1410,13 +1432,27 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
        return nicvf_send_msg_to_pf(nic, &mbx);
 }
 
+static void nicvf_link_status_check_task(struct work_struct *work_arg)
+{
+       struct nicvf *nic = container_of(work_arg,
+                                        struct nicvf,
+                                        link_change_work.work);
+       union nic_mbx mbx = {};
+       mbx.msg.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE;
+       nicvf_send_msg_to_pf(nic, &mbx);
+       queue_delayed_work(nic->nicvf_rx_mode_wq,
+                          &nic->link_change_work, 2 * HZ);
+}
+
 int nicvf_open(struct net_device *netdev)
 {
        int cpu, err, qidx;
        struct nicvf *nic = netdev_priv(netdev);
        struct queue_set *qs = nic->qs;
        struct nicvf_cq_poll *cq_poll = NULL;
-       union nic_mbx mbx = {};
+
+       /* wait till all queued set_rx_mode tasks completes if any */
+       drain_workqueue(nic->nicvf_rx_mode_wq);
 
        netif_carrier_off(netdev);
 
@@ -1512,8 +1548,12 @@ int nicvf_open(struct net_device *netdev)
                nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
 
        /* Send VF config done msg to PF */
-       mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
-       nicvf_write_to_mbx(nic, &mbx);
+       nicvf_send_cfg_done(nic);
+
+       INIT_DELAYED_WORK(&nic->link_change_work,
+                         nicvf_link_status_check_task);
+       queue_delayed_work(nic->nicvf_rx_mode_wq,
+                          &nic->link_change_work, 0);
 
        return 0;
 cleanup:
@@ -1941,15 +1981,17 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
 
        /* flush DMAC filters and reset RX mode */
        mbx.xcast.msg = NIC_MBOX_MSG_RESET_XCAST;
-       nicvf_send_msg_to_pf(nic, &mbx);
+       if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+               goto free_mc;
 
        if (mode & BGX_XCAST_MCAST_FILTER) {
                /* once enabling filtering, we need to signal to PF to add
                 * its' own LMAC to the filter to accept packets for it.
                 */
                mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-               mbx.xcast.data.mac = 0;
-               nicvf_send_msg_to_pf(nic, &mbx);
+               mbx.xcast.mac = 0;
+               if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+                       goto free_mc;
        }
 
        /* check if we have any specific MACs to be added to PF DMAC filter */
@@ -1957,23 +1999,25 @@ static void __nicvf_set_rx_mode_task(u8 mode, struct xcast_addr_list *mc_addrs,
                /* now go through kernel list of MACs and add them one by one */
                for (idx = 0; idx < mc_addrs->count; idx++) {
                        mbx.xcast.msg = NIC_MBOX_MSG_ADD_MCAST;
-                       mbx.xcast.data.mac = mc_addrs->mc[idx];
-                       nicvf_send_msg_to_pf(nic, &mbx);
+                       mbx.xcast.mac = mc_addrs->mc[idx];
+                       if (nicvf_send_msg_to_pf(nic, &mbx) < 0)
+                               goto free_mc;
                }
-               kfree(mc_addrs);
        }
 
        /* and finally set rx mode for PF accordingly */
        mbx.xcast.msg = NIC_MBOX_MSG_SET_XCAST;
-       mbx.xcast.data.mode = mode;
+       mbx.xcast.mode = mode;
 
        nicvf_send_msg_to_pf(nic, &mbx);
+free_mc:
+       kfree(mc_addrs);
 }
 
 static void nicvf_set_rx_mode_task(struct work_struct *work_arg)
 {
        struct nicvf_work *vf_work = container_of(work_arg, struct nicvf_work,
-                                                 work.work);
+                                                 work);
        struct nicvf *nic = container_of(vf_work, struct nicvf, rx_mode_work);
        u8 mode;
        struct xcast_addr_list *mc;
@@ -2030,7 +2074,7 @@ static void nicvf_set_rx_mode(struct net_device *netdev)
        kfree(nic->rx_mode_work.mc);
        nic->rx_mode_work.mc = mc_list;
        nic->rx_mode_work.mode = mode;
-       queue_delayed_work(nicvf_rx_mode_wq, &nic->rx_mode_work.work, 0);
+       queue_work(nic->nicvf_rx_mode_wq, &nic->rx_mode_work.work);
        spin_unlock(&nic->rx_mode_wq_lock);
 }
 
@@ -2187,8 +2231,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        INIT_WORK(&nic->reset_task, nicvf_reset_task);
 
-       INIT_DELAYED_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
+       nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d",
+                                                       WQ_MEM_RECLAIM,
+                                                       nic->vf_id);
+       INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task);
        spin_lock_init(&nic->rx_mode_wq_lock);
+       mutex_init(&nic->rx_mode_mtx);
 
        err = register_netdev(netdev);
        if (err) {
@@ -2228,13 +2276,15 @@ static void nicvf_remove(struct pci_dev *pdev)
        nic = netdev_priv(netdev);
        pnetdev = nic->pnicvf->netdev;
 
-       cancel_delayed_work_sync(&nic->rx_mode_work.work);
-
        /* Check if this Qset is assigned to different VF.
         * If yes, clean primary and all secondary Qsets.
         */
        if (pnetdev && (pnetdev->reg_state == NETREG_REGISTERED))
                unregister_netdev(pnetdev);
+       if (nic->nicvf_rx_mode_wq) {
+               destroy_workqueue(nic->nicvf_rx_mode_wq);
+               nic->nicvf_rx_mode_wq = NULL;
+       }
        nicvf_unregister_interrupts(nic);
        pci_set_drvdata(pdev, NULL);
        if (nic->drv_stats)
@@ -2261,17 +2311,11 @@ static struct pci_driver nicvf_driver = {
 static int __init nicvf_init_module(void)
 {
        pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
-       nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_generic",
-                                                  WQ_MEM_RECLAIM);
        return pci_register_driver(&nicvf_driver);
 }
 
 static void __exit nicvf_cleanup_module(void)
 {
-       if (nicvf_rx_mode_wq) {
-               destroy_workqueue(nicvf_rx_mode_wq);
-               nicvf_rx_mode_wq = NULL;
-       }
        pci_unregister_driver(&nicvf_driver);
 }
 
index e337da6ba2a4c16973f4728c9fd9564da162942c..673c57b8023fe3e2113cc1121f15d6700d12faa1 100644 (file)
@@ -1217,7 +1217,7 @@ static void bgx_init_hw(struct bgx *bgx)
 
        /* Disable MAC steering (NCSI traffic) */
        for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++)
-               bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
+               bgx_reg_write(bgx, 0, BGX_CMR_RX_STEERING + (i * 8), 0x00);
 }
 
 static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
index cbdd20b9ee6f1d4ede1551c37d585f3ecef0ab1b..5cbc54e9eb19c4e285fdcc0a8d38953ee59c2df3 100644 (file)
@@ -60,7 +60,7 @@
 #define  RX_DMACX_CAM_EN                       BIT_ULL(48)
 #define  RX_DMACX_CAM_LMACID(x)                        (((u64)x) << 49)
 #define  RX_DMAC_COUNT                         32
-#define BGX_CMR_RX_STREERING           0x300
+#define BGX_CMR_RX_STEERING            0x300
 #define  RX_TRAFFIC_STEER_RULE_COUNT           8
 #define BGX_CMR_CHAN_MSK_AND           0x450
 #define BGX_CMR_BIST_STATUS            0x460
index f52e2c46e6a7b2beeafcb4b0c5c035afece68fcf..e4ff531db14a977dfe70b59c7ee0cc0465868d69 100644 (file)
@@ -3289,8 +3289,11 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
             i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
             !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
        if (!ok) {
+               /* Log this in case the user has forgotten to give the kernel
+                * any buffers, even later in the application.
+                */
                dev_info(&vsi->back->pdev->dev,
-                        "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+                        "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
                         ring->xsk_umem ? "UMEM enabled " : "",
                         ring->queue_index, pf_q);
        }
@@ -6725,8 +6728,13 @@ void i40e_down(struct i40e_vsi *vsi)
 
        for (i = 0; i < vsi->num_queue_pairs; i++) {
                i40e_clean_tx_ring(vsi->tx_rings[i]);
-               if (i40e_enabled_xdp_vsi(vsi))
+               if (i40e_enabled_xdp_vsi(vsi)) {
+                       /* Make sure that in-progress ndo_xdp_xmit
+                        * calls are completed.
+                        */
+                       synchronize_rcu();
                        i40e_clean_tx_ring(vsi->xdp_rings[i]);
+               }
                i40e_clean_rx_ring(vsi->rx_rings[i]);
        }
 
@@ -11895,6 +11903,14 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
        if (old_prog)
                bpf_prog_put(old_prog);
 
+       /* Kick start the NAPI context if there is an AF_XDP socket open
+        * on that queue id. This is so that receiving will start.
+        */
+       if (need_reset && prog)
+               for (i = 0; i < vsi->num_queue_pairs; i++)
+                       if (vsi->xdp_rings[i]->xsk_umem)
+                               (void)i40e_xsk_async_xmit(vsi->netdev, i);
+
        return 0;
 }
 
@@ -11955,8 +11971,13 @@ static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
 static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
 {
        i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
-       if (i40e_enabled_xdp_vsi(vsi))
+       if (i40e_enabled_xdp_vsi(vsi)) {
+               /* Make sure that in-progress ndo_xdp_xmit calls are
+                * completed.
+                */
+               synchronize_rcu();
                i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+       }
        i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
 }
 
index a7e14e98889f142f5e363593f8ac5266f9063b56..6c97667d20eff136cde56c0447c54892c464fe31 100644 (file)
@@ -3709,6 +3709,7 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
        struct i40e_netdev_priv *np = netdev_priv(dev);
        unsigned int queue_index = smp_processor_id();
        struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
        struct i40e_ring *xdp_ring;
        int drops = 0;
        int i;
@@ -3716,7 +3717,8 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
        if (test_bit(__I40E_VSI_DOWN, vsi->state))
                return -ENETDOWN;
 
-       if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
+       if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
+           test_bit(__I40E_CONFIG_BUSY, pf->state))
                return -ENXIO;
 
        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
index 870cf654e4364480e41887ec0e0d054a05f4e25a..3827f16e692357bf2ad71156bb97682e778d5ed2 100644 (file)
@@ -183,6 +183,11 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
                err = i40e_queue_pair_enable(vsi, qid);
                if (err)
                        return err;
+
+               /* Kick start the NAPI context so that receiving will start */
+               err = i40e_xsk_async_xmit(vsi->netdev, qid);
+               if (err)
+                       return err;
        }
 
        return 0;
index daff8183534b96b1aa3dbaaf482d5d7c5101c1f9..cb35d8202572442ca28526761fc41c0dc3a5b72f 100644 (file)
@@ -3953,8 +3953,11 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
                        else
                                mrqc = IXGBE_MRQC_VMDQRSS64EN;
 
-                       /* Enable L3/L4 for Tx Switched packets */
-                       mrqc |= IXGBE_MRQC_L3L4TXSWEN;
+                       /* Enable L3/L4 for Tx Switched packets only for X550,
+                        * older devices do not support this feature
+                        */
+                       if (hw->mac.type >= ixgbe_mac_X550)
+                               mrqc |= IXGBE_MRQC_L3L4TXSWEN;
                } else {
                        if (tcs > 4)
                                mrqc = IXGBE_MRQC_RTRSS8TCEN;
@@ -10225,6 +10228,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
        int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
        struct ixgbe_adapter *adapter = netdev_priv(dev);
        struct bpf_prog *old_prog;
+       bool need_reset;
 
        if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
                return -EINVAL;
@@ -10247,9 +10251,10 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
                return -ENOMEM;
 
        old_prog = xchg(&adapter->xdp_prog, prog);
+       need_reset = (!!prog != !!old_prog);
 
        /* If transitioning XDP modes reconfigure rings */
-       if (!!prog != !!old_prog) {
+       if (need_reset) {
                int err = ixgbe_setup_tc(dev, adapter->hw_tcs);
 
                if (err) {
@@ -10265,6 +10270,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
        if (old_prog)
                bpf_prog_put(old_prog);
 
+       /* Kick start the NAPI context if there is an AF_XDP socket open
+        * on that queue id. This is so that receiving will start.
+        */
+       if (need_reset && prog)
+               for (i = 0; i < adapter->num_rx_queues; i++)
+                       if (adapter->xdp_ring[i]->xsk_umem)
+                               (void)ixgbe_xsk_async_xmit(adapter->netdev, i);
+
        return 0;
 }
 
index 65c3e2c979d4d89775d0d3fe9afad63a3046d075..36a8879536a4a552405de80ad6f6c3a319ebbd6e 100644 (file)
@@ -144,11 +144,19 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
                ixgbe_txrx_ring_disable(adapter, qid);
 
        err = ixgbe_add_xsk_umem(adapter, umem, qid);
+       if (err)
+               return err;
 
-       if (if_running)
+       if (if_running) {
                ixgbe_txrx_ring_enable(adapter, qid);
 
-       return err;
+               /* Kick start the NAPI context so that receiving will start */
+               err = ixgbe_xsk_async_xmit(adapter->netdev, qid);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
 static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
@@ -634,7 +642,8 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
        dma_addr_t dma;
 
        while (budget-- > 0) {
-               if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
+               if (unlikely(!ixgbe_desc_unused(xdp_ring)) ||
+                   !netif_carrier_ok(xdp_ring->netdev)) {
                        work_done = false;
                        break;
                }
index 9d4568eb2297f1b31a63d0fece85e78906a9ef4e..8433fb9c3eeeb0a723948d111fcda337383223f1 100644 (file)
@@ -2146,7 +2146,7 @@ err_drop_frame:
                        if (unlikely(!skb))
                                goto err_drop_frame_ret_pool;
 
-                       dma_sync_single_range_for_cpu(dev->dev.parent,
+                       dma_sync_single_range_for_cpu(&pp->bm_priv->pdev->dev,
                                                      rx_desc->buf_phys_addr,
                                                      MVNETA_MH_SIZE + NET_SKB_PAD,
                                                      rx_bytes,
index e23ca90289f71ca5776bab7e66659640de1857c2..0a868c829b907dda6c86ae57520eada246690dd9 100644 (file)
@@ -1291,15 +1291,10 @@ wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
 
 static int
 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
-             enum alu_op alu_op, bool skip)
+             enum alu_op alu_op)
 {
        const struct bpf_insn *insn = &meta->insn;
 
-       if (skip) {
-               meta->skip = true;
-               return 0;
-       }
-
        wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
        wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
 
@@ -2309,7 +2304,7 @@ static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
 }
 
 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2319,7 +2314,7 @@ static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
 }
 
 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2329,7 +2324,7 @@ static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
 }
 
 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2339,7 +2334,7 @@ static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
 }
 
 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -2349,7 +2344,7 @@ static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 
 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
-       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+       return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
 }
 
 static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
index 7cdac77d0c68527630b89c2eaff874582200d93a..07e41c42bcf5e923285e1e3435247b47fa90d071 100644 (file)
@@ -499,6 +499,8 @@ static int ipvlan_nl_changelink(struct net_device *dev,
 
        if (!data)
                return 0;
+       if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
+               return -EPERM;
 
        if (data[IFLA_IPVLAN_MODE]) {
                u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
@@ -601,6 +603,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
                struct ipvl_dev *tmp = netdev_priv(phy_dev);
 
                phy_dev = tmp->phy_dev;
+               if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
+                       return -EPERM;
        } else if (!netif_is_ipvlan_port(phy_dev)) {
                /* Exit early if the underlying link is invalid or busy */
                if (phy_dev->type != ARPHRD_ETHER ||
index 82ab6ed3b74ee5b6f3229d62fa276901c53df454..6bac602094bd3955b66a9b6a15f54a4261abe997 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/marvell_phy.h>
 #include <linux/phy.h>
 
+#define MDIO_AN_10GBT_CTRL_ADV_NBT_MASK        0x01e0
+
 enum {
        MV_PCS_BASE_T           = 0x0000,
        MV_PCS_BASE_R           = 0x1000,
@@ -386,8 +388,10 @@ static int mv3310_config_aneg(struct phy_device *phydev)
        else
                reg = 0;
 
+       /* Make sure we clear unsupported 2.5G/5G advertising */
        ret = mv3310_modify(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
-                           MDIO_AN_10GBT_CTRL_ADV10G, reg);
+                           MDIO_AN_10GBT_CTRL_ADV10G |
+                           MDIO_AN_10GBT_CTRL_ADV_NBT_MASK, reg);
        if (ret < 0)
                return ret;
        if (ret > 0)
index 66b9cfe692fc707be1acb6a3f87dbfc996566c6a..7368616286ae9ced5b44e782e69877604025a7b6 100644 (file)
@@ -379,7 +379,6 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
        err = device_register(&bus->dev);
        if (err) {
                pr_err("mii_bus %s failed to register\n", bus->id);
-               put_device(&bus->dev);
                return -EINVAL;
        }
 
index c6010fb1aa0f2049788d1991135a944606438372..cb4a23041a94a80ed8a2b6dcc50730beee22bd64 100644 (file)
@@ -282,6 +282,13 @@ static struct phy_driver realtek_drvs[] = {
                .name           = "RTL8366RB Gigabit Ethernet",
                .features       = PHY_GBIT_FEATURES,
                .config_init    = &rtl8366rb_config_init,
+               /* These interrupts are handled by the irq controller
+                * embedded inside the RTL8366RB, they get unmasked when the
+                * irq is requested and ACKed by reading the status register,
+                * which is done by the irqchip code.
+                */
+               .ack_interrupt  = genphy_no_ack_interrupt,
+               .config_intr    = genphy_no_config_intr,
                .suspend        = genphy_suspend,
                .resume         = genphy_resume,
        },
index 958f1cf67282d46fbc379930a333d2d47a295709..6ce3f666d142a801f195451e33f6eab3d1a2f4c6 100644 (file)
@@ -1256,7 +1256,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev,
        list_add_tail_rcu(&port->list, &team->port_list);
        team_port_enable(team, port);
        __team_compute_features(team);
-       __team_port_change_port_added(port, !!netif_carrier_ok(port_dev));
+       __team_port_change_port_added(port, !!netif_oper_up(port_dev));
        __team_options_change_check(team);
 
        netdev_info(dev, "Port device %s added\n", portname);
@@ -2915,7 +2915,7 @@ static int team_device_event(struct notifier_block *unused,
 
        switch (event) {
        case NETDEV_UP:
-               if (netif_carrier_ok(dev))
+               if (netif_oper_up(dev))
                        team_port_change_check(port, true);
                break;
        case NETDEV_DOWN:
index ada6baf8847aa87f8980f2aae0deef6a211ed9de..86c8c64fbb0f33e920a5fbe77e2d73fe69c14991 100644 (file)
@@ -1179,7 +1179,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa)
        } else {
                /* test for RTL8153-BND and RTL8153-BD */
                ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_MISC_1);
-               if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK)) {
+               if ((ocp_data & BND_MASK) == 0 && (ocp_data & BD_MASK) == 0) {
                        netif_dbg(tp, probe, tp->netdev,
                                  "Invalid variant for MAC pass through\n");
                        return -ENODEV;
index 95909e262ba4369fa853acf24c62bf271670d4ad..7c1430ed02445b6e6f13c663b555ef550276c899 100644 (file)
@@ -1273,6 +1273,9 @@ static void vrf_setup(struct net_device *dev)
 
        /* default to no qdisc; user can add if desired */
        dev->priv_flags |= IFF_NO_QUEUE;
+
+       dev->min_mtu = 0;
+       dev->max_mtu = 0;
 }
 
 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
index 320edcac469985308ea96a7883dcdf86367d83ae..6359053bd0c783e0a85c0954ebec52717a288245 100644 (file)
@@ -3554,7 +3554,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
                        goto out_err;
                }
 
-               genlmsg_reply(skb, info);
+               res = genlmsg_reply(skb, info);
                break;
        }
 
index 127fcc9c37781564d72978ad0626e5d4c19cf740..333b56d8f746368623ae738b554ea8c37b54f503 100644 (file)
@@ -992,6 +992,14 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
        return 0;
 }
+static inline int genphy_no_ack_interrupt(struct phy_device *phydev)
+{
+       return 0;
+}
+static inline int genphy_no_config_intr(struct phy_device *phydev)
+{
+       return 0;
+}
 int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
                                u16 regnum);
 int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
index 71f2394abbf7c08c3215cc90517b6c2a836192cf..e0348cb0a1dd7d2f3320e58a7ec1cc76e4a8799b 100644 (file)
@@ -61,10 +61,20 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
                /* gso packets without NEEDS_CSUM do not set transport_offset.
                 * probe and drop if does not match one of the above types.
                 */
-               if (gso_type) {
+               if (gso_type && skb->network_header) {
+                       if (!skb->protocol)
+                               virtio_net_hdr_set_proto(skb, hdr);
+retry:
                        skb_probe_transport_header(skb, -1);
-                       if (!skb_transport_header_was_set(skb))
+                       if (!skb_transport_header_was_set(skb)) {
+                               /* UFO does not specify ipv4 or 6: try both */
+                               if (gso_type & SKB_GSO_UDP &&
+                                   skb->protocol == htons(ETH_P_IP)) {
+                                       skb->protocol = htons(ETH_P_IPV6);
+                                       goto retry;
+                               }
                                return -EINVAL;
+                       }
                }
        }
 
index b669fe6dbc3bad2a6d61b7a2b9d54ff83b7e51c8..98f31c7ea23df92e82f00c251bdeca684180b1b5 100644 (file)
@@ -63,10 +63,11 @@ struct pnpipehdr {
                u8              state_after_reset;      /* reset request */
                u8              error_code;             /* any response */
                u8              pep_type;               /* status indication */
-               u8              data[1];
+               u8              data0;                  /* anything else */
        };
+       u8                      data[];
 };
-#define other_pep_type         data[1]
+#define other_pep_type         data[0]
 
 static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb)
 {
index 7298a53b970296d0860956645d9c47b45d32300f..85386becbaea211504eaeae6a549e96d204afc75 100644 (file)
@@ -853,7 +853,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
                xfrm_pol_put(pols[i]);
 }
 
-void __xfrm_state_destroy(struct xfrm_state *);
+void __xfrm_state_destroy(struct xfrm_state *, bool);
 
 static inline void __xfrm_state_put(struct xfrm_state *x)
 {
@@ -863,7 +863,13 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
 static inline void xfrm_state_put(struct xfrm_state *x)
 {
        if (refcount_dec_and_test(&x->refcnt))
-               __xfrm_state_destroy(x);
+               __xfrm_state_destroy(x, false);
+}
+
+static inline void xfrm_state_put_sync(struct xfrm_state *x)
+{
+       if (refcount_dec_and_test(&x->refcnt))
+               __xfrm_state_destroy(x, true);
 }
 
 static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1590,7 +1596,7 @@ struct xfrmk_spdinfo {
 
 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
 int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
index abf1002080dfb1bc72c25e167eee425fc64da2af..93a5cbbde421c346e72b10cd56e04a13888d9e7b 100644 (file)
@@ -471,6 +471,7 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
        }
 
        if (!node || node->prefixlen != key->prefixlen ||
+           node->prefixlen != matchlen ||
            (node->flags & LPM_TREE_NODE_FLAG_IM)) {
                ret = -ENOENT;
                goto out;
index fa2644d276ef1134bc3b41ce02b70bfdd8a678fa..e31e1b20f7f4dea446ec596ae661f7beb080f4ee 100644 (file)
 #include <net/sock.h>
 #include <net/tcp.h>
 
-static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
-               struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
-{
-       u32 ret;
-
-       preempt_disable();
-       rcu_read_lock();
-       bpf_cgroup_storage_set(storage);
-       ret = BPF_PROG_RUN(prog, ctx);
-       rcu_read_unlock();
-       preempt_enable();
-
-       return ret;
-}
-
-static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
-                       u32 *time)
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+                       u32 *retval, u32 *time)
 {
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
        enum bpf_cgroup_storage_type stype;
        u64 time_start, time_spent = 0;
+       int ret = 0;
        u32 i;
 
        for_each_cgroup_storage_type(stype) {
@@ -48,25 +34,42 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
 
        if (!repeat)
                repeat = 1;
+
+       rcu_read_lock();
+       preempt_disable();
        time_start = ktime_get_ns();
        for (i = 0; i < repeat; i++) {
-               *ret = bpf_test_run_one(prog, ctx, storage);
+               bpf_cgroup_storage_set(storage);
+               *retval = BPF_PROG_RUN(prog, ctx);
+
+               if (signal_pending(current)) {
+                       ret = -EINTR;
+                       break;
+               }
+
                if (need_resched()) {
-                       if (signal_pending(current))
-                               break;
                        time_spent += ktime_get_ns() - time_start;
+                       preempt_enable();
+                       rcu_read_unlock();
+
                        cond_resched();
+
+                       rcu_read_lock();
+                       preempt_disable();
                        time_start = ktime_get_ns();
                }
        }
        time_spent += ktime_get_ns() - time_start;
+       preempt_enable();
+       rcu_read_unlock();
+
        do_div(time_spent, repeat);
        *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
 
        for_each_cgroup_storage_type(stype)
                bpf_cgroup_storage_free(storage[stype]);
 
-       return 0;
+       return ret;
 }
 
 static int bpf_test_finish(const union bpf_attr *kattr,
index 3aeff0895669609b753607abb362fc7bbbb7f28a..ac92b2eb32b1acafbdb85cd18f69079d977fa7fb 100644 (file)
@@ -1204,14 +1204,7 @@ static void br_multicast_query_received(struct net_bridge *br,
                return;
 
        br_multicast_update_query_timer(br, query, max_delay);
-
-       /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
-        * the arrival port for IGMP Queries where the source address
-        * is 0.0.0.0 should not be added to router port list.
-        */
-       if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
-           saddr->proto == htons(ETH_P_IPV6))
-               br_multicast_mark_router(br, port);
+       br_multicast_mark_router(br, port);
 }
 
 static void br_ip4_multicast_query(struct net_bridge *br,
index 959d1c51826d8b18765bce50b4378f177e912797..3d348198004ffe4596f09e96c509e7c153dd7a80 100644 (file)
@@ -388,8 +388,12 @@ static int __compat_sys_setsockopt(int fd, int level, int optname,
                                   char __user *optval, unsigned int optlen)
 {
        int err;
-       struct socket *sock = sockfd_lookup(fd, &err);
+       struct socket *sock;
+
+       if (optlen > INT_MAX)
+               return -EINVAL;
 
+       sock = sockfd_lookup(fd, &err);
        if (sock) {
                err = security_socket_setsockopt(sock, level, optname);
                if (err) {
index 2d7e01b23572877423aabd002e7e74315e320882..2a2a878b5ce333f7caa775ebc876259d79f5d0e9 100644 (file)
@@ -69,7 +69,6 @@ static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
 
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
 {
-       u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
        struct dsa_switch *ds = dp->ds;
        int port = dp->index;
        int err;
@@ -80,7 +79,8 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
                        return err;
        }
 
-       dsa_port_set_state_now(dp, stp_state);
+       if (!dp->bridge_dev)
+               dsa_port_set_state_now(dp, BR_STATE_FORWARDING);
 
        return 0;
 }
@@ -90,7 +90,8 @@ void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
        struct dsa_switch *ds = dp->ds;
        int port = dp->index;
 
-       dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+       if (!dp->bridge_dev)
+               dsa_port_set_state_now(dp, BR_STATE_DISABLED);
 
        if (ds->ops->port_disable)
                ds->ops->port_disable(ds, port, phy);
index 5459f41fc26fa75beeb2800bf55a19a3feda507d..10e809b296ec8644e108923c6faa1e4e2179bc20 100644 (file)
@@ -328,7 +328,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
                        skb->len += tailen;
                        skb->data_len += tailen;
                        skb->truesize += tailen;
-                       if (sk)
+                       if (sk && sk_fullsock(sk))
                                refcount_add(tailen, &sk->sk_wmem_alloc);
 
                        goto out;
index 3978f807fa8b7c8514f7727174facdb9812c9c59..6ae89f2b541bf2221d2842646f42ea8b05928056 100644 (file)
@@ -1457,9 +1457,23 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
        struct ip_tunnel_parm *p = &t->parms;
        __be16 o_flags = p->o_flags;
 
-       if ((t->erspan_ver == 1 || t->erspan_ver == 2) &&
-           !t->collect_md)
-               o_flags |= TUNNEL_KEY;
+       if (t->erspan_ver == 1 || t->erspan_ver == 2) {
+               if (!t->collect_md)
+                       o_flags |= TUNNEL_KEY;
+
+               if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+                       goto nla_put_failure;
+
+               if (t->erspan_ver == 1) {
+                       if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+                               goto nla_put_failure;
+               } else {
+                       if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+                               goto nla_put_failure;
+                       if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+                               goto nla_put_failure;
+               }
+       }
 
        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -1495,19 +1509,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
                        goto nla_put_failure;
        }
 
-       if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
-               goto nla_put_failure;
-
-       if (t->erspan_ver == 1) {
-               if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
-                       goto nla_put_failure;
-       } else if (t->erspan_ver == 2) {
-               if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
-                       goto nla_put_failure;
-               if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
-                       goto nla_put_failure;
-       }
-
        return 0;
 
 nla_put_failure:
index 730bc44dbad9363814705b28c2f91a2253d91207..ccc78f3a4b60d3012430488bdfbcfc5122ff8627 100644 (file)
@@ -2347,6 +2347,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                        /* "skb_mstamp_ns" is used as a start point for the retransmit timer */
                        skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache;
                        list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+                       tcp_init_tso_segs(skb, mss_now);
                        goto repair; /* Skip network transmission */
                }
 
index 5c3cd5d84a6f6f09cd5744373fee168a10a6baf4..372fdc5381a98e0d8a673ef1649323f91764ad8e 100644 (file)
@@ -562,10 +562,12 @@ static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
 
        for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
                int (*handler)(struct sk_buff *skb, u32 info);
+               const struct ip_tunnel_encap_ops *encap;
 
-               if (!iptun_encaps[i])
+               encap = rcu_dereference(iptun_encaps[i]);
+               if (!encap)
                        continue;
-               handler = rcu_dereference(iptun_encaps[i]->err_handler);
+               handler = encap->err_handler;
                if (handler && !handler(skb, info))
                        return 0;
        }
index 5afe9f83374de5239ced868cd7a5821e1de391c9..239d4a65ad6ef26988010cfa514491d4bf18f2c7 100644 (file)
@@ -296,7 +296,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
                        skb->len += tailen;
                        skb->data_len += tailen;
                        skb->truesize += tailen;
-                       if (sk)
+                       if (sk && sk_fullsock(sk))
                                refcount_add(tailen, &sk->sk_wmem_alloc);
 
                        goto out;
index b858bd5280bf54155f31032313f95aa30373f814..867474abe2698d947b347373a8bc179defec7378 100644 (file)
@@ -72,7 +72,7 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
 
 static int gue6_err_proto_handler(int proto, struct sk_buff *skb,
                                  struct inet6_skb_parm *opt,
-                                 u8 type, u8 code, int offset, u32 info)
+                                 u8 type, u8 code, int offset, __be32 info)
 {
        const struct inet6_protocol *ipprot;
 
index 43890898b0b56fc219ca9dc9b217d485e11971b6..26f25b6e2833994d50bd4b96e40c97c64ff23e34 100644 (file)
@@ -1722,6 +1722,9 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
 static void ip6erspan_set_version(struct nlattr *data[],
                                  struct __ip6_tnl_parm *parms)
 {
+       if (!data)
+               return;
+
        parms->erspan_ver = 1;
        if (data[IFLA_GRE_ERSPAN_VER])
                parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
@@ -2104,9 +2107,23 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
        struct __ip6_tnl_parm *p = &t->parms;
        __be16 o_flags = p->o_flags;
 
-       if ((p->erspan_ver == 1 || p->erspan_ver == 2) &&
-           !p->collect_md)
-               o_flags |= TUNNEL_KEY;
+       if (p->erspan_ver == 1 || p->erspan_ver == 2) {
+               if (!p->collect_md)
+                       o_flags |= TUNNEL_KEY;
+
+               if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+                       goto nla_put_failure;
+
+               if (p->erspan_ver == 1) {
+                       if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+                               goto nla_put_failure;
+               } else {
+                       if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+                               goto nla_put_failure;
+                       if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+                               goto nla_put_failure;
+               }
+       }
 
        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
            nla_put_be16(skb, IFLA_GRE_IFLAGS,
@@ -2121,8 +2138,7 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
            nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
            nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
            nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
-           nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
-           nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+           nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
                goto nla_put_failure;
 
        if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -2140,19 +2156,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
                        goto nla_put_failure;
        }
 
-       if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
-               goto nla_put_failure;
-
-       if (p->erspan_ver == 1) {
-               if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
-                       goto nla_put_failure;
-       } else if (p->erspan_ver == 2) {
-               if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
-                       goto nla_put_failure;
-               if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
-                       goto nla_put_failure;
-       }
-
        return 0;
 
 nla_put_failure:
index 964491cf36720fc3fa5601076002e9c90a4e3302..ce15dc4ccbfa0cc14233b1fe7842479726ffed5a 100644 (file)
@@ -1274,18 +1274,29 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
 static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
                                 struct rt6_exception *rt6_ex)
 {
+       struct fib6_info *from;
        struct net *net;
 
        if (!bucket || !rt6_ex)
                return;
 
        net = dev_net(rt6_ex->rt6i->dst.dev);
+       net->ipv6.rt6_stats->fib_rt_cache--;
+
+       /* purge completely the exception to allow releasing the held resources:
+        * some [sk] cache may keep the dst around for unlimited time
+        */
+       from = rcu_dereference_protected(rt6_ex->rt6i->from,
+                                        lockdep_is_held(&rt6_exception_lock));
+       rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+       fib6_info_release(from);
+       dst_dev_put(&rt6_ex->rt6i->dst);
+
        hlist_del_rcu(&rt6_ex->hlist);
        dst_release(&rt6_ex->rt6i->dst);
        kfree_rcu(rt6_ex, rcu);
        WARN_ON_ONCE(!bucket->depth);
        bucket->depth--;
-       net->ipv6.rt6_stats->fib_rt_cache--;
 }
 
 /* Remove oldest rt6_ex in bucket and free the memory
@@ -1599,15 +1610,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 {
        struct rt6_exception_bucket *bucket;
-       struct fib6_info *from = rt->from;
        struct in6_addr *src_key = NULL;
        struct rt6_exception *rt6_ex;
-
-       if (!from ||
-           !(rt->rt6i_flags & RTF_CACHE))
-               return;
+       struct fib6_info *from;
 
        rcu_read_lock();
+       from = rcu_dereference(rt->from);
+       if (!from || !(rt->rt6i_flags & RTF_CACHE))
+               goto unlock;
+
        bucket = rcu_dereference(from->rt6i_exception_bucket);
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1626,6 +1637,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
        if (rt6_ex)
                rt6_ex->stamp = jiffies;
 
+unlock:
        rcu_read_unlock();
 }
 
@@ -2742,20 +2754,24 @@ static int ip6_route_check_nh_onlink(struct net *net,
        u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
        const struct in6_addr *gw_addr = &cfg->fc_gateway;
        u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+       struct fib6_info *from;
        struct rt6_info *grt;
        int err;
 
        err = 0;
        grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
        if (grt) {
+               rcu_read_lock();
+               from = rcu_dereference(grt->from);
                if (!grt->dst.error &&
                    /* ignore match if it is the default route */
-                   grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
+                   from && !ipv6_addr_any(&from->fib6_dst.addr) &&
                    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
                        NL_SET_ERR_MSG(extack,
                                       "Nexthop has invalid gateway or device mismatch");
                        err = -EINVAL;
                }
+               rcu_read_unlock();
 
                ip6_rt_put(grt);
        }
@@ -4649,7 +4665,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
                table = rt->fib6_table->tb6_id;
        else
                table = RT6_TABLE_UNSPEC;
-       rtm->rtm_table = table;
+       rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
        if (nla_put_u32(skb, RTA_TABLE, table))
                goto nla_put_failure;
 
index 2596ffdeebeaaa60f096d774f3da32bd5486a2f4..b444483cdb2b42ef7acdbd7d23a0c046f55077c2 100644 (file)
@@ -288,8 +288,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
        int peeked, peeking, off;
        int err;
        int is_udplite = IS_UDPLITE(sk);
+       struct udp_mib __percpu *mib;
        bool checksum_valid = false;
-       struct udp_mib *mib;
        int is_udp4;
 
        if (flags & MSG_ERRQUEUE)
@@ -420,17 +420,19 @@ EXPORT_SYMBOL(udpv6_encap_enable);
  */
 static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
                                      struct inet6_skb_parm *opt,
-                                     u8 type, u8 code, int offset, u32 info)
+                                     u8 type, u8 code, int offset, __be32 info)
 {
        int i;
 
        for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
                int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
-                              u8 type, u8 code, int offset, u32 info);
+                              u8 type, u8 code, int offset, __be32 info);
+               const struct ip6_tnl_encap_ops *encap;
 
-               if (!ip6tun_encaps[i])
+               encap = rcu_dereference(ip6tun_encaps[i]);
+               if (!encap)
                        continue;
-               handler = rcu_dereference(ip6tun_encaps[i]->err_handler);
+               handler = encap->err_handler;
                if (handler && !handler(skb, opt, type, code, offset, info))
                        return 0;
        }
index f5b4febeaa25b57604ba3f701e78cfb0c4a23f5c..bc65db782bfb1fa49d5e5f9d2a25c77372905feb 100644 (file)
@@ -344,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
        struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
        unsigned int i;
 
-       xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
        xfrm_flush_gc();
+       xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
 
        for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
                WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));
index 655c787f9d54919c66666a4425a26969b9ddf91c..5651c29cb5bd0068d025c9500f6f7513556f65e7 100644 (file)
@@ -196,30 +196,22 @@ static int pfkey_release(struct socket *sock)
        return 0;
 }
 
-static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
-                              gfp_t allocation, struct sock *sk)
+static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation,
+                              struct sock *sk)
 {
        int err = -ENOBUFS;
 
-       sock_hold(sk);
-       if (*skb2 == NULL) {
-               if (refcount_read(&skb->users) != 1) {
-                       *skb2 = skb_clone(skb, allocation);
-               } else {
-                       *skb2 = skb;
-                       refcount_inc(&skb->users);
-               }
-       }
-       if (*skb2 != NULL) {
-               if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-                       skb_set_owner_r(*skb2, sk);
-                       skb_queue_tail(&sk->sk_receive_queue, *skb2);
-                       sk->sk_data_ready(sk);
-                       *skb2 = NULL;
-                       err = 0;
-               }
+       if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
+               return err;
+
+       skb = skb_clone(skb, allocation);
+
+       if (skb) {
+               skb_set_owner_r(skb, sk);
+               skb_queue_tail(&sk->sk_receive_queue, skb);
+               sk->sk_data_ready(sk);
+               err = 0;
        }
-       sock_put(sk);
        return err;
 }
 
@@ -234,7 +226,6 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
 {
        struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
        struct sock *sk;
-       struct sk_buff *skb2 = NULL;
        int err = -ESRCH;
 
        /* XXX Do we need something like netlink_overrun?  I think
@@ -253,7 +244,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
                 * socket.
                 */
                if (pfk->promisc)
-                       pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+                       pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
 
                /* the exact target will be processed later */
                if (sk == one_sk)
@@ -268,7 +259,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
                                continue;
                }
 
-               err2 = pfkey_broadcast_one(skb, &skb2, GFP_ATOMIC, sk);
+               err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk);
 
                /* Error is cleared after successful sending to at least one
                 * registered KM */
@@ -278,9 +269,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
        rcu_read_unlock();
 
        if (one_sk != NULL)
-               err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
+               err = pfkey_broadcast_one(skb, allocation, one_sk);
 
-       kfree_skb(skb2);
        kfree_skb(skb);
        return err;
 }
@@ -1783,7 +1773,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m
        if (proto == 0)
                return -EINVAL;
 
-       err = xfrm_state_flush(net, proto, true);
+       err = xfrm_state_flush(net, proto, true, false);
        err2 = unicast_flush_resp(sk, hdr);
        if (err || err2) {
                if (err == -ESRCH) /* empty table - go quietly */
index 87a7299267340675be3bd910183bdd84471deeaf..977dea436ee89dda59c4ba8e3e03114759dca323 100644 (file)
@@ -615,13 +615,13 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
         * We need a bit of data queued to build aggregates properly, so
         * instruct the TCP stack to allow more than a single ms of data
         * to be queued in the stack. The value is a bit-shift of 1
-        * second, so 8 is ~4ms of queued data. Only affects local TCP
+        * second, so 7 is ~8ms of queued data. Only affects local TCP
         * sockets.
         * This is the default, anyhow - drivers may need to override it
         * for local reasons (longer buffers, longer completion time, or
         * similar).
         */
-       local->hw.tx_sk_pacing_shift = 8;
+       local->hw.tx_sk_pacing_shift = 7;
 
        /* set up some defaults */
        local->hw.queues = 1;
index bb4d71efb6fb87f462887312ed631a4f11a16148..c2a6da5d80da8868c9b973a987538bce9157e08f 100644 (file)
@@ -2644,6 +2644,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
        struct ieee80211_sub_if_data *sdata = rx->sdata;
        struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
        u16 ac, q, hdrlen;
+       int tailroom = 0;
 
        hdr = (struct ieee80211_hdr *) skb->data;
        hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2732,8 +2733,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
        if (!ifmsh->mshcfg.dot11MeshForwarding)
                goto out;
 
+       if (sdata->crypto_tx_tailroom_needed_cnt)
+               tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
        fwd_skb = skb_copy_expand(skb, local->tx_headroom +
-                                      sdata->encrypt_headroom, 0, GFP_ATOMIC);
+                                      sdata->encrypt_headroom,
+                                 tailroom, GFP_ATOMIC);
        if (!fwd_skb)
                goto out;
 
index 9fc76b19cd3c17289604976f61accfb8013eabd6..db34735403035bd327be48658a58d2d553e06491 100644 (file)
@@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code,
        ph->utid = 0;
        ph->message_id = id;
        ph->pipe_handle = pn->pipe_handle;
-       ph->data[0] = code;
+       ph->error_code = code;
        return pn_skb_send(sk, skb, NULL);
 }
 
@@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
        ph->utid = id; /* whatever */
        ph->message_id = id;
        ph->pipe_handle = pn->pipe_handle;
-       ph->data[0] = code;
+       ph->error_code = code;
        return pn_skb_send(sk, skb, NULL);
 }
 
@@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
        struct pnpipehdr *ph;
        struct sockaddr_pn dst;
        u8 data[4] = {
-               oph->data[0], /* PEP type */
+               oph->pep_type, /* PEP type */
                code, /* error code, at an unusual offset */
                PAD, PAD,
        };
@@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
        ph->utid = oph->utid;
        ph->message_id = PNS_PEP_CTRL_RESP;
        ph->pipe_handle = oph->pipe_handle;
-       ph->data[0] = oph->data[1]; /* CTRL id */
+       ph->data0 = oph->data[0]; /* CTRL id */
 
        pn_skb_get_src_sockaddr(oskb, &dst);
        return pn_skb_send(sk, skb, &dst);
@@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
                return -EINVAL;
 
        hdr = pnp_hdr(skb);
-       if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
+       if (hdr->pep_type != PN_PEP_TYPE_COMMON) {
                net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
-                                   (unsigned int)hdr->data[0]);
+                                   (unsigned int)hdr->pep_type);
                return -EOPNOTSUPP;
        }
 
-       switch (hdr->data[1]) {
+       switch (hdr->data[0]) {
        case PN_PEP_IND_FLOW_CONTROL:
                switch (pn->tx_fc) {
                case PN_LEGACY_FLOW_CONTROL:
-                       switch (hdr->data[4]) {
+                       switch (hdr->data[3]) {
                        case PEP_IND_BUSY:
                                atomic_set(&pn->tx_credits, 0);
                                break;
@@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
                        }
                        break;
                case PN_ONE_CREDIT_FLOW_CONTROL:
-                       if (hdr->data[4] == PEP_IND_READY)
+                       if (hdr->data[3] == PEP_IND_READY)
                                atomic_set(&pn->tx_credits, wake = 1);
                        break;
                }
@@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
        case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
                if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
                        break;
-               atomic_add(wake = hdr->data[4], &pn->tx_credits);
+               atomic_add(wake = hdr->data[3], &pn->tx_credits);
                break;
 
        default:
                net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
-                                   (unsigned int)hdr->data[1]);
+                                   (unsigned int)hdr->data[0]);
                return -EOPNOTSUPP;
        }
        if (wake)
@@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb)
 {
        struct pep_sock *pn = pep_sk(sk);
        struct pnpipehdr *hdr = pnp_hdr(skb);
-       u8 n_sb = hdr->data[0];
+       u8 n_sb = hdr->data0;
 
        pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
        __skb_pull(skb, sizeof(*hdr));
@@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
                return -ECONNREFUSED;
 
        /* Parse sub-blocks */
-       n_sb = hdr->data[4];
+       n_sb = hdr->data[3];
        while (n_sb > 0) {
                u8 type, buf[6], len = sizeof(buf);
                const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk)
        ph->utid = 0;
        ph->message_id = PNS_PIPE_REMOVE_REQ;
        ph->pipe_handle = pn->pipe_handle;
-       ph->data[0] = PAD;
+       ph->data0 = PAD;
        return pn_skb_send(sk, skb, NULL);
 }
 
@@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
        peer_type = hdr->other_pep_type << 8;
 
        /* Parse sub-blocks (options) */
-       n_sb = hdr->data[4];
+       n_sb = hdr->data[3];
        while (n_sb > 0) {
                u8 type, buf[1], len = sizeof(buf);
                const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
        ph->utid = 0;
        if (pn->aligned) {
                ph->message_id = PNS_PIPE_ALIGNED_DATA;
-               ph->data[0] = 0; /* padding */
+               ph->data0 = 0; /* padding */
        } else
                ph->message_id = PNS_PIPE_DATA;
        ph->pipe_handle = pn->pipe_handle;
index 033696e6f74fbea97392059d243a078af925e832..ad158d311ffaea073f0c13d76d20f4756ceec879 100644 (file)
@@ -207,7 +207,8 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
 
        /* When a data chunk is sent, reset the heartbeat interval.  */
        expires = jiffies + sctp_transport_timeout(transport);
-       if (time_before(transport->hb_timer.expires, expires) &&
+       if ((time_before(transport->hb_timer.expires, expires) ||
+            !timer_pending(&transport->hb_timer)) &&
            !mod_timer(&transport->hb_timer,
                       expires + prandom_u32_max(transport->rto)))
                sctp_transport_hold(transport);
index 5721416d060534ff294be6db270e699fd942ba7f..adbdf195eb085f5507444340fb1f8ddcb69548d5 100644 (file)
@@ -113,9 +113,9 @@ struct smc_host_cdc_msg {           /* Connection Data Control message */
 } __aligned(8);
 
 enum smc_urg_state {
-       SMC_URG_VALID,                  /* data present */
-       SMC_URG_NOTYET,                 /* data pending */
-       SMC_URG_READ                    /* data was already read */
+       SMC_URG_VALID   = 1,                    /* data present */
+       SMC_URG_NOTYET  = 2,                    /* data pending */
+       SMC_URG_READ    = 3,                    /* data was already read */
 };
 
 struct smc_connection {
index 1217c90a363b75f7d74ab45b1f1cd6c538a872ce..684f2125fc6b6ed45fd47cf6fff0c2d3ec971cab 100644 (file)
@@ -388,7 +388,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
                rc_ = tipc_sk_sock_err((sock_), timeo_);                       \
                if (rc_)                                                       \
                        break;                                                 \
-               prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE);    \
+               add_wait_queue(sk_sleep(sk_), &wait_);                         \
                release_sock(sk_);                                             \
                *(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
                sched_annotate_sleep();                                        \
@@ -1677,7 +1677,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk)
 static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
 {
        struct sock *sk = sock->sk;
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        long timeo = *timeop;
        int err = sock_error(sk);
 
@@ -1685,15 +1685,17 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
                return err;
 
        for (;;) {
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
                        if (sk->sk_shutdown & RCV_SHUTDOWN) {
                                err = -ENOTCONN;
                                break;
                        }
+                       add_wait_queue(sk_sleep(sk), &wait);
                        release_sock(sk);
-                       timeo = schedule_timeout(timeo);
+                       timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
+                       sched_annotate_sleep();
                        lock_sock(sk);
+                       remove_wait_queue(sk_sleep(sk), &wait);
                }
                err = 0;
                if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -1709,7 +1711,6 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
                if (err)
                        break;
        }
-       finish_wait(sk_sleep(sk), &wait);
        *timeop = timeo;
        return err;
 }
index 74d1eed7cbd4ea1c641db2fa9a104cc648715d24..a95d479caeea022df4ae98c2267ad4b3427d751d 100644 (file)
@@ -890,7 +890,7 @@ retry:
        addr->hash ^= sk->sk_type;
 
        __unix_remove_socket(sk);
-       u->addr = addr;
+       smp_store_release(&u->addr, addr);
        __unix_insert_socket(&unix_socket_table[addr->hash], sk);
        spin_unlock(&unix_table_lock);
        err = 0;
@@ -1060,7 +1060,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
        err = 0;
        __unix_remove_socket(sk);
-       u->addr = addr;
+       smp_store_release(&u->addr, addr);
        __unix_insert_socket(list, sk);
 
 out_unlock:
@@ -1331,15 +1331,29 @@ restart:
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);
 
-       /* copy address information from listening to new sock*/
-       if (otheru->addr) {
-               refcount_inc(&otheru->addr->refcnt);
-               newu->addr = otheru->addr;
-       }
+       /* copy address information from listening to new sock
+        *
+        * The contents of *(otheru->addr) and otheru->path
+        * are seen fully set up here, since we have found
+        * otheru in hash under unix_table_lock.  Insertion
+        * into the hash chain we'd found it in had been done
+        * in an earlier critical area protected by unix_table_lock,
+        * the same one where we'd set *(otheru->addr) contents,
+        * as well as otheru->path and otheru->addr itself.
+        *
+        * Using smp_store_release() here to set newu->addr
+        * is enough to make those stores, as well as stores
+        * to newu->path visible to anyone who gets newu->addr
+        * by smp_load_acquire().  IOW, the same warranties
+        * as for unix_sock instances bound in unix_bind() or
+        * in unix_autobind().
+        */
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }
+       refcount_inc(&otheru->addr->refcnt);
+       smp_store_release(&newu->addr, otheru->addr);
 
        /* Set credentials */
        copy_peercred(sk, other);
@@ -1453,7 +1467,7 @@ out:
 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 {
        struct sock *sk = sock->sk;
-       struct unix_sock *u;
+       struct unix_address *addr;
        DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
        int err = 0;
 
@@ -1468,19 +1482,15 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
                sock_hold(sk);
        }
 
-       u = unix_sk(sk);
-       unix_state_lock(sk);
-       if (!u->addr) {
+       addr = smp_load_acquire(&unix_sk(sk)->addr);
+       if (!addr) {
                sunaddr->sun_family = AF_UNIX;
                sunaddr->sun_path[0] = 0;
                err = sizeof(short);
        } else {
-               struct unix_address *addr = u->addr;
-
                err = addr->len;
                memcpy(sunaddr, addr->name, addr->len);
        }
-       unix_state_unlock(sk);
        sock_put(sk);
 out:
        return err;
@@ -2073,11 +2083,11 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
 
 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
 {
-       struct unix_sock *u = unix_sk(sk);
+       struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
-       if (u->addr) {
-               msg->msg_namelen = u->addr->len;
-               memcpy(msg->msg_name, u->addr->name, u->addr->len);
+       if (addr) {
+               msg->msg_namelen = addr->len;
+               memcpy(msg->msg_name, addr->name, addr->len);
        }
 }
 
@@ -2581,15 +2591,14 @@ static int unix_open_file(struct sock *sk)
        if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
                return -EPERM;
 
-       unix_state_lock(sk);
+       if (!smp_load_acquire(&unix_sk(sk)->addr))
+               return -ENOENT;
+
        path = unix_sk(sk)->path;
-       if (!path.dentry) {
-               unix_state_unlock(sk);
+       if (!path.dentry)
                return -ENOENT;
-       }
 
        path_get(&path);
-       unix_state_unlock(sk);
 
        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
@@ -2830,7 +2839,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
                        (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
                        sock_i_ino(s));
 
-               if (u->addr) {
+               if (u->addr) {  // under unix_table_lock here
                        int i, len;
                        seq_putc(seq, ' ');
 
index 384c84e83462e51d24e469515f4b52f8dcf55877..3183d9b8ab33232c6f42686677c056a58bc5d2fa 100644 (file)
@@ -10,7 +10,8 @@
 
 static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
 {
-       struct unix_address *addr = unix_sk(sk)->addr;
+       /* might or might not have unix_table_lock */
+       struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
 
        if (!addr)
                return 0;
index ec3a828672ef53991146bf94a2266932a5f25872..eff31348e20b121ed2ca740c9d2c89e26da316d9 100644 (file)
@@ -679,8 +679,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
        int len, i, rc = 0;
 
-       if (!sock_flag(sk, SOCK_ZAPPED) ||
-           addr_len != sizeof(struct sockaddr_x25) ||
+       if (addr_len != sizeof(struct sockaddr_x25) ||
            addr->sx25_family != AF_X25) {
                rc = -EINVAL;
                goto out;
@@ -699,9 +698,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        }
 
        lock_sock(sk);
-       x25_sk(sk)->source_addr = addr->sx25_addr;
-       x25_insert_socket(sk);
-       sock_reset_flag(sk, SOCK_ZAPPED);
+       if (sock_flag(sk, SOCK_ZAPPED)) {
+               x25_sk(sk)->source_addr = addr->sx25_addr;
+               x25_insert_socket(sk);
+               sock_reset_flag(sk, SOCK_ZAPPED);
+       } else {
+               rc = -EINVAL;
+       }
        release_sock(sk);
        SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
 out:
index 45f3b528dc0999ab0fd1697f9a7a2dc15a90ac52..85e4fe4f18cced0c546390390561c25e5544ed0c 100644 (file)
@@ -366,7 +366,6 @@ static int xsk_release(struct socket *sock)
 
        xskq_destroy(xs->rx);
        xskq_destroy(xs->tx);
-       xdp_put_umem(xs->umem);
 
        sock_orphan(sk);
        sock->sk = NULL;
@@ -718,6 +717,18 @@ static const struct proto_ops xsk_proto_ops = {
        .sendpage       = sock_no_sendpage,
 };
 
+static void xsk_destruct(struct sock *sk)
+{
+       struct xdp_sock *xs = xdp_sk(sk);
+
+       if (!sock_flag(sk, SOCK_DEAD))
+               return;
+
+       xdp_put_umem(xs->umem);
+
+       sk_refcnt_debug_dec(sk);
+}
+
 static int xsk_create(struct net *net, struct socket *sock, int protocol,
                      int kern)
 {
@@ -744,6 +755,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 
        sk->sk_family = PF_XDP;
 
+       sk->sk_destruct = xsk_destruct;
+       sk_refcnt_debug_inc(sk);
+
        sock_set_flag(sk, SOCK_RCU_FREE);
 
        xs = xdp_sk(sk);
index 6be8c7df15bb20f5a641a6922110b63f80652dca..dbb3c1945b5c911b5933f60b284015a91524c832 100644 (file)
@@ -76,10 +76,10 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
        int ifindex;
        struct xfrm_if *xi;
 
-       if (!skb->dev)
+       if (!secpath_exists(skb) || !skb->dev)
                return NULL;
 
-       xfrmn = net_generic(dev_net(skb->dev), xfrmi_net_id);
+       xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
        ifindex = skb->dev->ifindex;
 
        for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
index ba0a4048c846bbbbde6e0d3320f6d4cfa46da424..8d1a898d0ba562a25e8d42b1692d62ba766b7353 100644 (file)
@@ -3314,8 +3314,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
        if (ifcb) {
                xi = ifcb->decode_session(skb);
-               if (xi)
+               if (xi) {
                        if_id = xi->p.if_id;
+                       net = xi->net;
+               }
        }
        rcu_read_unlock();
 
index 23c92891758a829e06dd776baf7ca340dec67b9f..1bb971f46fc6f9096f59c99740d6e832276c3b34 100644 (file)
@@ -432,7 +432,7 @@ void xfrm_state_free(struct xfrm_state *x)
 }
 EXPORT_SYMBOL(xfrm_state_free);
 
-static void xfrm_state_gc_destroy(struct xfrm_state *x)
+static void ___xfrm_state_destroy(struct xfrm_state *x)
 {
        tasklet_hrtimer_cancel(&x->mtimer);
        del_timer_sync(&x->rtimer);
@@ -474,7 +474,7 @@ static void xfrm_state_gc_task(struct work_struct *work)
        synchronize_rcu();
 
        hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
-               xfrm_state_gc_destroy(x);
+               ___xfrm_state_destroy(x);
 }
 
 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
@@ -598,14 +598,19 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
 }
 EXPORT_SYMBOL(xfrm_state_alloc);
 
-void __xfrm_state_destroy(struct xfrm_state *x)
+void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
 {
        WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
-       spin_lock_bh(&xfrm_state_gc_lock);
-       hlist_add_head(&x->gclist, &xfrm_state_gc_list);
-       spin_unlock_bh(&xfrm_state_gc_lock);
-       schedule_work(&xfrm_state_gc_work);
+       if (sync) {
+               synchronize_rcu();
+               ___xfrm_state_destroy(x);
+       } else {
+               spin_lock_bh(&xfrm_state_gc_lock);
+               hlist_add_head(&x->gclist, &xfrm_state_gc_list);
+               spin_unlock_bh(&xfrm_state_gc_lock);
+               schedule_work(&xfrm_state_gc_work);
+       }
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -708,7 +713,7 @@ xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool
 }
 #endif
 
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
 {
        int i, err = 0, cnt = 0;
 
@@ -730,7 +735,10 @@ restart:
                                err = xfrm_state_delete(x);
                                xfrm_audit_state_delete(x, err ? 0 : 1,
                                                        task_valid);
-                               xfrm_state_put(x);
+                               if (sync)
+                                       xfrm_state_put_sync(x);
+                               else
+                                       xfrm_state_put(x);
                                if (!err)
                                        cnt++;
 
@@ -2215,7 +2223,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x)
                if (atomic_read(&t->tunnel_users) == 2)
                        xfrm_state_delete(t);
                atomic_dec(&t->tunnel_users);
-               xfrm_state_put(t);
+               xfrm_state_put_sync(t);
                x->tunnel = NULL;
        }
 }
@@ -2375,8 +2383,8 @@ void xfrm_state_fini(struct net *net)
        unsigned int sz;
 
        flush_work(&net->xfrm.state_hash_work);
-       xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
        flush_work(&xfrm_state_gc_work);
+       xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
 
        WARN_ON(!list_empty(&net->xfrm.state_all));
 
index c6d26afcf89df4c9327acf3074d042cbbc8eba62..a131f9ff979e1b64015ade91942cf1ce88eee15c 100644 (file)
@@ -1932,7 +1932,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct xfrm_usersa_flush *p = nlmsg_data(nlh);
        int err;
 
-       err = xfrm_state_flush(net, p->proto, true);
+       err = xfrm_state_flush(net, p->proto, true, false);
        if (err) {
                if (err == -ESRCH) /* empty table */
                        return 0;
index f8400101935661aeecd629bef6b28f36561b56b9..33028c098ef3c6314c39c4e62ccf223dde7c7eb3 100644 (file)
@@ -321,6 +321,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
                if (a->u.net->sk) {
                        struct sock *sk = a->u.net->sk;
                        struct unix_sock *u;
+                       struct unix_address *addr;
                        int len = 0;
                        char *p = NULL;
 
@@ -351,14 +352,15 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 #endif
                        case AF_UNIX:
                                u = unix_sk(sk);
+                               addr = smp_load_acquire(&u->addr);
+                               if (!addr)
+                                       break;
                                if (u->path.dentry) {
                                        audit_log_d_path(ab, " path=", &u->path);
                                        break;
                                }
-                               if (!u->addr)
-                                       break;
-                               len = u->addr->len-sizeof(short);
-                               p = &u->addr->name->sun_path[0];
+                               len = addr->len-sizeof(short);
+                               p = &addr->name->sun_path[0];
                                audit_log_format(ab, " path=");
                                if (*p)
                                        audit_log_untrustedstring(ab, p);
index 147e34cfceb79bef10672643c0ba265ccf68293d..02d7c871862af26080e823a6936a54bbd182e454 100644 (file)
@@ -474,6 +474,16 @@ static void test_lpm_delete(void)
        assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
                errno == ENOENT);
 
+       key->prefixlen = 30; // unused prefix so far
+       inet_pton(AF_INET, "192.255.0.0", key->data);
+       assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+               errno == ENOENT);
+
+       key->prefixlen = 16; // same prefix as the root node
+       inet_pton(AF_INET, "192.255.0.0", key->data);
+       assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+               errno == ENOENT);
+
        /* assert initial lookup */
        key->prefixlen = 32;
        inet_pton(AF_INET, "192.168.0.1", key->data);
index 802b4af187297a7c8f2ddf1e086877b35cd41687..1080ff55a788f720f240271741fbc38680061b7a 100755 (executable)
@@ -388,6 +388,7 @@ fib_carrier_unicast_test()
 
        set -e
        $IP link set dev dummy0 carrier off
+       sleep 1
        set +e
 
        echo "    Carrier down"