net/mlx5e: Rx, Check ip headers sanity
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5b499c7a698f0f0143f624111e400dda6966f9e1..6a3cb9e6aa57b7d6535307196f04c0dffb43f934 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -36,6 +36,8 @@
 #include <linux/tcp.h>
 #include <linux/bpf_trace.h>
 #include <net/busy_poll.h>
+#include <net/ip6_checksum.h>
+#include <net/inet_ecn.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -547,20 +549,33 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
        return true;
 }
 
+static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
+{
+       u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
+       u8 tcp_ack     = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
+                        (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
+
+       tcp->check                      = 0;
+       tcp->psh                        = get_cqe_lro_tcppsh(cqe);
+
+       if (tcp_ack) {
+               tcp->ack                = 1;
+               tcp->ack_seq            = cqe->lro_ack_seq_num;
+               tcp->window             = cqe->lro_tcp_win;
+       }
+}
+
 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                                 u32 cqe_bcnt)
 {
        struct ethhdr   *eth = (struct ethhdr *)(skb->data);
        struct tcphdr   *tcp;
        int network_depth = 0;
+       __wsum check;
        __be16 proto;
        u16 tot_len;
        void *ip_p;
 
-       u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
-       u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) ||
-               (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA);
-
        proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
 
        tot_len = cqe_bcnt - network_depth;
@@ -577,23 +592,30 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
                ipv4->check             = 0;
                ipv4->check             = ip_fast_csum((unsigned char *)ipv4,
                                                       ipv4->ihl);
+
+               mlx5e_lro_update_tcp_hdr(cqe, tcp);
+               check = csum_partial(tcp, tcp->doff * 4,
+                                    csum_unfold((__force __sum16)cqe->check_sum));
+               /* Almost done, don't forget the pseudo header */
+               tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr,
+                                              tot_len - sizeof(struct iphdr),
+                                              IPPROTO_TCP, check);
        } else {
+               u16 payload_len = tot_len - sizeof(struct ipv6hdr);
                struct ipv6hdr *ipv6 = ip_p;
 
                tcp = ip_p + sizeof(struct ipv6hdr);
                skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 
                ipv6->hop_limit         = cqe->lro_min_ttl;
-               ipv6->payload_len       = cpu_to_be16(tot_len -
-                                                     sizeof(struct ipv6hdr));
-       }
-
-       tcp->psh = get_cqe_lro_tcppsh(cqe);
-
-       if (tcp_ack) {
-               tcp->ack                = 1;
-               tcp->ack_seq            = cqe->lro_ack_seq_num;
-               tcp->window             = cqe->lro_tcp_win;
+               ipv6->payload_len       = cpu_to_be16(payload_len);
+
+               mlx5e_lro_update_tcp_hdr(cqe, tcp);
+               check = csum_partial(tcp, tcp->doff * 4,
+                                    csum_unfold((__force __sum16)cqe->check_sum));
+               /* Almost done, don't forget the pseudo header */
+               tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len,
+                                            IPPROTO_TCP, check);
        }
 }
 
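A note on the fix-up above: cqe->check_sum carries a hardware-provided one's-complement sum, csum_partial() extends it over the rewritten TCP header, and csum_tcpudp_magic()/csum_ipv6_magic() add the pseudo-header and fold the result into the 16-bit wire field. The underlying arithmetic is plain one's-complement addition over 16-bit words; a standalone userspace sketch of the IPv4 case follows (ocsum_add and fold16 are illustrative names, not kernel API):

/* Minimal sketch of the one's-complement math used above. ocsum_add()
 * mirrors csum_partial(); fold16() mirrors the final fold-and-complement
 * done by csum_tcpudp_magic(). Illustrative only. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <netinet/in.h>

/* Accumulate big-endian 16-bit words into a 32-bit one's-complement sum. */
static uint32_t ocsum_add(uint32_t sum, const uint8_t *p, size_t len)
{
        while (len > 1) {
                sum += (uint32_t)p[0] << 8 | p[1];
                p += 2;
                len -= 2;
        }
        if (len)                        /* odd trailing byte */
                sum += (uint32_t)p[0] << 8;
        return sum;
}

/* Fold the carries back in and complement, yielding the on-wire field. */
static uint16_t fold16(uint32_t sum)
{
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}

int main(void)
{
        uint8_t tcp[20] = { 0x04, 0xd2, 0x00, 0x50 };   /* sport 1234, dport 80,
                                                         * checksum field zeroed */
        uint32_t saddr = 0xc0a80001, daddr = 0xc0a80002; /* 192.168.0.1 -> .2 */
        uint32_t sum = ocsum_add(0, tcp, sizeof(tcp));

        /* IPv4 pseudo-header: saddr, daddr, protocol, TCP length. */
        sum += (saddr >> 16) + (saddr & 0xffff);
        sum += (daddr >> 16) + (daddr & 0xffff);
        sum += IPPROTO_TCP + sizeof(tcp);

        printf("tcp checksum: 0x%04x\n", fold16(sum));
        return 0;
}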
@@ -607,14 +629,59 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
        skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
 }
 
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+                                       __be16 *proto)
 {
-       __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
+       *proto = ((struct ethhdr *)skb->data)->h_proto;
+       *proto = __vlan_get_protocol(skb, *proto, network_depth);
 
-       ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
-       return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
+       if (*proto == htons(ETH_P_IP))
+               return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr));
+
+       if (*proto == htons(ETH_P_IPV6))
+               return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr));
+
+       return false;
 }
 
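The pskb_may_pull() calls are the substance of the headline fix: callers of is_last_ethertype_ip(), such as mlx5e_enable_ecn() and get_ip_proto() below, go on to dereference the IP header, so the helper must first prove the header sits in the skb's linear area. A hedged userspace analogue of the validate-before-dereference pattern (frame_ip_proto and the header structs are illustrative, using the standard 20-byte IPv4 and 40-byte IPv6 layouts):

/* Refuse to read an IP header that the buffer cannot actually contain --
 * the role pskb_may_pull() plays above. Names are illustrative only. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define ETH_P_IP        0x0800
#define ETH_P_IPV6      0x86DD

struct ip4hdr {                         /* illustrative 20-byte IPv4 layout */
        uint8_t  ver_ihl, tos;
        uint16_t tot_len, id, frag_off;
        uint8_t  ttl, protocol;
        uint16_t check;
        uint32_t saddr, daddr;
};

struct ip6hdr {                         /* illustrative 40-byte IPv6 layout */
        uint32_t vtc_flow;
        uint16_t payload_len;
        uint8_t  nexthdr, hop_limit;
        uint8_t  saddr[16], daddr[16];
};

/* Return the L4 protocol number, or -1 when the linear buffer is too
 * short to hold the IP header. */
static int frame_ip_proto(const uint8_t *buf, size_t linear_len,
                          size_t network_depth, uint16_t proto)
{
        if (proto == ETH_P_IP) {
                if (linear_len < network_depth + sizeof(struct ip4hdr))
                        return -1;      /* header not in the buffer */
                return ((const struct ip4hdr *)(buf + network_depth))->protocol;
        }
        if (proto == ETH_P_IPV6) {
                if (linear_len < network_depth + sizeof(struct ip6hdr))
                        return -1;
                return ((const struct ip6hdr *)(buf + network_depth))->nexthdr;
        }
        return -1;                      /* not IP at all */
}

int main(void)
{
        uint8_t frame[34] = { 0 };      /* 14B Ethernet + 20B IPv4, zeroed */

        frame[14] = 0x45;               /* version 4, ihl 5 */
        frame[23] = 6;                  /* protocol = TCP */
        printf("full:  %d\n", frame_ip_proto(frame, sizeof(frame), 14, ETH_P_IP));
        printf("short: %d\n", frame_ip_proto(frame, 20, 14, ETH_P_IP));
        return 0;
}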
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+       int network_depth = 0;
+       __be16 proto;
+       void *ip;
+       int rc;
+
+       if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+               return;
+
+       ip = skb->data + network_depth;
+       rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+                                        IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
+
+       rq->stats.ecn_mark += !!rc;
+}
+
+static u32 mlx5e_get_fcs(const struct sk_buff *skb)
+{
+       const void *fcs_bytes;
+       u32 _fcs_bytes;
+
+       fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+                                      ETH_FCS_LEN, &_fcs_bytes);
+
+       return __get_unaligned_cpu32(fcs_bytes);
+}
+
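mlx5e_get_fcs() reads the trailing four FCS octets so they can be folded into skb->csum further down when NETIF_F_RXFCS leaves them in the frame. The fold uses csum_block_add(), whose one subtlety is the offset's parity: at an odd offset every byte of the block lands in the opposite half of its 16-bit word, so the block's sum must be rotated by 8 bits first. A hedged standalone sketch of that behavior:

/* Sketch of csum_block_add() semantics: fold a block's one's-complement
 * sum into a running sum at a given byte offset. Illustrative only. */
#include <stdint.h>
#include <stdio.h>

static uint32_t ror32(uint32_t v, unsigned int n)   /* n in 1..31 here */
{
        return v >> n | v << (32 - n);
}

static uint32_t csum_add32(uint32_t a, uint32_t b)
{
        uint32_t sum = a + b;

        return sum + (sum < a);         /* end-around carry */
}

static uint32_t block_add(uint32_t sum, uint32_t part, int offset)
{
        if (offset & 1)                 /* odd offset: bytes swap halves */
                part = ror32(part, 8);
        return csum_add32(sum, part);
}

int main(void)
{
        uint32_t fcs_sum = 0xdeadbeef;  /* stand-in for the FCS bytes' sum */

        printf("even: 0x%08x\n", block_add(0x1234, fcs_sum, 60));
        printf("odd:  0x%08x\n", block_add(0x1234, fcs_sum, 61));
        return 0;
}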
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
+{
+       void *ip_p = skb->data + network_depth;
+
+       return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+                                           ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
+#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+
 static inline void mlx5e_handle_csum(struct net_device *netdev,
                                     struct mlx5_cqe64 *cqe,
                                     struct mlx5e_rq *rq,
@@ -622,6 +689,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                                     bool   lro)
 {
        int network_depth = 0;
+       __be16 proto;
 
        if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
                goto csum_none;
@@ -632,7 +700,25 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                return;
        }
 
-       if (is_last_ethertype_ip(skb, &network_depth)) {
+       /* True when explicitly set via priv flag, or XDP prog is loaded */
+       if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))
+               goto csum_unnecessary;
+
+       /* CQE csum doesn't cover padding octets in short ethernet
+        * frames. And the pad field is appended prior to calculating
+        * and appending the FCS field.
+        *
+        * Detecting these padded frames requires to verify and parse
+        * IP headers, so we simply force all those small frames to be
+        * CHECKSUM_UNNECESSARY even if they are not padded.
+        */
+       if (short_frame(skb->len))
+               goto csum_unnecessary;
+
+       if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+               if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
+                       goto csum_unnecessary;
+
                skb->ip_summed = CHECKSUM_COMPLETE;
                skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
                if (network_depth > ETH_HLEN)
@@ -643,10 +729,15 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                        skb->csum = csum_partial(skb->data + ETH_HLEN,
                                                 network_depth - ETH_HLEN,
                                                 skb->csum);
+               if (unlikely(netdev->features & NETIF_F_RXFCS))
+                       skb->csum = csum_block_add(skb->csum,
+                                                  (__force __wsum)mlx5e_get_fcs(skb),
+                                                  skb->len - ETH_FCS_LEN);
                rq->stats.csum_complete++;
                return;
        }
 
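The csum_partial() call in the CHECKSUM_COMPLETE branch above extends the hardware sum backwards over any VLAN tags: as the code implies, the CQE checksum starts at the network header, while CHECKSUM_COMPLETE must cover everything from skb->data on. That works because one's-complement sums compose under plain addition, as this small standalone demo shows:

/* sum(VLAN|IP...) equals sum(VLAN bytes) added into sum(IP...), so the
 * driver can simply csum_partial() the tag bytes into the hardware sum. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static uint32_t ocsum(uint32_t sum, const uint8_t *p, size_t len)
{
        while (len > 1) {
                sum += (uint32_t)p[0] << 8 | p[1];
                p += 2;
                len -= 2;
        }
        if (len)
                sum += (uint32_t)p[0] << 8;
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return sum;
}

int main(void)
{
        uint8_t pkt[] = { 0x81, 0x00, 0x00, 0x2a,       /* "VLAN tag" (4B) */
                          0x45, 0x00, 0x00, 0x14 };     /* "IP header" start */

        uint32_t whole = ocsum(0, pkt, sizeof(pkt));
        uint32_t tail  = ocsum(0, pkt + 4, sizeof(pkt) - 4); /* hw-style sum */
        uint32_t fixed = ocsum(tail, pkt, 4);   /* add the VLAN bytes back */

        printf("whole=0x%04x fixed-up=0x%04x\n", whole, fixed);
        return 0;
}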
+csum_unnecessary:
        if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
                   (cqe->hds_ip_ext & CQE_L4_OK))) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -664,6 +755,8 @@ csum_none:
        rq->stats.csum_none++;
 }
 
+#define MLX5E_CE_BIT_MASK 0x80
+
 static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
                                      u32 cqe_bcnt,
                                      struct mlx5e_rq *rq,
@@ -703,6 +796,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
        skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
 
        mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+       /* checking CE bit in cqe - MSB in ml_path field */
+       if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+               mlx5e_enable_ecn(rq, skb);
+
        skb->protocol = eth_type_trans(skb, netdev);
 }
 
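When the CE bit test above fires, mlx5e_enable_ecn() lands in IP_ECN_set_ce()/IP6_ECN_set_ce(), which mark CE only on ECN-capable packets and, for IPv4, also fix the header checksum. A simplified standalone IPv4 version for illustration (it recomputes the checksum from scratch where the kernel patches it incrementally; struct ip4 is an illustrative layout):

/* Simplified IPv4 CE marking: skip non-ECT packets, set the ECN field to
 * CE, and recompute the header checksum. Sketch only, not kernel code. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define INET_ECN_NOT_ECT        0
#define INET_ECN_CE             3

struct ip4 {                    /* minimal IPv4 header, illustrative layout */
        uint8_t  ver_ihl;
        uint8_t  tos;           /* ECN in the low two bits */
        uint8_t  rest[8];
        uint16_t check;
        uint32_t saddr, daddr;
};

static uint16_t ip4_csum(const struct ip4 *h)
{
        const uint16_t *w = (const uint16_t *)h;
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i < sizeof(*h) / 2; i++)
                sum += w[i];
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
}

/* Returns 1 if CE was set, 0 if the packet was not ECN-capable. */
static int ip4_set_ce(struct ip4 *h)
{
        if ((h->tos & INET_ECN_CE) == INET_ECN_NOT_ECT)
                return 0;
        h->tos |= INET_ECN_CE;
        h->check = 0;
        h->check = ip4_csum(h);
        return 1;
}

int main(void)
{
        struct ip4 h = { .ver_ihl = 0x45, .tos = 0x02 };        /* ECT(0) */

        printf("marked=%d tos=0x%02x\n", ip4_set_ce(&h), h.tos);
        return 0;
}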
@@ -1051,7 +1148,7 @@ mpwrq_cqe_out:
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
        struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-       struct mlx5e_xdpsq *xdpsq;
+       struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
        struct mlx5_cqe64 *cqe;
        int work_done = 0;
 
@@ -1062,10 +1159,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
 
        cqe = mlx5_cqwq_get_cqe(&cq->wq);
-       if (!cqe)
+       if (!cqe) {
+               if (unlikely(work_done))
+                       goto out;
                return 0;
-
-       xdpsq = &rq->xdpsq;
+       }
 
        do {
                if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
@@ -1080,6 +1178,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
                rq->handle_rx_cqe(rq, cqe);
        } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
+out:
        if (xdpsq->db.doorbell) {
                mlx5e_xmit_xdp_doorbell(xdpsq);
                xdpsq->db.doorbell = false;
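The restructuring above matters when the whole budget is consumed decompressing CQEs and mlx5_cqwq_get_cqe() then returns NULL: XDP transmissions may already be queued, so the early return becomes a goto out that still reaches the doorbell flush. The batch-then-doorbell idiom, sketched in plain C with illustrative names:

/* Batch-then-doorbell loop: complete as much work as the budget allows
 * and ring the doorbell once per poll -- on every exit path -- rather
 * than once per packet. Illustrative only. */
#include <stdbool.h>
#include <stdio.h>

struct txq {
        int  pending;           /* descriptors queued since last doorbell */
        bool doorbell;          /* at least one descriptor awaits the HW */
};

static void ring_doorbell(struct txq *q)
{
        printf("doorbell: %d descriptors\n", q->pending);
        q->pending = 0;
        q->doorbell = false;
}

static int poll(struct txq *q, int budget, int backlog)
{
        int done = 0;

        while (done < budget && backlog-- > 0) {
                q->pending++;           /* pretend each CQE queued an XDP TX */
                q->doorbell = true;
                done++;
        }
        /* Single flush on every exit path, mirroring the 'out:' label. */
        if (q->doorbell)
                ring_doorbell(q);
        return done;
}

int main(void)
{
        struct txq q = { 0 };

        printf("done=%d\n", poll(&q, 8, 5));
        return 0;
}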
@@ -1175,7 +1274,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
                                         u32 cqe_bcnt,
                                         struct sk_buff *skb)
 {
+       struct hwtstamp_config *tstamp;
        struct net_device *netdev;
+       struct mlx5e_priv *priv;
        char *pseudo_header;
        u32 qpn;
        u8 *dgid;
@@ -1194,6 +1295,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
                return;
        }
 
+       priv = mlx5i_epriv(netdev);
+       tstamp = &priv->tstamp;
+
        g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
        dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
        if ((!g) || dgid[0] != 0xff)
@@ -1214,7 +1318,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
        skb->ip_summed = CHECKSUM_COMPLETE;
        skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
 
-       if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+       if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
                skb_hwtstamps(skb)->hwtstamp =
                                mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));