bpf: Add IPv6 Segment Routing helpers

author Mathieu Xhonneux <m.xhonneux@gmail.com>

Sun, 20 May 2018 13:58:14 +0000 (14:58 +0100)

committer Daniel Borkmann <daniel@iogearbox.net>

Thu, 24 May 2018 09:57:35 +0000 (11:57 +0200)
author Mathieu Xhonneux <m.xhonneux@gmail.com>
Sun, 20 May 2018 13:58:14 +0000 (14:58 +0100)
committer Daniel Borkmann <daniel@iogearbox.net>
Thu, 24 May 2018 09:57:35 +0000 (11:57 +0200)
diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h

index 57498b23085d6985b2fa9e858aa9b8bc322a4d2f..661fd5b4d3e0b9ea40079d161131d6471d7335aa 100644 (file)
--- a/include/net/seg6_local.h
+++ b/include/net/seg6_local.h
@@ -15,10 +15,18 @@
  #ifndef _NET_SEG6_LOCAL_H
  #define _NET_SEG6_LOCAL_H
  
+#include <linux/percpu.h>
  #include <linux/net.h>
  #include <linux/ipv6.h>
  
  extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
                                u32 tbl_id);
  
+struct seg6_bpf_srh_state {    /* SRH bookkeeping shared by the bpf_lwt_seg6_* helpers (one instance per CPU) */
+       bool valid;     /* cleared when a helper edits the TLV area; set by bpf_lwt_seg6_action() after revalidation */
+       u16 hdrlen;     /* length in bytes; bpf_lwt_seg6_action() stores hdrlen >> 3 into srh->hdrlen */
+};
+
+DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
  #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index 344d2ddcef49d75fd46d5e0d498440db81edf624..fdaf6a0bfa5bf0dd65ebfd2dac2d6bcc2f795a63 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1902,6 +1902,90 @@ union bpf_attr {
   *             egress otherwise). This is the only flag supported for now.
   *     Return
   *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ *     Description
+ *             Encapsulate the packet associated to *skb* within a Layer 3
+ *             protocol header. This header is provided in the buffer at
+ *             address *hdr*, with *len* its size in bytes. *type* indicates
+ *             the protocol of the header and can be one of:
+ *
+ *             **BPF_LWT_ENCAP_SEG6**
+ *                     IPv6 encapsulation with Segment Routing Header
+ *                     (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ *                     the IPv6 header is computed by the kernel.
+ *             **BPF_LWT_ENCAP_SEG6_INLINE**
+ *                     Only works if *skb* contains an IPv6 packet. Insert a
+ *                     Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ *                     the IPv6 header.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ *     Description
+ *             Store *len* bytes from address *from* into the packet
+ *             associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ *             inside the outermost IPv6 Segment Routing Header can be
+ *             modified through this helper.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ *     Description
+ *             Adjust the size allocated to TLVs in the outermost IPv6
+ *             Segment Routing Header contained in the packet associated to
+ *             *skb*, at position *offset* by *delta* bytes. Only offsets
+ *             after the segments are accepted. *delta* can be either
+ *             positive (growing) or negative (shrinking).
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ *     Description
+ *             Apply an IPv6 Segment Routing action of type *action* to the
+ *             packet associated to *skb*. Each action takes a parameter
+ *             contained at address *param*, and of length *param_len* bytes.
+ *             *action* can be one of:
+ *
+ *             **SEG6_LOCAL_ACTION_END_X**
+ *                     End.X action: Endpoint with Layer-3 cross-connect.
+ *                     Type of *param*: **struct in6_addr**.
+ *             **SEG6_LOCAL_ACTION_END_T**
+ *                     End.T action: Endpoint with specific IPv6 table lookup.
+ *                     Type of *param*: **int**.
+ *             **SEG6_LOCAL_ACTION_END_B6**
+ *                     End.B6 action: Endpoint bound to an SRv6 policy.
+ *                     Type of *param*: **struct ipv6_sr_hdr**.
+ *             **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ *                     End.B6.Encap action: Endpoint bound to an SRv6
+ *                     encapsulation policy.
+ *                     Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -1976,7 +2060,11 @@ union bpf_attr {
         FN(fib_lookup),                 \
         FN(sock_hash_update),           \
         FN(msg_redirect_hash),          \
-       FN(sk_redirect_hash),
+       FN(sk_redirect_hash),           \
+       FN(lwt_push_encap),             \
+       FN(lwt_seg6_store_bytes),       \
+       FN(lwt_seg6_adjust_srh),        \
+       FN(lwt_seg6_action),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -2043,6 +2131,12 @@ enum bpf_hdr_start_off {
         BPF_HDR_START_NET,
  };
  
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+       BPF_LWT_ENCAP_SEG6,     /* IPv6 encapsulation with SRH; the IPv6 header is computed by the kernel */
+       BPF_LWT_ENCAP_SEG6_INLINE       /* insert the SRH inside an existing IPv6 packet */
+};
+
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
   */
diff --git a/net/core/filter.c b/net/core/filter.c

index ba3ff5aa575a6b1ad8bff8aa3e1e28027be3abb1..2e05dcfda6d70be1c367aeb29fe5bb6ca807318f 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -64,6 +64,10 @@
  #include <net/ip_fib.h>
  #include <net/flow.h>
  #include <net/arp.h>
+#include <net/ipv6.h>
+#include <linux/seg6_local.h>
+#include <net/seg6.h>
+#include <net/seg6_local.h>
  
  /**
   *     sk_filter_trim_cap - run a packet through a socket filter
@@ -3363,28 +3367,6 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
         .arg3_type      = ARG_ANYTHING,
  };
  
-bool bpf_helper_changes_pkt_data(void *func)
-{
-       if (func == bpf_skb_vlan_push ||
-           func == bpf_skb_vlan_pop ||
-           func == bpf_skb_store_bytes ||
-           func == bpf_skb_change_proto ||
-           func == bpf_skb_change_head ||
-           func == bpf_skb_change_tail ||
-           func == bpf_skb_adjust_room ||
-           func == bpf_skb_pull_data ||
-           func == bpf_clone_redirect ||
-           func == bpf_l3_csum_replace ||
-           func == bpf_l4_csum_replace ||
-           func == bpf_xdp_adjust_head ||
-           func == bpf_xdp_adjust_meta ||
-           func == bpf_msg_pull_data ||
-           func == bpf_xdp_adjust_tail)
-               return true;
-
-       return false;
-}
-
  static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
                                   unsigned long off, unsigned long len)
  {
@@ -4360,6 +4342,264 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
         .arg4_type      = ARG_ANYTHING,
  };
  
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+{ /* Push the SRH found in hdr onto skb (inline or as outer encap), then look up the next hop. */
+       int err;
+       struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
+
+       if (!seg6_validate_srh(srh, len)) /* refuse an SRH that fails validation against len */
+               return -EINVAL;
+
+       switch (type) {
+       case BPF_LWT_ENCAP_SEG6_INLINE:
+               if (skb->protocol != htons(ETH_P_IPV6)) /* inline mode only accepts IPv6 skbs */
+                       return -EBADMSG;
+
+               err = seg6_do_srh_inline(skb, srh);
+               break;
+       case BPF_LWT_ENCAP_SEG6:
+               skb_reset_inner_headers(skb);
+               skb->encapsulation = 1;
+               err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       bpf_compute_data_pointers(skb); /* placed before the err check, so it also runs on failure */
+       if (err)
+               return err;
+
+       ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); /* skb length minus the fixed IPv6 header */
+       skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+       return seg6_lookup_nexthop(skb, NULL, 0); /* nhaddr = NULL, tbl_id = 0 */
+}
+#endif /* CONFIG_IPV6_SEG6_BPF */
+
+BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+          u32, len)
+{ /* Helper entry point: dispatch on the requested encapsulation type. */
+       switch (type) {
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+       case BPF_LWT_ENCAP_SEG6:
+       case BPF_LWT_ENCAP_SEG6_INLINE:
+               return bpf_push_seg6_encap(skb, type, hdr, len);
+#endif
+       default: /* unknown type, or any type when CONFIG_IPV6_SEG6_BPF is disabled */
+               return -EINVAL;
+       }
+}
+
+static const struct bpf_func_proto bpf_lwt_push_encap_proto = { /* prototype description for bpf_lwt_push_encap() */
+       .func           = bpf_lwt_push_encap,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,       /* skb */
+       .arg2_type      = ARG_ANYTHING,         /* type */
+       .arg3_type      = ARG_PTR_TO_MEM,       /* hdr */
+       .arg4_type      = ARG_CONST_SIZE        /* len */
+};
+
+BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
+          const void *, from, u32, len)
+{ /* Copy len bytes from 'from' to skb->data + offset, restricted to two windows of the SRH. */
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+       struct seg6_bpf_srh_state *srh_state =
+               this_cpu_ptr(&seg6_bpf_srh_states);
+       void *srh_tlvs, *srh_end, *ptr;
+       struct ipv6_sr_hdr *srh;
+       int srhoff = 0;
+
+       if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) /* IPPROTO_ROUTING header lookup failed */
+               return -EINVAL;
+
+       srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+       srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4)); /* NOTE(review): bpf_lwt_seg6_adjust_srh also adds sizeof(*srh) to this bound; confirm which is intended */
+       srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
+
+       ptr = skb->data + offset;
+       if (ptr >= srh_tlvs && ptr + len <= srh_end) /* write lies entirely within [srh_tlvs, srh_end] */
+               srh_state->valid = 0; /* bpf_lwt_seg6_action() revalidates the SRH while this is clear */
+       else if (ptr < (void *)&srh->flags ||
+                ptr + len > (void *)&srh->segments)
+               return -EFAULT; /* neither in the TLV window nor within [&srh->flags, &srh->segments] */
+
+       if (unlikely(bpf_try_make_writable(skb, offset + len)))
+               return -EFAULT;
+
+       memcpy(skb->data + offset, from, len);
+       return 0;
+#else /* CONFIG_IPV6_SEG6_BPF */
+       return -EOPNOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = { /* prototype description for bpf_lwt_seg6_store_bytes() */
+       .func           = bpf_lwt_seg6_store_bytes,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,       /* skb */
+       .arg2_type      = ARG_ANYTHING,         /* offset */
+       .arg3_type      = ARG_PTR_TO_MEM,       /* from */
+       .arg4_type      = ARG_CONST_SIZE        /* len */
+};
+
+BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
+          u32, action, void *, param, u32, param_len)
+{ /* Revalidate the SRH if it was flagged invalid, then apply the requested seg6 action to skb. */
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+       struct seg6_bpf_srh_state *srh_state =
+               this_cpu_ptr(&seg6_bpf_srh_states);
+       struct ipv6_sr_hdr *srh;
+       int srhoff = 0;
+       int err;
+
+       if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) /* IPPROTO_ROUTING header lookup failed */
+               return -EINVAL;
+       srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+       if (!srh_state->valid) { /* store_bytes/adjust_srh clear this after touching the TLV area */
+               if (unlikely((srh_state->hdrlen & 7) != 0)) /* tracked byte length must be a multiple of 8 */
+                       return -EBADMSG;
+
+               srh->hdrlen = (u8)(srh_state->hdrlen >> 3); /* write the length back in 8-byte units */
+               if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
+                       return -EBADMSG;
+
+               srh_state->valid = 1;
+       }
+
+       switch (action) {
+       case SEG6_LOCAL_ACTION_END_X:
+               if (param_len != sizeof(struct in6_addr)) /* param is used as the nhaddr argument */
+                       return -EINVAL;
+               return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
+       case SEG6_LOCAL_ACTION_END_T:
+               if (param_len != sizeof(int)) /* param is used as the tbl_id argument */
+                       return -EINVAL;
+               return seg6_lookup_nexthop(skb, NULL, *(int *)param);
+       case SEG6_LOCAL_ACTION_END_B6:
+               err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
+                                         param, param_len);
+               if (!err) /* on success, track the byte length taken from the pushed SRH's hdrlen field */
+                       srh_state->hdrlen =
+                               ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+               return err;
+       case SEG6_LOCAL_ACTION_END_B6_ENCAP:
+               err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
+                                         param, param_len);
+               if (!err) /* same bookkeeping as END_B6 */
+                       srh_state->hdrlen =
+                               ((struct ipv6_sr_hdr *)param)->hdrlen << 3;
+               return err;
+       default:
+               return -EINVAL;
+       }
+#else /* CONFIG_IPV6_SEG6_BPF */
+       return -EOPNOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_action_proto = { /* prototype description for bpf_lwt_seg6_action() */
+       .func           = bpf_lwt_seg6_action,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,       /* skb */
+       .arg2_type      = ARG_ANYTHING,         /* action */
+       .arg3_type      = ARG_PTR_TO_MEM,       /* param */
+       .arg4_type      = ARG_CONST_SIZE        /* param_len */
+};
+
+BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
+          s32, len)
+{ /* Insert len bytes at skb->data + offset when len > 0, otherwise remove -len bytes there. */
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
+       struct seg6_bpf_srh_state *srh_state =
+               this_cpu_ptr(&seg6_bpf_srh_states);
+       void *srh_end, *srh_tlvs, *ptr;
+       struct ipv6_sr_hdr *srh;
+       struct ipv6hdr *hdr;
+       int srhoff = 0;
+       int ret;
+
+       if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) /* IPPROTO_ROUTING header lookup failed */
+               return -EINVAL;
+       srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
+       srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
+                       ((srh->first_segment + 1) << 4)); /* fixed header plus 16 bytes per segment */
+       srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
+                       srh_state->hdrlen);
+       ptr = skb->data + offset;
+
+       if (unlikely(ptr < srh_tlvs || ptr > srh_end)) /* offset must fall within [srh_tlvs, srh_end] */
+               return -EFAULT;
+       if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end)) /* a removal may not extend past srh_end */
+               return -EFAULT;
+
+       if (len > 0) {
+               ret = skb_cow_head(skb, len);
+               if (unlikely(ret < 0))
+                       return ret;
+
+               ret = bpf_skb_net_hdr_push(skb, offset, len);
+       } else {
+               ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
+       }
+
+       bpf_compute_data_pointers(skb); /* placed before the ret check, so it also runs on failure */
+       if (unlikely(ret < 0))
+               return ret;
+
+       hdr = (struct ipv6hdr *)skb->data; /* skb->data is treated as the start of the IPv6 header */
+       hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+       srh_state->hdrlen += len;
+       srh_state->valid = 0; /* bpf_lwt_seg6_action() revalidates the SRH while this is clear */
+       return 0;
+#else /* CONFIG_IPV6_SEG6_BPF */
+       return -EOPNOTSUPP;
+#endif
+}
+
+static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { /* prototype description for bpf_lwt_seg6_adjust_srh() */
+       .func           = bpf_lwt_seg6_adjust_srh,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,       /* skb */
+       .arg2_type      = ARG_ANYTHING,         /* offset */
+       .arg3_type      = ARG_ANYTHING,         /* len (signed delta) */
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
+{ /* Returns true when func is one of the helpers listed below, false otherwise. */
+       if (func == bpf_skb_vlan_push ||
+           func == bpf_skb_vlan_pop ||
+           func == bpf_skb_store_bytes ||
+           func == bpf_skb_change_proto ||
+           func == bpf_skb_change_head ||
+           func == bpf_skb_change_tail ||
+           func == bpf_skb_adjust_room ||
+           func == bpf_skb_pull_data ||
+           func == bpf_clone_redirect ||
+           func == bpf_l3_csum_replace ||
+           func == bpf_l4_csum_replace ||
+           func == bpf_xdp_adjust_head ||
+           func == bpf_xdp_adjust_meta ||
+           func == bpf_msg_pull_data ||
+           func == bpf_xdp_adjust_tail ||
+           func == bpf_lwt_push_encap || /* LWT encap and seg6 helpers defined earlier in this file */
+           func == bpf_lwt_seg6_store_bytes ||
+           func == bpf_lwt_seg6_adjust_srh ||
+           func == bpf_lwt_seg6_action
+           )
+               return true;
+
+       return false;
+}
+
  static const struct bpf_func_proto *
  bpf_base_func_proto(enum bpf_func_id func_id)
  {
@@ -4774,7 +5014,6 @@ static bool lwt_is_valid_access(int off, int size,
         return bpf_skb_is_valid_access(off, size, type, prog, info);
  }
  
-
  /* Attach type specific accesses */
  static bool __sock_filter_check_attach_type(int off,
                                             enum bpf_access_type access_type,
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig

index 11e4e80cf7e981826ade121bfe9c4559724c0f40..0eff75525da101e4fce2798626a317366f94623f 100644 (file)
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC
  
           If unsure, say N.
  
+config IPV6_SEG6_BPF
+       def_bool y
+       depends on IPV6_SEG6_LWTUNNEL
+       depends on IPV6 = y
+
  endif # IPV6
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c

index e9b23fb924adb99aa008728fd71d95df4c21b92d..ae68c1ef8fb0b4fb1d507796123b6b858822cbaf 100644 (file)
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -449,6 +449,8 @@ drop:
         return err;
  }
  
+DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
  static struct seg6_action_desc seg6_action_table[] = {
         {
                 .action         = SEG6_LOCAL_ACTION_END,
author	Mathieu Xhonneux <m.xhonneux@gmail.com>
	Sun, 20 May 2018 13:58:14 +0000 (14:58 +0100)
committer	Daniel Borkmann <daniel@iogearbox.net>
	Thu, 24 May 2018 09:57:35 +0000 (11:57 +0200)
include/net/seg6_local.h		patch \| blob \| blame \| history
include/uapi/linux/bpf.h		patch \| blob \| blame \| history
net/core/filter.c		patch \| blob \| blame \| history
net/ipv6/Kconfig		patch \| blob \| blame \| history
net/ipv6/seg6_local.c		patch \| blob \| blame \| history