selftests/bpf: test for seg6local End.BPF action

author Mathieu Xhonneux <m.xhonneux@gmail.com>

Sun, 20 May 2018 13:58:17 +0000 (14:58 +0100)

committer Daniel Borkmann <daniel@iogearbox.net>

Thu, 24 May 2018 09:57:36 +0000 (11:57 +0200)
author Mathieu Xhonneux <m.xhonneux@gmail.com>
Sun, 20 May 2018 13:58:17 +0000 (14:58 +0100)
committer Daniel Borkmann <daniel@iogearbox.net>
Thu, 24 May 2018 09:57:36 +0000 (11:57 +0200)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index 344d2ddcef49d75fd46d5e0d498440db81edf624..e95fec90c2c199fb909201f13c8ea2aa8886dd19 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -141,6 +141,7 @@ enum bpf_prog_type {
         BPF_PROG_TYPE_SK_MSG,
         BPF_PROG_TYPE_RAW_TRACEPOINT,
         BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+       BPF_PROG_TYPE_LWT_SEG6LOCAL,
  };
  
  enum bpf_attach_type {
@@ -1902,6 +1903,90 @@ union bpf_attr {
   *             egress otherwise). This is the only flag supported for now.
   *     Return
   *             **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ *     Description
+ *             Encapsulate the packet associated to *skb* within a Layer 3
+ *             protocol header. This header is provided in the buffer at
+ *             address *hdr*, with *len* its size in bytes. *type* indicates
+ *             the protocol of the header and can be one of:
+ *
+ *             **BPF_LWT_ENCAP_SEG6**
+ *                     IPv6 encapsulation with Segment Routing Header
+ *                     (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ *                     the IPv6 header is computed by the kernel.
+ *             **BPF_LWT_ENCAP_SEG6_INLINE**
+ *                     Only works if *skb* contains an IPv6 packet. Insert a
+ *                     Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ *                     the IPv6 header.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ *     Description
+ *             Store *len* bytes from address *from* into the packet
+ *             associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ *             inside the outermost IPv6 Segment Routing Header can be
+ *             modified through this helper.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ *     Description
+ *             Adjust the size allocated to TLVs in the outermost IPv6
+ *             Segment Routing Header contained in the packet associated to
+ *             *skb*, at position *offset* by *delta* bytes. Only offsets
+ *             after the segments are accepted. *delta* can be either
+ *             positive (growing) or negative (shrinking).
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ *     Description
+ *             Apply an IPv6 Segment Routing action of type *action* to the
+ *             packet associated to *skb*. Each action takes a parameter
+ *             contained at address *param*, and of length *param_len* bytes.
+ *             *action* can be one of:
+ *
+ *             **SEG6_LOCAL_ACTION_END_X**
+ *                     End.X action: Endpoint with Layer-3 cross-connect.
+ *                     Type of *param*: **struct in6_addr**.
+ *             **SEG6_LOCAL_ACTION_END_T**
+ *                     End.T action: Endpoint with specific IPv6 table lookup.
+ *                     Type of *param*: **int**.
+ *             **SEG6_LOCAL_ACTION_END_B6**
+ *                     End.B6 action: Endpoint bound to an SRv6 policy.
+ *                     Type of *param*: **struct ipv6_sr_hdr**.
+ *             **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ *                     End.B6.Encap action: Endpoint bound to an SRv6
+ *                     encapsulation policy.
+ *                     Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ *             A call to this helper is susceptible to change the underlying
+ *             packet buffer. Therefore, at load time, all checks on pointers
+ *             previously done by the verifier are invalidated and must be
+ *             performed again, if the helper is used in combination with
+ *             direct packet access.
+ *     Return
+ *             0 on success, or a negative error in case of failure.
   */
  #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                     \
@@ -1976,7 +2061,11 @@ union bpf_attr {
         FN(fib_lookup),                 \
         FN(sock_hash_update),           \
         FN(msg_redirect_hash),          \
-       FN(sk_redirect_hash),
+       FN(sk_redirect_hash),           \
+       FN(lwt_push_encap),             \
+       FN(lwt_seg6_store_bytes),       \
+       FN(lwt_seg6_adjust_srh),        \
+       FN(lwt_seg6_action),
  
  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
   * function eBPF program intends to call
@@ -2043,6 +2132,12 @@ enum bpf_hdr_start_off {
         BPF_HDR_START_NET,
  };
  
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper (its *type* argument). */
+enum bpf_lwt_encap_mode {
+       /* IPv6 encapsulation with an SRH; the IPv6 header is computed by the kernel. */
+       BPF_LWT_ENCAP_SEG6,
+       /* Insert an SRH inside the IPv6 header of an existing IPv6 packet. */
+       BPF_LWT_ENCAP_SEG6_INLINE
+};
+
  /* user accessible mirror of in-kernel sk_buff.
   * new fields can only be added to the end of this structure
   */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile

index 49618c9a5638324f12cabdb151e95453851c1198..85044448bbc79548d019502926c7e09eac8b1e12 100644 (file)
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
         sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
         sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
         test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
-       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
+       test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+       test_lwt_seg6local.o
  
  # Order correspond to 'make run_tests' order
  TEST_PROGS := test_kmod.sh \
@@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
         test_xdp_meta.sh \
         test_offload.py \
         test_sock_addr.sh \
-       test_tunnel.sh
+       test_tunnel.sh \
+       test_lwt_seg6local.sh
  
  # Compile but not part of 'make run_tests'
  TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h

index 8f143dfb370032c9309069db7e6ee21815367875..334d3e8c5e891691cb48d96b4f8d224876a27b94 100644 (file)
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -114,6 +114,18 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
  static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
                              int plen, __u32 flags) =
         (void *) BPF_FUNC_fib_lookup;
+/* Helper stubs for the LWT / seg6local helpers. Each one is a function
+ * pointer whose value is the helper's BPF_FUNC_* id; see the helper
+ * descriptions in linux/bpf.h for the semantics of the arguments.
+ */
+/* Encapsulate the packet within the header *hdr* of *len* bytes. */
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+                                unsigned int len) =
+       (void *) BPF_FUNC_lwt_push_encap;
+/* Store *len* bytes from *from* into the outermost SRH at *offset*. */
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+                                      void *from, unsigned int len) =
+       (void *) BPF_FUNC_lwt_seg6_store_bytes;
+/* Apply the SRv6 action *action*, parameterised by *param*/*param_len*. */
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+                                 unsigned int param_len) =
+       (void *) BPF_FUNC_lwt_seg6_action;
+/* Grow (delta > 0) or shrink (delta < 0) the TLV area of the outermost SRH
+ * at *offset*. The last argument is signed so that it matches the helper's
+ * documented prototype (s32 delta) and negative deltas are passed as such
+ * instead of relying on an unsigned wrap-around.
+ */
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+                                     int delta) =
+       (void *) BPF_FUNC_lwt_seg6_adjust_srh;
  
  /* llvm builtin functions that eBPF C program may use to
   * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c

new file mode 100644 (file)

index 0000000..0575751
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* printf-style debug helper: copies the format string into a local array
+ * and hands it, with its size, to bpf_trace_printk().
+ */
+#define bpf_printk(fmt, ...)                           \
+({                                                     \
+       char ____fmt[] = fmt;                           \
+       bpf_trace_printk(____fmt, sizeof(____fmt),      \
+                       ##__VA_ARGS__);                 \
+})
+
+/* Packet parsing state machine helpers. */
+/* Evaluates to the current cursor value, then advances the cursor by _len
+ * bytes. The caller still has to bounds-check the returned pointer.
+ */
+#define cursor_advance(_cursor, _len) \
+       ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+/* Bit tested/set in the SRH flags byte by the programs below (the "A" flag). */
+#define SR6_FLAG_ALERT (1 << 4)
+
+/* 64-bit host <-> network byte order conversion. When bpf_htonl(1) == 1 the
+ * value is returned unchanged; otherwise the two 32-bit halves are each
+ * byte-swapped and exchanged. Note that (x) is evaluated more than once.
+ */
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+                               0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+                               0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+/* Applied to the header structs below so they carry no padding. */
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+/* Packed overlay for the fixed IPv6 header, with each 128-bit address split
+ * into two 64-bit halves.
+ * NOTE(review): bit-field layout is compiler/endianness dependent; the code
+ * in this file reads the version nibble from the first packet byte directly
+ * instead of going through `ver`.
+ */
+struct ip6_t {
+       unsigned int ver:4;
+       unsigned int priority:8;
+       unsigned int flow_label:20;
+       unsigned short payload_len;
+       unsigned char next_header;
+       unsigned char hop_limit;
+       unsigned long long src_hi;
+       unsigned long long src_lo;
+       unsigned long long dst_hi;
+       unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+/* A 128-bit IPv6 address as two 64-bit halves; the programs below fill and
+ * compare the halves through htonll()/ntohll().
+ */
+struct ip6_addr_t {
+       unsigned long long hi;
+       unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+/* Packed overlay for the Segment Routing Header: 8 fixed bytes followed by
+ * the segment list. The code below treats the total SRH length as
+ * (hdrlen + 1) * 8 bytes and the segment count as first_segment + 1.
+ */
+struct ip6_srh_t {
+       unsigned char nexthdr;
+       unsigned char hdrlen;
+       unsigned char type;
+       unsigned char segments_left;
+       unsigned char first_segment;
+       unsigned char flags;
+       unsigned short tag;
+
+       struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+/* Two-byte TLV header found after the segment list; `len` counts the value
+ * bytes only, so a whole TLV occupies sizeof(struct sr6_tlv_t) + len bytes.
+ */
+struct sr6_tlv_t {
+       unsigned char type;
+       unsigned char len;
+       unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+/*
+ * Locate the Segment Routing Header that directly follows the IPv6 header at
+ * the start of the packet data.
+ *
+ * Returns a pointer to the SRH inside the packet, or NULL when the packet is
+ * too short, is not IPv6, its next header is not 43 (the IPv6 Routing header)
+ * or the routing type is not 4 (Segment Routing). Only the fixed part of the
+ * SRH (sizeof(struct ip6_srh_t)) is bounds-checked here, not the segment
+ * list or the TLVs.
+ */
+__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+       void *cursor, *data_end;
+       struct ip6_srh_t *srh;
+       struct ip6_t *ip;
+       uint8_t *ipver;
+
+       data_end = (void *)(long)skb->data_end;
+       cursor = (void *)(long)skb->data;
+       ipver = (uint8_t *)cursor;
+
+       // make sure the first byte is readable before looking at the version
+       if ((void *)ipver + sizeof(*ipver) > data_end)
+               return NULL;
+
+       // the IP version is the high nibble of the first byte
+       if ((*ipver >> 4) != 6)
+               return NULL;
+
+       ip = cursor_advance(cursor, sizeof(*ip));
+       if ((void *)ip + sizeof(*ip) > data_end)
+               return NULL;
+
+       // 43 = Routing extension header
+       if (ip->next_header != 43)
+               return NULL;
+
+       srh = cursor_advance(cursor, sizeof(*srh));
+       if ((void *)srh + sizeof(*srh) > data_end)
+               return NULL;
+
+       // routing type 4 = Segment Routing
+       if (srh->type != 4)
+               return NULL;
+
+       return srh;
+}
+
+/*
+ * Resize and rewrite the padding TLV located at packet offset pad_off.
+ *
+ * When new_pad differs from old_pad, the SRH is first grown or shrunk at
+ * pad_off by the (signed) difference. When new_pad is non-zero, a padding TLV
+ * of exactly new_pad bytes (2-byte header + zeroed value) is then written at
+ * pad_off. new_pad == 0 means "no padding TLV".
+ *
+ * Returns 0 on success, or the error reported by the failing helper.
+ */
+__attribute__((always_inline))
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+                  uint32_t old_pad, uint32_t pad_off)
+{
+       int err;
+
+       if (new_pad != old_pad) {
+               err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+                                         (int) new_pad - (int) old_pad);
+               if (err)
+                       return err;
+       }
+
+       if (new_pad > 0) {
+               // 16 bytes is enough for the callers in this file, which
+               // never request more than 9 bytes of padding
+               char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                                       0, 0, 0};
+               struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+               pad_tlv->type = SR6_TLV_PADDING;
+               // len excludes the 2-byte type/len header
+               pad_tlv->len = new_pad - 2;
+
+               err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+                                              (void *)pad_tlv_buf, new_pad);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Walk the TLVs that follow the segment list and check that *tlv_off (a
+ * packet offset) falls on a TLV boundary.
+ *
+ * In/out parameters:
+ *   tlv_off  - offset to validate. The value -1 means "wherever the walk
+ *              stops" and is replaced by that offset. A value equal to the
+ *              SRH offset is replaced by the offset of the padding TLV when
+ *              one is found.
+ *   pad_size - set to the full size of the padding TLV (header included)
+ *              when one is found; left untouched otherwise, so callers
+ *              must initialise it.
+ *   pad_off  - set to the offset of the padding TLV, or to the offset where
+ *              the walk stopped when there is none.
+ *
+ * The walk stops at the first padding or HMAC TLV, at the end of the SRH, or
+ * after 10 TLVs.
+ *
+ * Returns 0 on success, -EINVAL when *tlv_off is not on a TLV boundary, or
+ * the error returned by bpf_skb_load_bytes().
+ */
+__attribute__((always_inline))
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+                         uint32_t *tlv_off, uint32_t *pad_size,
+                         uint32_t *pad_off)
+{
+       uint32_t srh_off, cur_off;
+       int offset_valid = 0;
+       int err;
+
+       srh_off = (char *)srh - (char *)(long)skb->data;
+       // cur_off = end of segments, start of possible TLVs
+       cur_off = srh_off + sizeof(*srh) +
+               sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+       // 0 doubles as "no padding TLV found yet"
+       *pad_off = 0;
+
+       // we can only go as far as ~10 TLVs due to the BPF max stack size
+       #pragma clang loop unroll(full)
+       for (int i = 0; i < 10; i++) {
+               struct sr6_tlv_t tlv;
+
+               if (cur_off == *tlv_off)
+                       offset_valid = 1;
+
+               // (hdrlen + 1) * 8 is the total SRH length in bytes
+               if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+                       break;
+
+               err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+               if (err)
+                       return err;
+
+               if (tlv.type == SR6_TLV_PADDING) {
+                       *pad_size = tlv.len + sizeof(tlv);
+                       *pad_off = cur_off;
+
+                       // caller asked for "the SRH offset": redirect it to
+                       // the padding TLV
+                       if (*tlv_off == srh_off) {
+                               *tlv_off = cur_off;
+                               offset_valid = 1;
+                       }
+                       break;
+
+               } else if (tlv.type == SR6_TLV_HMAC) {
+                       break;
+               }
+
+               cur_off += sizeof(tlv) + tlv.len;
+       } // we reached the padding or HMAC TLVs, or the end of the SRH
+
+       if (*pad_off == 0)
+               *pad_off = cur_off;
+
+       // -1 (compared as unsigned) is the "use the stop offset" sentinel
+       if (*tlv_off == -1)
+               *tlv_off = cur_off;
+       else if (!offset_valid)
+               return -EINVAL;
+
+       return 0;
+}
+
+/*
+ * Insert the TLV *itlv* into the SRH and fix up the trailing padding TLV.
+ *
+ *   tlv_off  - insertion offset relative to the start of the SRH, or -1 to
+ *              insert where is_valid_tlv_boundary() stops its walk.
+ *   itlv     - TLV to insert; padding and HMAC TLVs are rejected.
+ *   tlv_size - number of bytes copied from itlv into the packet.
+ *              NOTE(review): the SRH is grown by sizeof(*itlv) + itlv->len,
+ *              so callers are expected to pass that same value here.
+ *
+ * Returns 0 on success, -EINVAL for a rejected TLV type or an offset that is
+ * not on a TLV boundary, or the error returned by a failing helper.
+ */
+__attribute__((always_inline))
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+           struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+       uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+       uint8_t len_remaining, new_pad;
+       uint32_t pad_off = 0;
+       uint32_t pad_size = 0;
+       uint32_t partial_srh_len;
+       int err;
+
+       // turn the SRH-relative offset into a packet offset; keep the -1
+       // sentinel untouched
+       if (tlv_off != -1)
+               tlv_off += srh_off;
+
+       if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+               return -EINVAL;
+
+       err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+       if (err)
+               return err;
+
+       // make room for the new TLV, then write it
+       err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+       if (err)
+               return err;
+
+       err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+       if (err)
+               return err;
+
+       // the following can't be moved inside update_tlv_pad because the
+       // bpf verifier has some issues with it
+       // the padding position moved by the size of the inserted TLV
+       pad_off += sizeof(*itlv) + itlv->len;
+       partial_srh_len = pad_off - srh_off;
+       // bytes needed to bring the SRH length back to a multiple of 8
+       len_remaining = partial_srh_len % 8;
+       new_pad = 8 - len_remaining;
+
+       if (new_pad == 1) // cannot pad for 1 byte only
+               new_pad = 9;
+       else if (new_pad == 8)
+               new_pad = 0;
+
+       return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+/*
+ * Remove the TLV found at tlv_off (relative to the start of the SRH) and fix
+ * up the trailing padding TLV so the SRH length stays a multiple of 8.
+ *
+ * Returns 0 on success, -EINVAL when tlv_off is not on a TLV boundary, or
+ * the error returned by a failing helper.
+ */
+__attribute__((always_inline))
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+              uint32_t tlv_off)
+{
+       uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+       uint8_t len_remaining, new_pad;
+       uint32_t partial_srh_len;
+       uint32_t pad_off = 0;
+       uint32_t pad_size = 0;
+       struct sr6_tlv_t tlv;
+       int err;
+
+       // turn the SRH-relative offset into a packet offset
+       tlv_off += srh_off;
+
+       err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+       if (err)
+               return err;
+
+       // read the TLV header to learn how many bytes have to go
+       err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+       if (err)
+               return err;
+
+       // negative delta: shrink the SRH by the size of the whole TLV
+       err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+       if (err)
+               return err;
+
+       // the padding position moved back by the size of the removed TLV
+       pad_off -= sizeof(tlv) + tlv.len;
+       partial_srh_len = pad_off - srh_off;
+       len_remaining = partial_srh_len % 8;
+       new_pad = 8 - len_remaining;
+       if (new_pad == 1) // cannot pad for 1 byte only
+               new_pad = 9;
+       else if (new_pad == 8)
+               new_pad = 0;
+
+       return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+/*
+ * Tell whether the first TLV after the segment list is an Egress TLV of
+ * length 18 whose address is fd00::4.
+ *
+ * Returns 1 when it is, 0 otherwise (including when the packet bytes cannot
+ * be read).
+ */
+__attribute__((always_inline))
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
+       struct ip6_addr_t egress;
+       struct sr6_tlv_t hdr;
+       int first_tlv_off;
+
+       // TLVs start right after the segment list (16 bytes per segment)
+       first_tlv_off = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+               ((srh->first_segment + 1) << 4);
+
+       if (bpf_skb_load_bytes(skb, first_tlv_off, &hdr, sizeof(hdr)))
+               return 0;
+
+       if (hdr.type != SR6_TLV_EGRESS || hdr.len != 18)
+               return 0;
+
+       // the address sits 4 bytes into the TLV
+       if (bpf_skb_load_bytes(skb, first_tlv_off + 4, &egress, 16))
+               return 0;
+
+       // check if egress TLV value is correct
+       if (ntohll(egress.hi) != 0xfd00000000000000)
+               return 0;
+
+       return ntohll(egress.lo) == 0x4 ? 1 : 0;
+}
+
+// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4. The segment list is written in reverse order (fd00::4 first), so
+// with segments_left = 3 the active segment is the last entry, fd00::1.
+// Returns BPF_REDIRECT once the header has been pushed, BPF_DROP if the
+// helper fails.
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+       unsigned long long hi = 0xfd00000000000000;
+       struct ip6_addr_t *seg;
+       struct ip6_srh_t *srh;
+       char srh_buf[72]; // room for 4 segments: 8 + 4 * 16 bytes
+       int err;
+
+       srh = (struct ip6_srh_t *)srh_buf;
+       srh->nexthdr = 0;
+       srh->hdrlen = 8; // (8 + 1) * 8 = 72 bytes in total
+       srh->type = 4;
+       srh->segments_left = 3;
+       srh->first_segment = 3;
+       srh->flags = 0;
+       srh->tag = 0;
+
+       seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
+       #pragma clang loop unroll(full)
+       for (unsigned long long lo = 0; lo < 4; lo++) {
+               seg->lo = htonll(4 - lo);
+               seg->hi = htonll(hi);
+               seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+       }
+
+       // name the encapsulation mode instead of passing a bare 0
+       err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_SEG6, (void *)srh,
+                                sizeof(srh_buf));
+       if (err)
+               return BPF_DROP;
+
+       return BPF_REDIRECT;
+}
+
+// Add an Egress TLV fd00::4, add the flag A,
+// and apply End.X action to fc42::1
+// Returns BPF_REDIRECT on success and BPF_DROP when the SRH is missing or
+// any step fails.
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+       unsigned long long hi = 0xfc42000000000000;
+       unsigned long long lo = 0x1;
+       struct ip6_srh_t *srh = get_srh(skb);
+       uint8_t new_flags = SR6_FLAG_ALERT;
+       struct ip6_addr_t addr;
+       int err, offset;
+
+       if (srh == NULL)
+               return BPF_DROP;
+
+       // TLV bytes: type 2, len 18, 2 zero bytes, then the address fd00::4
+       // (presumably type 2 is SR6_TLV_EGRESS, which has_egr_tlv() looks
+       // for in the next program -- confirm against linux/seg6.h)
+       uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+                          0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+       // (hdrlen + 1) * 8 is the current SRH length: insert at its end
+       err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+                     (struct sr6_tlv_t *)&tlv, 20);
+       if (err)
+               return BPF_DROP;
+
+       // overwrite the flags byte of the SRH
+       offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+       err = bpf_lwt_seg6_store_bytes(skb, offset,
+                                      (void *)&new_flags, sizeof(new_flags));
+       if (err)
+               return BPF_DROP;
+
+       // End.X towards fc42::1
+       addr.lo = htonll(lo);
+       addr.hi = htonll(hi);
+       err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+                                 (void *)&addr, sizeof(addr));
+       if (err)
+               return BPF_DROP;
+       return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do
+// a simple End action.
+// The packet is dropped unless the previous program left the A flag, an SRH
+// length of 11 and the expected Egress TLV behind.
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+       struct ip6_srh_t *srh = get_srh(skb);
+       uint16_t tag_be = bpf_htons(2442);
+       uint8_t cleared_flags = 0;
+       int flags_off, tag_off;
+
+       if (!srh)
+               return BPF_DROP;
+
+       // hdrlen 11 = 4 segments + Egress TLV + Padding TLV
+       if (srh->flags != SR6_FLAG_ALERT || srh->hdrlen != 11)
+               return BPF_DROP;
+
+       if (!has_egr_tlv(skb, srh))
+               return BPF_DROP;
+
+       // the Egress TLV is the first TLV after the segment list
+       if (delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16))
+               return BPF_DROP;
+
+       flags_off = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+       if (bpf_lwt_seg6_store_bytes(skb, flags_off, (void *)&cleared_flags,
+                                    sizeof(cleared_flags)))
+               return BPF_DROP;
+
+       tag_off = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+       if (bpf_lwt_seg6_store_bytes(skb, tag_off, (void *)&tag_be,
+                                    sizeof(tag_be)))
+               return BPF_DROP;
+
+       return BPF_OK;
+}
+
+// Check that the Egress TLV and the flag are gone and that the tag is the
+// expected one, then apply an End.T action (lookup in table 117) to reach
+// the last segment.
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+       struct ip6_srh_t *srh = get_srh(skb);
+       int table = 117;
+
+       if (!srh)
+               return BPF_DROP;
+
+       // hdrlen 8 = 4 segments and nothing else
+       if (srh->flags != 0 || srh->tag != bpf_htons(2442) ||
+           srh->hdrlen != 8)
+               return BPF_DROP;
+
+       if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+                               (void *)&table, sizeof(table)))
+               return BPF_DROP;
+
+       return BPF_REDIRECT;
+}
+
+// License string emitted in the "license" ELF section of the object file.
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh

new file mode 100755 (executable)

index 0000000..1c77994
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+# Connects 6 network namespaces through veths.
+# Each NS may have different IPv6 global scope addresses :
+#   NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
+# fb00::1           fd00::1  fd00::2  fd00::3  fb00::6
+#                   fc42::1           fd00::4
+#
+# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
+# IPv6 header with a Segment Routing Header, with segments :
+#      fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+#
+# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions :
+# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+# - fd00::2 : remove the TLV, change the flags, add a tag
+# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+#
+# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+# Each End.BPF action will validate the operations applied on the SRH by the
+# previous BPF program in the chain, otherwise the packet is dropped.
+#
+# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+# datagram can be read on NS6 when binding to fb00::6.
+
+TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
+
+# Trap handler: print the verdict matching the exit status that triggered the
+# trap, then remove the six namespaces and the capture file.
+cleanup()
+{
+       local rc="$?"
+       local ns
+
+       case "$rc" in
+       0) echo "selftests: test_lwt_seg6local [PASS]" ;;
+       *) echo "selftests: test_lwt_seg6local [FAILED]" ;;
+       esac
+
+       # teardown is best effort: a namespace may not exist
+       set +e
+       for ns in ns1 ns2 ns3 ns4 ns5 ns6; do
+               ip netns del $ns 2> /dev/null
+       done
+       rm -f $TMP_FILE
+}
+
+# Abort on the first failing command; the trap below then reports FAILED.
+set -e
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+ip netns add ns4
+ip netns add ns5
+ip netns add ns6
+
+# Run cleanup on exit and on INT, QUIT and ABRT (formerly 0 2 3 6). SIGKILL
+# (9) is no longer listed: it cannot be caught, so trapping it has no effect.
+trap cleanup EXIT INT QUIT ABRT
+
+# Create the five veth pairs that chain the six namespaces together.
+ip link add veth1 type veth peer name veth2
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link add veth7 type veth peer name veth8
+ip link add veth9 type veth peer name veth10
+
+# Move each veth end into its namespace (NSn <-> NSn+1).
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+ip link set veth3 netns ns2
+ip link set veth4 netns ns3
+ip link set veth5 netns ns3
+ip link set veth6 netns ns4
+ip link set veth7 netns ns4
+ip link set veth8 netns ns5
+ip link set veth9 netns ns5
+ip link set veth10 netns ns6
+
+# Bring every interface up (plus lo in ns6, which holds the final addresses).
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ns2 ip link set dev veth3 up
+ip netns exec ns3 ip link set dev veth4 up
+ip netns exec ns3 ip link set dev veth5 up
+ip netns exec ns4 ip link set dev veth6 up
+ip netns exec ns4 ip link set dev veth7 up
+ip netns exec ns5 ip link set dev veth8 up
+ip netns exec ns5 ip link set dev veth9 up
+ip netns exec ns6 ip link set dev veth10 up
+ip netns exec ns6 ip link set dev lo up
+
+# All link scope addresses and routes required between veths
+ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
+
+# ns1: source address of the test datagram and route towards the receiver.
+ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
+ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
+
+# ns2: traffic to fb00::6 goes through the encap_srh program; fd00::1 is
+# reached via ns3.
+ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+# ns3: fd00::1 is bound to the add_egr_x End.BPF program.
+ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+# ns4: fd00::2 is bound to the pop_egr End.BPF program; fc42::1 lives here.
+ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ns4 ip -6 addr add fc42::1 dev lo
+ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
+
+# ns5: fd00::3 is bound to the inspect_t End.BPF program; fd00::4 is only
+# reachable through routing table 117.
+ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+# ns6: holds both the last segment (fd00::4) and the final destination.
+ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
+ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
+
+# Enable IPv6 forwarding in ns1 to ns5.
+ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+# Enable seg6 processing in ns6, the node that decapsulates the packet.
+ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+# Start a UDP listener in ns6 that records what it receives, then send one
+# datagram from ns1.
+ip netns exec ns6 nc -l -6 -u -d 7330 > "$TMP_FILE" &
+ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
+# Stop the listener with SIGTERM: a background job started by a
+# non-interactive shell inherits SIGINT as ignored, so -INT may not stop it.
+kill -TERM $!
+
+# The test passes only if the payload made it through the whole chain.
+if [[ $(< "$TMP_FILE") != "foobar" ]]; then
+       exit 1
+fi
+
+exit 0
author	Mathieu Xhonneux <m.xhonneux@gmail.com>
	Sun, 20 May 2018 13:58:17 +0000 (14:58 +0100)
committer	Daniel Borkmann <daniel@iogearbox.net>
	Thu, 24 May 2018 09:57:36 +0000 (11:57 +0200)
tools/include/uapi/linux/bpf.h		patch \| blob \| blame \| history
tools/testing/selftests/bpf/Makefile		patch \| blob \| blame \| history
tools/testing/selftests/bpf/bpf_helpers.h		patch \| blob \| blame \| history
tools/testing/selftests/bpf/test_lwt_seg6local.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/test_lwt_seg6local.sh	[new file with mode: 0755]	patch \| blob