__u8 data[0]; /* Arbitrary size */
};
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
};
enum bpf_map_type {
BPF_MAP_TYPE_CPUMAP,
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
};
+/* Note that tracing related programs such as
+ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
+ * are not subject to a stable API since kernel internal data
+ * structures can change from release to release and may
+ * therefore break existing tracing BPF programs. Tracing BPF
+ * programs correspond to /a/ specific kernel which is to be
+ * analyzed, and not /a/ specific kernel /and/ all future ones.
+ */
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
};
enum bpf_attach_type {
BPF_CGROUP_INET6_CONNECT,
BPF_CGROUP_INET4_POST_BIND,
BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
__MAX_BPF_ATTACH_TYPE
};
*/
#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+ * verifier will allow any alignment whatsoever. On platforms
+ * with strict alignment requirements for loads ands stores (such
+ * as sparc and mips) the verifier validates that all loads and
+ * stores provably follow this requirement. This flag turns that
+ * checking and enforcement off.
+ *
+ * It is mostly used for testing when we want to validate the
+ * context and memory access aspects of the verifier, but because
+ * of an unaligned access the alignment check would trigger before
+ * the one we are interested in.
+ */
+#define BPF_F_ANY_ALIGNMENT (1U << 1)
+
/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
#define BPF_PSEUDO_MAP_FD 1
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
-/* flags for BPF_PROG_QUERY */
-#define BPF_F_QUERY_EFFECTIVE (1U << 0)
-
#define BPF_OBJ_NAME_LEN 16U
/* Flags for accessing BPF object */
/* Flag for stack_map, store build_id+offset instead of pointer */
#define BPF_F_STACK_BUILD_ID (1U << 5)
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED (1U << 6)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
__u32 log_level; /* verbosity level of verifier */
__u32 log_size; /* size of user buffer */
__aligned_u64 log_buf; /* user supplied buffer */
- __u32 kern_version; /* checked when prog_type=kprobe */
+ __u32 kern_version; /* not used */
__u32 prog_flags;
char prog_name[BPF_OBJ_NAME_LEN];
__u32 prog_ifindex; /* ifindex of netdev to prep for */
* (context accesses, allowed helpers, etc).
*/
__u32 expected_attach_type;
+ __u32 prog_btf_fd; /* fd pointing to BTF type data */
+ __u32 func_info_rec_size; /* userspace bpf_func_info size */
+ __aligned_u64 func_info; /* func info */
+ __u32 func_info_cnt; /* number of bpf_func_info records */
+ __u32 line_info_rec_size; /* userspace bpf_line_info size */
+ __aligned_u64 line_info; /* line info */
+ __u32 line_info_cnt; /* number of bpf_line_info records */
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
__u32 prog_fd;
__u32 retval;
- __u32 data_size_in;
- __u32 data_size_out;
+ __u32 data_size_in; /* input: len of data_in */
+ __u32 data_size_out; /* input/output: len of data_out
+ * returns ENOSPC if data_out
+ * is too small.
+ */
__aligned_u64 data_in;
__aligned_u64 data_out;
__u32 repeat;
* Return
* 0 on success, or a negative error in case of failure.
*
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
* int bpf_probe_read(void *dst, u32 size, const void *src)
* Description
* For tracing programs, safely attempt to read *size* bytes from
* ::
*
* # sysctl kernel.perf_event_max_stack=<new value>
- *
* Return
* The positive or null stack id on success, or a negative error
* in case of failure.
* A 8-byte long non-decreasing number on success, or 0 if the
* socket field is missing inside *skb*.
*
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
* The owner UID of the socket associated to *skb*. If the socket
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
* Description
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
* ::
*
* # sysctl kernel.perf_event_max_stack=<new value>
- *
* Return
- * a non-negative value equal to or less than size on success, or
- * a negative error in case of failure.
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
*
- * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
* This helper is similar to **bpf_skb_load_bytes**\ () in that
* it provides an easy way to load *len* bytes from *offset*
* in socket filters where *skb*\ **->data** does not always point
* to the start of the mac header and where "direct packet access"
* is not available.
- *
* Return
* 0 on success, or a negative error in case of failure.
*
* If lookup is successful and result shows packet is to be
* forwarded, the neighbor tables are searched for the nexthop.
* If successful (ie., FIB lookup shows forwarding and nexthop
- * is resolved), the nexthop address is returned in ipv4_dst,
- * ipv6_dst or mpls_out based on family, smac is set to mac
- * address of egress device, dmac is set to nexthop mac address,
- * rt_metric is set to metric from route.
- *
- * *plen* argument is the size of the passed in struct.
- * *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
- *
- * **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs
- * full lookup using FIB rules
- * **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress
- * perspective (default is ingress)
- *
- * *ctx* is either **struct xdp_md** for XDP programs or
- * **struct sk_buff** tc cls_act programs.
- *
- * Return
- * Egress device index on success, 0 if packet needs to continue
- * up the stack for further processing or a negative error in case
- * of failure.
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ * Return
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
*
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ *
+ * void* get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the **BPF_STX_XADD** instruction to alter
+ * the shared data.
+ * Return
+ * A pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks the selected socket is matching the incoming
+ * request in the socket buffer.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from **reuse->socks**\ [] using the hash of the tuple.
+ *
+ * int bpf_sk_release(struct bpf_sock *sock)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ * Description
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail if under memory pressure (a malloc
+ * fails) in these cases BPF programs will get an appropriate
+ * error and BPF programs will need to handle them.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags)
+ * Description
+ * Will remove *pop* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid either due to *start* byte not being valid part of *msg*
+ * payload and/or *pop* value being to large.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
- FN(lwt_seg6_action),
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem), \
+ FN(msg_push_data), \
+ FN(msg_pop_data), \
+ FN(rc_pointer_rel),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
/* BPF_FUNC_perf_event_output for sk_buff input context. */
#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS (-1L)
+
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,
BPF_LWT_ENCAP_SEG6_INLINE
};
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
/* ... here. */
__u32 data_meta;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
+ __u64 tstamp;
+ __u32 wire_len;
};
struct bpf_tunnel_key {
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
- __u16 tunnel_ext;
+ __u16 tunnel_ext; /* Padding, future use. */
__u32 tunnel_label;
};
__u32 reqid;
__u32 spi; /* Stored in network byte order */
__u16 family;
+ __u16 ext; /* Padding, future use. */
union {
__u32 remote_ipv4; /* Stored in network byte order */
__u32 remote_ipv6[4]; /* Stored in network byte order */
*/
};
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
#define XDP_PACKET_HEADROOM 256
/* User return codes for XDP prog type.
* be added to the end of this structure
*/
struct sk_msg_md {
- void *data;
- void *data_end;
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
+ __u32 size; /* Total size of sk_msg */
+};
+
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
};
#define BPF_TAG_SIZE 8
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+ __u32 btf_id;
+ __u32 func_info_rec_size;
+ __aligned_u64 func_info;
+ __u32 nr_func_info;
+ __u32 nr_line_info;
+ __aligned_u64 line_info;
+ __aligned_u64 jited_line_info;
+ __u32 nr_jited_line_info;
+ __u32 line_info_rec_size;
+ __u32 jited_line_info_rec_size;
+ __u32 nr_prog_tags;
+ __aligned_u64 prog_tags;
} __attribute__((aligned(8)));
struct bpf_map_info {
__u32 map_flags;
char name[BPF_OBJ_NAME_LEN];
__u32 ifindex;
+ __u32 :32;
__u64 netns_dev;
__u64 netns_ino;
__u32 btf_id;
__u32 family; /* Allows 4-byte read, but no write */
__u32 type; /* Allows 4-byte read, but no write */
__u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
+ * Stored in network byte order.
+ */
};
/* User bpf_sock_ops struct to access socket values and specify request ops
* Arg1: old_state
* Arg2: new_state
*/
+ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
+ * socket transition to LISTEN state.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
#define BPF_FIB_LOOKUP_DIRECT BIT(0)
#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
struct bpf_fib_lookup {
- /* input */
- __u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
/* set if lookup is to consider L4 data - e.g., FIB rules */
__u8 l4_protocol;
/* total length of packet from network header - used for MTU check */
__u16 tot_len;
- __u32 ifindex; /* L3 device index for lookup */
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
union {
/* inputs to lookup */
__u8 tos; /* AF_INET */
- __be32 flowlabel; /* AF_INET6 */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
- /* output: metric of fib result */
- __u32 rt_metric;
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
};
union {
- __be32 mpls_in;
__be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */
};
- /* input to bpf_fib_lookup, *dst is destination address.
- * output: bpf_fib_lookup sets to gateway address
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
*/
union {
- /* return for MPLS lookups */
- __be32 mpls_out[4]; /* support up to 4 labels */
__be32 ipv4_dst;
__u32 ipv6_dst[4]; /* in6_addr; network order */
};
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
+struct bpf_flow_keys {
+ __u16 nhoff;
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __u8 ip_proto;
+ __be16 n_proto;
+ __be16 sport;
+ __be16 dport;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+};
+
+struct bpf_func_info {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
+#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
+
+struct bpf_line_info {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
#endif /* __LINUX_BPF_H__ */