1 // SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 */
7 #include <linux/capability.h>
8 #include <net/xdp_sock.h>
9 #include <linux/slab.h>
10 #include <linux/sched.h>
14 static struct xsk_map_node
*xsk_map_node_alloc(struct xsk_map
*map
,
15 struct xdp_sock __rcu
**map_entry
)
17 struct xsk_map_node
*node
;
19 node
= bpf_map_kzalloc(&map
->map
, sizeof(*node
),
20 GFP_ATOMIC
| __GFP_NOWARN
);
22 return ERR_PTR(-ENOMEM
);
24 bpf_map_inc(&map
->map
);
27 node
->map_entry
= map_entry
;
31 static void xsk_map_node_free(struct xsk_map_node
*node
)
33 bpf_map_put(&node
->map
->map
);
37 static void xsk_map_sock_add(struct xdp_sock
*xs
, struct xsk_map_node
*node
)
39 spin_lock_bh(&xs
->map_list_lock
);
40 list_add_tail(&node
->node
, &xs
->map_list
);
41 spin_unlock_bh(&xs
->map_list_lock
);
44 static void xsk_map_sock_delete(struct xdp_sock
*xs
,
45 struct xdp_sock __rcu
**map_entry
)
47 struct xsk_map_node
*n
, *tmp
;
49 spin_lock_bh(&xs
->map_list_lock
);
50 list_for_each_entry_safe(n
, tmp
, &xs
->map_list
, node
) {
51 if (map_entry
== n
->map_entry
) {
56 spin_unlock_bh(&xs
->map_list_lock
);
59 static struct bpf_map
*xsk_map_alloc(union bpf_attr
*attr
)
65 if (!capable(CAP_NET_ADMIN
))
66 return ERR_PTR(-EPERM
);
68 if (attr
->max_entries
== 0 || attr
->key_size
!= 4 ||
69 attr
->value_size
!= 4 ||
70 attr
->map_flags
& ~(BPF_F_NUMA_NODE
| BPF_F_RDONLY
| BPF_F_WRONLY
))
71 return ERR_PTR(-EINVAL
);
73 numa_node
= bpf_map_attr_numa_node(attr
);
74 size
= struct_size(m
, xsk_map
, attr
->max_entries
);
76 m
= bpf_map_area_alloc(size
, numa_node
);
78 return ERR_PTR(-ENOMEM
);
80 bpf_map_init_from_attr(&m
->map
, attr
);
81 spin_lock_init(&m
->lock
);
86 static void xsk_map_free(struct bpf_map
*map
)
88 struct xsk_map
*m
= container_of(map
, struct xsk_map
, map
);
94 static int xsk_map_get_next_key(struct bpf_map
*map
, void *key
, void *next_key
)
96 struct xsk_map
*m
= container_of(map
, struct xsk_map
, map
);
97 u32 index
= key
? *(u32
*)key
: U32_MAX
;
100 if (index
>= m
->map
.max_entries
) {
105 if (index
== m
->map
.max_entries
- 1)
111 static int xsk_map_gen_lookup(struct bpf_map
*map
, struct bpf_insn
*insn_buf
)
113 const int ret
= BPF_REG_0
, mp
= BPF_REG_1
, index
= BPF_REG_2
;
114 struct bpf_insn
*insn
= insn_buf
;
116 *insn
++ = BPF_LDX_MEM(BPF_W
, ret
, index
, 0);
117 *insn
++ = BPF_JMP_IMM(BPF_JGE
, ret
, map
->max_entries
, 5);
118 *insn
++ = BPF_ALU64_IMM(BPF_LSH
, ret
, ilog2(sizeof(struct xsk_sock
*)));
119 *insn
++ = BPF_ALU64_IMM(BPF_ADD
, mp
, offsetof(struct xsk_map
, xsk_map
));
120 *insn
++ = BPF_ALU64_REG(BPF_ADD
, ret
, mp
);
121 *insn
++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock
*), ret
, ret
, 0);
122 *insn
++ = BPF_JMP_IMM(BPF_JA
, 0, 0, 1);
123 *insn
++ = BPF_MOV64_IMM(ret
, 0);
124 return insn
- insn_buf
;
127 /* Elements are kept alive by RCU; either by rcu_read_lock() (from syscall) or
128 * by local_bh_disable() (from XDP calls inside NAPI). The
129 * rcu_read_lock_bh_held() below makes lockdep accept both.
131 static void *__xsk_map_lookup_elem(struct bpf_map
*map
, u32 key
)
133 struct xsk_map
*m
= container_of(map
, struct xsk_map
, map
);
135 if (key
>= map
->max_entries
)
138 return rcu_dereference_check(m
->xsk_map
[key
], rcu_read_lock_bh_held());
141 static void *xsk_map_lookup_elem(struct bpf_map
*map
, void *key
)
143 return __xsk_map_lookup_elem(map
, *(u32
*)key
);
146 static void *xsk_map_lookup_elem_sys_only(struct bpf_map
*map
, void *key
)
148 return ERR_PTR(-EOPNOTSUPP
);
151 static int xsk_map_update_elem(struct bpf_map
*map
, void *key
, void *value
,
154 struct xsk_map
*m
= container_of(map
, struct xsk_map
, map
);
155 struct xdp_sock __rcu
**map_entry
;
156 struct xdp_sock
*xs
, *old_xs
;
157 u32 i
= *(u32
*)key
, fd
= *(u32
*)value
;
158 struct xsk_map_node
*node
;
162 if (unlikely(map_flags
> BPF_EXIST
))
164 if (unlikely(i
>= m
->map
.max_entries
))
167 sock
= sockfd_lookup(fd
, &err
);
171 if (sock
->sk
->sk_family
!= PF_XDP
) {
176 xs
= (struct xdp_sock
*)sock
->sk
;
178 map_entry
= &m
->xsk_map
[i
];
179 node
= xsk_map_node_alloc(m
, map_entry
);
182 return PTR_ERR(node
);
185 spin_lock_bh(&m
->lock
);
186 old_xs
= rcu_dereference_protected(*map_entry
, lockdep_is_held(&m
->lock
));
190 } else if (old_xs
&& map_flags
== BPF_NOEXIST
) {
193 } else if (!old_xs
&& map_flags
== BPF_EXIST
) {
197 xsk_map_sock_add(xs
, node
);
198 rcu_assign_pointer(*map_entry
, xs
);
200 xsk_map_sock_delete(old_xs
, map_entry
);
201 spin_unlock_bh(&m
->lock
);
206 spin_unlock_bh(&m
->lock
);
208 xsk_map_node_free(node
);
212 static int xsk_map_delete_elem(struct bpf_map
*map
, void *key
)
214 struct xsk_map
*m
= container_of(map
, struct xsk_map
, map
);
215 struct xdp_sock __rcu
**map_entry
;
216 struct xdp_sock
*old_xs
;
219 if (k
>= map
->max_entries
)
222 spin_lock_bh(&m
->lock
);
223 map_entry
= &m
->xsk_map
[k
];
224 old_xs
= unrcu_pointer(xchg(map_entry
, NULL
));
226 xsk_map_sock_delete(old_xs
, map_entry
);
227 spin_unlock_bh(&m
->lock
);
232 static int xsk_map_redirect(struct bpf_map
*map
, u32 ifindex
, u64 flags
)
234 return __bpf_xdp_redirect_map(map
, ifindex
, flags
, 0,
235 __xsk_map_lookup_elem
);
238 void xsk_map_try_sock_delete(struct xsk_map
*map
, struct xdp_sock
*xs
,
239 struct xdp_sock __rcu
**map_entry
)
241 spin_lock_bh(&map
->lock
);
242 if (rcu_access_pointer(*map_entry
) == xs
) {
243 rcu_assign_pointer(*map_entry
, NULL
);
244 xsk_map_sock_delete(xs
, map_entry
);
246 spin_unlock_bh(&map
->lock
);
249 static bool xsk_map_meta_equal(const struct bpf_map
*meta0
,
250 const struct bpf_map
*meta1
)
252 return meta0
->max_entries
== meta1
->max_entries
&&
253 bpf_map_meta_equal(meta0
, meta1
);
256 static int xsk_map_btf_id
;
257 const struct bpf_map_ops xsk_map_ops
= {
258 .map_meta_equal
= xsk_map_meta_equal
,
259 .map_alloc
= xsk_map_alloc
,
260 .map_free
= xsk_map_free
,
261 .map_get_next_key
= xsk_map_get_next_key
,
262 .map_lookup_elem
= xsk_map_lookup_elem
,
263 .map_gen_lookup
= xsk_map_gen_lookup
,
264 .map_lookup_elem_sys_only
= xsk_map_lookup_elem_sys_only
,
265 .map_update_elem
= xsk_map_update_elem
,
266 .map_delete_elem
= xsk_map_delete_elem
,
267 .map_check_btf
= map_check_no_btf
,
268 .map_btf_name
= "xsk_map",
269 .map_btf_id
= &xsk_map_btf_id
,
270 .map_redirect
= xsk_map_redirect
,