/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in sk_chk_filter()
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>

/* Registers */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]

/* Named registers */
#define A	regs[insn->a_reg]
#define X	regs[insn->x_reg]
#define FP	regs[BPF_REG_FP]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define K	insn->imm

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
	u8 *ptr = NULL;

	if (k >= SKF_NET_OFF)
		ptr = skb_network_header(skb) + k - SKF_NET_OFF;
	else if (k >= SKF_LL_OFF)
		ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
	if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
		return ptr;

	return NULL;
}
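
/* For illustration only (sketch, not used by the code below): with a
 * classic BPF load such as BPF_LD | BPF_B | BPF_ABS and k = SKF_LL_OFF,
 * the helper above resolves the first byte of the link layer header,
 * roughly:
 *
 *	u8 *p = bpf_internal_load_pointer_neg_helper(skb, SKF_LL_OFF, 1);
 *	if (p)
 *		first_ll_byte = *p;	// hypothetical caller variable
 *
 * i.e. "negative" offsets address the packet relative to the MAC or
 * network header rather than relative to skb->data.
 */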

static inline void *load_pointer(const struct sk_buff *skb, int k,
				 unsigned int size, void *buffer)
{
	if (k >= 0)
		return skb_header_pointer(skb, k, size, buffer);

	return bpf_internal_load_pointer_neg_helper(skb, k, size);
}

/**
 *	sk_filter - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *
 * Run the filter code and then cut skb->data to the correct size returned
 * by sk_run_filter. If pkt_len is 0 we toss the packet. If skb->len is
 * smaller than pkt_len we keep the whole skb->data. This is the socket
 * level wrapper to sk_run_filter. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
		return -ENOMEM;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		unsigned int pkt_len = SK_RUN_FILTER(filter, skb);

		err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter);
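
/* Usage sketch (callers live outside this file): a protocol's receive
 * path typically runs
 *
 *	if (sk_filter(sk, skb)) {
 *		kfree_skb(skb);
 *		return;
 *	}
 *
 * so a non-zero return drops the packet, and zero delivers the
 * (possibly trimmed) skb to the socket.
 */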

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}

/**
 *	__sk_run_filter - run a filter on a given context
 *	@ctx: buffer to run the filter on
 *	@insn: filter to apply
 *
 * Decode and apply filter instructions to the skb->data. Return length to
 * keep, 0 for none. @ctx is the data we are operating on, @insn is the
 * array of filter instructions.
 */
unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
{
	u64 stack[MAX_BPF_STACK / sizeof(u64)];
	u64 regs[MAX_BPF_REG], tmp;
	static const void *jumptable[256] = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
#define DL(A, B, C)	[BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
		DL(ALU, ADD, X),
		DL(ALU, ADD, K),
		DL(ALU, SUB, X),
		DL(ALU, SUB, K),
		DL(ALU, AND, X),
		DL(ALU, AND, K),
		DL(ALU, OR, X),
		DL(ALU, OR, K),
		DL(ALU, LSH, X),
		DL(ALU, LSH, K),
		DL(ALU, RSH, X),
		DL(ALU, RSH, K),
		DL(ALU, XOR, X),
		DL(ALU, XOR, K),
		DL(ALU, MUL, X),
		DL(ALU, MUL, K),
		DL(ALU, MOV, X),
		DL(ALU, MOV, K),
		DL(ALU, DIV, X),
		DL(ALU, DIV, K),
		DL(ALU, MOD, X),
		DL(ALU, MOD, K),
		DL(ALU, NEG, 0),
		DL(ALU, END, TO_BE),
		DL(ALU, END, TO_LE),
		DL(ALU64, ADD, X),
		DL(ALU64, ADD, K),
		DL(ALU64, SUB, X),
		DL(ALU64, SUB, K),
		DL(ALU64, AND, X),
		DL(ALU64, AND, K),
		DL(ALU64, OR, X),
		DL(ALU64, OR, K),
		DL(ALU64, LSH, X),
		DL(ALU64, LSH, K),
		DL(ALU64, RSH, X),
		DL(ALU64, RSH, K),
		DL(ALU64, XOR, X),
		DL(ALU64, XOR, K),
		DL(ALU64, MUL, X),
		DL(ALU64, MUL, K),
		DL(ALU64, MOV, X),
		DL(ALU64, MOV, K),
		DL(ALU64, ARSH, X),
		DL(ALU64, ARSH, K),
		DL(ALU64, DIV, X),
		DL(ALU64, DIV, K),
		DL(ALU64, MOD, X),
		DL(ALU64, MOD, K),
		DL(ALU64, NEG, 0),
		DL(JMP, CALL, 0),
		DL(JMP, JA, 0),
		DL(JMP, JEQ, X),
		DL(JMP, JEQ, K),
		DL(JMP, JNE, X),
		DL(JMP, JNE, K),
		DL(JMP, JGT, X),
		DL(JMP, JGT, K),
		DL(JMP, JGE, X),
		DL(JMP, JGE, K),
		DL(JMP, JSGT, X),
		DL(JMP, JSGT, K),
		DL(JMP, JSGE, X),
		DL(JMP, JSGE, K),
		DL(JMP, JSET, X),
		DL(JMP, JSET, K),
		DL(JMP, EXIT, 0),
		DL(STX, MEM, B),
		DL(STX, MEM, H),
		DL(STX, MEM, W),
		DL(STX, MEM, DW),
		DL(STX, XADD, W),
		DL(STX, XADD, DW),
		DL(ST, MEM, B),
		DL(ST, MEM, H),
		DL(ST, MEM, W),
		DL(ST, MEM, DW),
		DL(LDX, MEM, B),
		DL(LDX, MEM, H),
		DL(LDX, MEM, W),
		DL(LDX, MEM, DW),
		DL(LD, ABS, W),
		DL(LD, ABS, H),
		DL(LD, ABS, B),
		DL(LD, IND, W),
		DL(LD, IND, H),
		DL(LD, IND, B),
#undef DL
	};
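
	/* For illustration: DL(ALU, ADD, X) above expands to
	 *
	 *	[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
	 *
	 * so jumptable[insn->code] holds the address of the GCC local
	 * label handling that opcode, and each instruction is dispatched
	 * with a single computed goto (see select_insn below).
	 */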
	void *ptr;
	int off;

#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
	ARG1 = (u64) (unsigned long) ctx;

	/* Registers for user BPF programs need to be reset first. */
	regs[BPF_REG_A] = 0;
	regs[BPF_REG_X] = 0;

select_insn:
	goto *jumptable[insn->code];

	/* ALU */
#define ALU(OPCODE, OP)			\
	ALU64_##OPCODE##_X:		\
		A = A OP X;		\
		CONT;			\
	ALU_##OPCODE##_X:		\
		A = (u32) A OP (u32) X;	\
		CONT;			\
	ALU64_##OPCODE##_K:		\
		A = A OP K;		\
		CONT;			\
	ALU_##OPCODE##_K:		\
		A = (u32) A OP (u32) K;	\
		CONT;

	ALU(ADD,  +)
	ALU(SUB,  -)
	ALU(AND,  &)
	ALU(OR,   |)
	ALU(LSH, <<)
	ALU(RSH, >>)
	ALU(XOR,  ^)
	ALU(MUL,  *)
#undef ALU
	ALU_NEG_0:
		A = (u32) -A;
		CONT;
	ALU64_NEG_0:
		A = -A;
		CONT;
	ALU_MOV_X:
		A = (u32) X;
		CONT;
	ALU_MOV_K:
		A = (u32) K;
		CONT;
	ALU64_MOV_X:
		A = X;
		CONT;
	ALU64_MOV_K:
		A = K;
		CONT;
	ALU64_ARSH_X:
		(*(s64 *) &A) >>= X;
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &A) >>= K;
		CONT;
	ALU64_MOD_X:
		if (unlikely(X == 0))
			return 0;
		tmp = A;
		A = do_div(tmp, X);
		CONT;
	ALU_MOD_X:
		if (unlikely(X == 0))
			return 0;
		tmp = (u32) A;
		A = do_div(tmp, (u32) X);
		CONT;
	ALU64_MOD_K:
		tmp = A;
		A = do_div(tmp, K);
		CONT;
	ALU_MOD_K:
		tmp = (u32) A;
		A = do_div(tmp, (u32) K);
		CONT;
	ALU64_DIV_X:
		if (unlikely(X == 0))
			return 0;
		do_div(A, X);
		CONT;
	ALU_DIV_X:
		if (unlikely(X == 0))
			return 0;
		tmp = (u32) A;
		do_div(tmp, (u32) X);
		A = (u32) tmp;
		CONT;
	ALU64_DIV_K:
		do_div(A, K);
		CONT;
	ALU_DIV_K:
		tmp = (u32) A;
		do_div(tmp, (u32) K);
		A = (u32) tmp;
		CONT;
	ALU_END_TO_BE:
		switch (K) {
		case 16:
			A = (__force u16) cpu_to_be16(A);
			break;
		case 32:
			A = (__force u32) cpu_to_be32(A);
			break;
		case 64:
			A = (__force u64) cpu_to_be64(A);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (K) {
		case 16:
			A = (__force u16) cpu_to_le16(A);
			break;
		case 32:
			A = (__force u32) cpu_to_le32(A);
			break;
		case 64:
			A = (__force u64) cpu_to_le64(A);
			break;
		}
		CONT;
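
	/* Example: on a little endian host, ALU_END_TO_BE with K == 16
	 * and A == 0x1234 leaves A == 0x3412 (byte swap), while
	 * ALU_END_TO_LE with K == 16 merely truncates A to its low
	 * 16 bits. The K == 32/64 cases do the same at wider widths.
	 */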

	/* CALL */
	JMP_CALL_0:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;

	/* JMP */
	JMP_JA_0:
		insn += insn->off;
		CONT;
	JMP_JEQ_X:
		if (A == X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JEQ_K:
		if (A == K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JNE_X:
		if (A != X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JNE_K:
		if (A != K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGT_X:
		if (A > X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGT_K:
		if (A > K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGE_X:
		if (A >= X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JGE_K:
		if (A >= K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_X:
		if (((s64) A) > ((s64) X)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGT_K:
		if (((s64) A) > ((s64) K)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_X:
		if (((s64) A) >= ((s64) X)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSGE_K:
		if (((s64) A) >= ((s64) K)) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSET_X:
		if (A & X) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_JSET_K:
		if (A & K) {
			insn += insn->off;
			CONT_JMP;
		}
		CONT;
	JMP_EXIT_0:
		return BPF_R0;

	/* STX, ST and LDX */
#define LDST(SIZEOP, SIZE)						\
	STX_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (A + insn->off) = X;		\
		CONT;							\
	ST_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (A + insn->off) = K;		\
		CONT;							\
	LDX_MEM_##SIZEOP:						\
		A = *(SIZE *)(unsigned long) (X + insn->off);		\
		CONT;

	LDST(B,   u8)
	LDST(H,  u16)
	LDST(W,  u32)
	LDST(DW, u64)
#undef LDST
	STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
		atomic_add((u32) X, (atomic_t *)(unsigned long)
			   (A + insn->off));
		CONT;
	STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
			     (A + insn->off));
		CONT;
	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */
		off = K;
load_word:
		/* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns are
		 * only appearing in the programs where ctx ==
		 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
		 * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
		 * internal BPF verifier will check that BPF_R6 ==
		 * ctx.
		 *
		 * BPF_ABS and BPF_IND are wrappers of function calls,
		 * so they scratch BPF_R1-BPF_R5 registers, preserve
		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
		 *
		 * Implicit input:
		 *   ctx
		 *
		 * Explicit input:
		 *   X == any register
		 *   K == 32-bit immediate
		 *
		 * Output:
		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
		 */
		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be32(ptr);
			CONT;
		}
		return 0;
	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
		off = K;
load_half:
		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = get_unaligned_be16(ptr);
			CONT;
		}
		return 0;
	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + K) */
		off = K;
load_byte:
		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
		if (likely(ptr != NULL)) {
			BPF_R0 = *(u8 *)ptr;
			CONT;
		}
		return 0;
	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
		off = K + X;
		goto load_word;
	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */
		off = K + X;
		goto load_half;
	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */
		off = K + X;
		goto load_byte;

	default_label:
		/* If we ever reach this, we have a bug somewhere. */
		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
		return 0;
}

u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
			      const struct sock_filter_int *insni)
    __attribute__ ((alias ("__sk_run_filter")));

u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
			  const struct sock_filter_int *insni)
    __attribute__ ((alias ("__sk_run_filter")));
EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);

/* Helper to find the offset of pkt_type in sk_buff structure. We want
 * to make sure it's still a 3-bit field starting at a byte boundary;
 * taken from arch/x86/net/bpf_jit_comp.c.
 */
#define PKT_TYPE_MAX	7
static unsigned int pkt_type_offset(void)
{
	struct sk_buff skb_probe = { .pkt_type = ~0, };
	u8 *ct = (u8 *) &skb_probe;
	unsigned int off;

	for (off = 0; off < sizeof(struct sk_buff); off++) {
		if (ct[off] == PKT_TYPE_MAX)
			return off;
	}

	pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__);
	return -1;
}
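
/* Why this probe works (sketch): pkt_type is a 3-bit bitfield, so the
 * designated initializer above stores the value 7 (PKT_TYPE_MAX) into
 * an otherwise zeroed struct. Provided the field starts at a byte
 * boundary and occupies the low bits of its byte, exactly one byte of
 * the probe compares equal to 7, and the scan returns its offset. If
 * the layout ever changes so that this no longer holds, the scan fails
 * and -1 is returned, which the PKTTYPE conversion below rejects.
 */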

static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
}

static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return raw_smp_processor_id();
}

/* Note that this only generates 32-bit random numbers. */
static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
{
	return prandom_u32();
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct sock_filter_int **insnp)
{
	struct sock_filter_int *insn = *insnp;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (ctx + offsetof(protocol)) */
		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, protocol));
		insn++;

		/* A = ntohs(A) [emitting a nop or swap16] */
		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
		insn->a_reg = BPF_REG_A;
		insn->imm = 16;
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
				    pkt_type_offset());
		if (insn->off < 0)
			return false;
		insn++;

		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		*insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)),
				    BPF_REG_TMP, BPF_REG_CTX,
				    offsetof(struct sk_buff, dev));
		insn++;

		/* if (tmp != 0) goto pc+1 */
		*insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		insn++;

		*insn = BPF_EXIT_INSN();
		insn++;

		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		insn->a_reg = BPF_REG_A;
		insn->x_reg = BPF_REG_TMP;

		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->off = offsetof(struct net_device, ifindex);
		} else {
			insn->code = BPF_LDX | BPF_MEM | BPF_H;
			insn->off = offsetof(struct net_device, type);
		}
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);

		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, vlan_tci));
		insn++;

		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
					      ~VLAN_TAG_PRESENT);
		} else {
			/* A >>= 12 */
			*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
			insn++;

			/* A &= 1 */
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
		}
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = ctx */
		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX);
		insn++;

		/* arg2 = A */
		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A);
		insn++;

		/* arg3 = X */
		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
		insn++;

		/* Emit call(ctx, arg2=A, arg3=X) */
		insn->code = BPF_JMP | BPF_CALL;
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			insn->imm = __skb_get_pay_offset - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			insn->imm = __skb_get_nlattr - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			insn->imm = __get_raw_cpu_id - __bpf_call_base;
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			insn->imm = __get_random_u32 - __bpf_call_base;
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

/**
 *	sk_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: buffer where converted program will be stored
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style BPF instruction set to 'sock_filter_int' style.
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   sk_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) Second pass to remap in two sub-passes: the first finds new
 *    jump offsets, the second does the actual remapping:
 *   new_prog = kmalloc(sizeof(struct sock_filter_int) * new_len);
 *   sk_convert_filter(old_prog, old_len, new_prog, &new_len);
 *
 * User BPF's register A is mapped to our BPF register 6, user BPF
 * register X is mapped to BPF register 7; frame pointer is always
 * register 10; Context 'void *ctx' is stored in register 1, that is,
 * for socket filters: ctx == 'struct sk_buff *', for seccomp:
 * ctx == 'struct seccomp_data *'.
 */
int sk_convert_filter(struct sock_filter *prog, int len,
		      struct sock_filter_int *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i;
	struct sock_filter_int *new_insn;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len >= BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		addrs = kzalloc(len * sizeof(*addrs), GFP_KERNEL);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = new_prog;
	fp = prog;

	if (new_insn) {
		*new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
	}
	new_insn++;

	for (i = 0; i < len; fp++, i++) {
		struct sock_filter_int tmp_insns[6] = { };
		struct sock_filter_int *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - new_prog;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			insn->code = fp->code;
			insn->a_reg = BPF_REG_A;
			insn->x_reg = BPF_REG_X;
			insn->imm = fp->k;
			break;

		/* Jump opcodes map as-is, but offsets need adjustment. */
		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
#define EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

			EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				insn->code = BPF_ALU | BPF_MOV | BPF_K;
				insn->a_reg = BPF_REG_TMP;
				insn->imm = fp->k;
				insn++;

				insn->a_reg = BPF_REG_A;
				insn->x_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->a_reg = BPF_REG_A;
				insn->x_reg = BPF_REG_X;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				EMIT_JMP;
				break;
			}

			/* Convert JEQ into JNE when 'jump_true' is next insn. */
			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
				insn->code = BPF_JMP | BPF_JNE | bpf_src;
				target = i + fp->jf + 1;
				EMIT_JMP;
				break;
			}

			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A);
			insn++;

			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn = BPF_LD_ABS(BPF_B, fp->k);
			insn++;

			/* A &= 0xf */
			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			insn++;

			/* A <<= 2 */
			*insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			insn++;

			/* X = A */
			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
			insn++;

			/* A = tmp */
			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K, RET_A are remapped into 2 insns. */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			insn->code = BPF_ALU | BPF_MOV |
				     (BPF_RVAL(fp->code) == BPF_K ?
				      BPF_K : BPF_X);
			insn->a_reg = 0;
			insn->x_reg = BPF_REG_A;
			insn->imm = fp->k;
			insn++;

			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			insn->code = BPF_STX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_REG_FP;
			insn->x_reg = fp->code == BPF_ST ?
				      BPF_REG_A : BPF_REG_X;
			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      BPF_REG_A : BPF_REG_X;
			insn->x_reg = BPF_REG_FP;
			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			insn->code = BPF_ALU | BPF_MOV | BPF_K;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      BPF_REG_A : BPF_REG_X;
			insn->imm = fp->k;
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			insn->code = BPF_LDX | BPF_MEM | BPF_W;
			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
				      BPF_REG_A : BPF_REG_X;
			insn->x_reg = BPF_REG_CTX;
			insn->off = offsetof(struct sk_buff, len);
			break;

		/* access seccomp_data fields */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));

		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - new_prog;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - new_prog) {
		new_flen = new_insn - new_prog;
		if (pass > 2)
			goto err;

		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}
bd4cf0ed
AS
1131/* Security:
1132 *
2d5311e4 1133 * A BPF program is able to use 16 cells of memory to store intermediate
bd4cf0ed
AS
1134 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
1135 *
2d5311e4
ED
1136 * As we dont want to clear mem[] array for each packet going through
1137 * sk_run_filter(), we check that filter loaded by user never try to read
1138 * a cell if not previously written, and we check all branches to be sure
25985edc 1139 * a malicious user doesn't try to abuse us.
2d5311e4
ED
1140 */
1141static int check_load_and_stores(struct sock_filter *filter, int flen)
1142{
1143 u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
1144 int pc, ret = 0;
1145
1146 BUILD_BUG_ON(BPF_MEMWORDS > 16);
1147 masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
1148 if (!masks)
1149 return -ENOMEM;
1150 memset(masks, 0xff, flen * sizeof(*masks));
1151
1152 for (pc = 0; pc < flen; pc++) {
1153 memvalid &= masks[pc];
1154
1155 switch (filter[pc].code) {
1156 case BPF_S_ST:
1157 case BPF_S_STX:
1158 memvalid |= (1 << filter[pc].k);
1159 break;
1160 case BPF_S_LD_MEM:
1161 case BPF_S_LDX_MEM:
1162 if (!(memvalid & (1 << filter[pc].k))) {
1163 ret = -EINVAL;
1164 goto error;
1165 }
1166 break;
1167 case BPF_S_JMP_JA:
1168 /* a jump must set masks on target */
1169 masks[pc + 1 + filter[pc].k] &= memvalid;
1170 memvalid = ~0;
1171 break;
1172 case BPF_S_JMP_JEQ_K:
1173 case BPF_S_JMP_JEQ_X:
1174 case BPF_S_JMP_JGE_K:
1175 case BPF_S_JMP_JGE_X:
1176 case BPF_S_JMP_JGT_K:
1177 case BPF_S_JMP_JGT_X:
1178 case BPF_S_JMP_JSET_X:
1179 case BPF_S_JMP_JSET_K:
1180 /* a jump must set masks on targets */
1181 masks[pc + 1 + filter[pc].jt] &= memvalid;
1182 masks[pc + 1 + filter[pc].jf] &= memvalid;
1183 memvalid = ~0;
1184 break;
1185 }
1186 }
1187error:
1188 kfree(masks);
1189 return ret;
1190}
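
/* Example of a program this check rejects (sketch): reading scratch
 * cell 0 before anything was stored there, e.g.
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),		// A = mem[0], never written
 *	BPF_STMT(BPF_RET | BPF_A, 0),
 *
 * fails with -EINVAL: since mem[] is not cleared between runs, such a
 * read could otherwise observe stale data from a previous packet.
 */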

/**
 *	sk_chk_filter - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
{
	/*
	 * Valid instructions are initialized to non-0.
	 * Invalid instructions are initialized to 0.
	 */
	static const u8 codes[] = {
		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
		[BPF_RET|BPF_K]          = BPF_S_RET_K,
		[BPF_RET|BPF_A]          = BPF_S_RET_A,
		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
		[BPF_ST]                 = BPF_S_ST,
		[BPF_STX]                = BPF_S_STX,
		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
	};
	int pc;
	bool anc_found;

	if (flen == 0 || flen > BPF_MAXINSNS)
		return -EINVAL;

	/* check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		struct sock_filter *ftest = &filter[pc];
		u16 code = ftest->code;

		if (code >= ARRAY_SIZE(codes))
			return -EINVAL;
		code = codes[code];
		if (!code)
			return -EINVAL;
		/* Some instructions need special checks */
		switch (code) {
		case BPF_S_ALU_DIV_K:
		case BPF_S_ALU_MOD_K:
			/* check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_S_LD_MEM:
		case BPF_S_LDX_MEM:
		case BPF_S_ST:
		case BPF_S_STX:
			/* check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_S_JMP_JA:
			/*
			 * Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen-pc-1))
				return -EINVAL;
			break;
		case BPF_S_JMP_JEQ_K:
		case BPF_S_JMP_JEQ_X:
		case BPF_S_JMP_JGE_K:
		case BPF_S_JMP_JGE_X:
		case BPF_S_JMP_JGT_K:
		case BPF_S_JMP_JGT_X:
		case BPF_S_JMP_JSET_X:
		case BPF_S_JMP_JSET_K:
			/* for conditionals both must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_S_LD_W_ABS:
		case BPF_S_LD_H_ABS:
		case BPF_S_LD_B_ABS:
			anc_found = false;
#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
				code = BPF_S_ANC_##CODE;	\
				anc_found = true;		\
				break
			switch (ftest->k) {
			ANCILLARY(PROTOCOL);
			ANCILLARY(PKTTYPE);
			ANCILLARY(IFINDEX);
			ANCILLARY(NLATTR);
			ANCILLARY(NLATTR_NEST);
			ANCILLARY(MARK);
			ANCILLARY(QUEUE);
			ANCILLARY(HATYPE);
			ANCILLARY(RXHASH);
			ANCILLARY(CPU);
			ANCILLARY(ALU_XOR_X);
			ANCILLARY(VLAN_TAG);
			ANCILLARY(VLAN_TAG_PRESENT);
			ANCILLARY(PAY_OFFSET);
			ANCILLARY(RANDOM);
			}

			/* ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
		ftest->code = code;
	}

	/* last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_S_RET_K:
	case BPF_S_RET_A:
		return check_load_and_stores(filter, flen);
	}
	return -EINVAL;
}
EXPORT_SYMBOL(sk_chk_filter);
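
/* For reference, the smallest program sk_chk_filter() accepts is a
 * single return, e.g. "accept everything":
 *
 *	struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *
 * It ends in a RET, has no jumps, and touches no scratch memory.
 */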

static int sk_store_orig_filter(struct sk_filter *fp,
				const struct sock_fprog *fprog)
{
	unsigned int fsize = sk_filter_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;
	fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void sk_release_orig_filter(struct sk_filter *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	sk_release_orig_filter(fp);
	bpf_jit_free(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	atomic_inc(&fp->refcnt);
	atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
}

static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
					      struct sock *sk,
					      unsigned int len)
{
	struct sk_filter *fp_new;

	if (sk == NULL)
		return krealloc(fp, len, GFP_KERNEL);

	fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
	if (fp_new) {
		*fp_new = *fp;
		/* As we're keeping orig_prog in fp_new,
		 * we need to make sure we're not evicting it
		 * from the old fp.
		 */
		fp->orig_prog = NULL;
		sk_filter_uncharge(sk, fp);
	}

	return fp_new;
}

static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
					     struct sock *sk)
{
	struct sock_filter *old_prog;
	struct sk_filter *old_fp;
	int i, err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct sock_filter_int));

	/* For now, we need to unfiddle BPF_S_* identifiers in place.
	 * This can sooner or later on be subject to removal, e.g. when
	 * JITs have been converted.
	 */
	for (i = 0; i < fp->len; i++)
		sk_decode_filter(&fp->insns[i], &fp->insns[i]);

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = sk_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = __sk_migrate_realloc(old_fp, sk, sk_filter_size(new_len));
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->bpf_func = sk_run_filter_int_skb;
	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
	err = sk_convert_filter(old_prog, old_len, fp->insnsi, &new_len);
	if (err)
		/* 2nd sk_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by __sk_migrate_realloc().
		 */
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	/* Rollback filter setup. */
	if (sk != NULL)
		sk_filter_uncharge(sk, fp);
	else
		kfree(fp);
	return ERR_PTR(err);
}

void __weak bpf_int_jit_compile(struct sk_filter *prog)
{
}

static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
					     struct sock *sk)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = sk_chk_filter(fp->insns, fp->len);
	if (err)
		return ERR_PTR(err);

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited) {
		fp = __sk_migrate_filter(fp, sk);

		/* Probe if internal BPF can be jit-ed */
		bpf_int_jit_compile(fp);
	}
	return fp;
}

/**
 *	sk_unattached_filter_create - create an unattached filter
 *	@fprog: the filter program
 *	@pfp: the unattached filter that is created
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int sk_unattached_filter_create(struct sk_filter **pfp,
				struct sock_fprog *fprog)
{
	unsigned int fsize = sk_filter_proglen(fprog);
	struct sk_filter *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = kmalloc(sk_filter_size(fprog->len), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* __sk_prepare_filter() already takes care of uncharging
	 * memory in case something goes wrong.
	 */
	fp = __sk_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_create);

void sk_unattached_filter_destroy(struct sk_filter *fp)
{
	sk_filter_release(fp);
}
EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
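
/* Usage sketch (in-kernel callers, e.g. drivers): build a struct
 * sock_fprog in kernel memory, then
 *
 *	struct sk_filter *fp;
 *	int err = sk_unattached_filter_create(&fp, &fprog);
 *	if (!err) {
 *		unsigned int res = SK_RUN_FILTER(fp, skb);
 *		sk_unattached_filter_destroy(fp);
 *	}
 *
 * where 'fprog' and 'skb' are the caller's own objects.
 */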

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;
	unsigned int fsize = sk_filter_proglen(fprog);
	unsigned int sk_fsize = sk_filter_size(fprog->len);
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	/* Make sure new filter is there and in the right amounts. */
	if (fprog->filter == NULL)
		return -EINVAL;

	fp = sock_kmalloc(sk, sk_fsize, GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		sock_kfree_s(sk, fp, sk_fsize);
		return -EFAULT;
	}

	atomic_set(&fp->refcnt, 1);
	fp->len = fprog->len;

	err = sk_store_orig_filter(fp, fprog);
	if (err) {
		sk_filter_uncharge(sk, fp);
		return -ENOMEM;
	}

	/* __sk_prepare_filter() already takes care of uncharging
	 * memory in case something goes wrong.
	 */
	fp = __sk_prepare_filter(fp, sk);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);
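
/* From user space (sketch), this is reached via the SO_ATTACH_FILTER
 * socket option:
 *
 *	struct sock_filter insns[] = { ... };
 *	struct sock_fprog fprog = {
 *		.len	= ARRAY_SIZE(insns),
 *		.filter	= insns,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 *
 * SO_DETACH_FILTER correspondingly ends up in sk_detach_filter() below.
 */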

int sk_detach_filter(struct sock *sk)
{
	int ret = -ENOENT;
	struct sk_filter *filter;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (filter) {
		RCU_INIT_POINTER(sk->sk_filter, NULL);
		sk_filter_uncharge(sk, filter);
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(sk_detach_filter);

void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
{
	static const u16 decodes[] = {
		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_ANC_RANDOM]	= BPF_LD|BPF_B|BPF_ABS,
		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
		[BPF_S_RET_K]		= BPF_RET|BPF_K,
		[BPF_S_RET_A]		= BPF_RET|BPF_A,
		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
		[BPF_S_ST]		= BPF_ST,
		[BPF_S_STX]		= BPF_STX,
		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
	};
	u16 code;

	code = filt->code;

	to->code = decodes[code];
	to->jt = filt->jt;
	to->jf = filt->jf;
	to->k = filt->k;
}

int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
		  unsigned int len)
{
	struct sock_fprog_kern *fprog;
	struct sk_filter *filter;
	int ret = 0;

	lock_sock(sk);
	filter = rcu_dereference_protected(sk->sk_filter,
					   sock_owned_by_user(sk));
	if (!filter)
		goto out;

	/* We're copying the filter that has been originally attached,
	 * so no conversion/decode needed anymore.
	 */
	fprog = filter->orig_prog;

	ret = fprog->len;
	if (!len)
		/* User space only enquires number of filter blocks. */
		goto out;

	ret = -EINVAL;
	if (len < fprog->len)
		goto out;

	ret = -EFAULT;
	if (copy_to_user(ubuf, fprog->filter, sk_filter_proglen(fprog)))
		goto out;

	/* Instead of bytes, the API requests to return the number
	 * of filter blocks.
	 */
	ret = fprog->len;
out:
	release_sock(sk);
	return ret;
}