net/ipv4/netfilter/ip_tables.c
[NETFILTER] x_tables: Abstraction layer for {ip,ip6,arp}_tables
/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 *      - increase module usage count as soon as we have rules inside
 *        a table
 * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
 *      - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (!(x))                                               \
                printk("IP_NF_ASSERT: %s:%s:%u\n",              \
                       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by get_entry() below. */

/* Returns whether matches rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
                const char *indev,
                const char *outdev,
                const struct ipt_ip *ipinfo,
                int isfrag)
{
        size_t i;
        unsigned long ret;

#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

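        /*
         * FWINV() worked example (explanatory note): the macro XORs a
         * boolean test with the rule's inversion flag.  A rule written as
         * "-s ! 1.2.3.4" sets IPT_INV_SRCIP, so the address *mismatch*
         * test below is inverted: the rule now fails exactly when the
         * packet's source *does* match 1.2.3.4.  With the flag clear,
         * FWINV(mismatch, IPT_INV_SRCIP) is just "mismatch".
         */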
        if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
                  IPT_INV_SRCIP)
            || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
                     IPT_INV_DSTIP)) {
                dprintf("Source or dest mismatch.\n");

                dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
                        NIPQUAD(ip->saddr),
                        NIPQUAD(ipinfo->smsk.s_addr),
                        NIPQUAD(ipinfo->src.s_addr),
                        ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
                dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
                        NIPQUAD(ip->daddr),
                        NIPQUAD(ipinfo->dmsk.s_addr),
                        NIPQUAD(ipinfo->dst.s_addr),
                        ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
                return 0;
        }

        /* Look for ifname matches; this should unroll nicely. */
        for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
                ret |= (((const unsigned long *)indev)[i]
                        ^ ((const unsigned long *)ipinfo->iniface)[i])
                        & ((const unsigned long *)ipinfo->iniface_mask)[i];
        }
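        /*
         * Explanatory note: IFNAMSIZ is 16, so on a 64-bit kernel the loop
         * above is two masked word compares (four on 32-bit).  iniface_mask
         * holds 0xff bytes covering the configured name, so "eth0" only
         * equals "eth0", while a prefix rule such as "eth+" is expressed by
         * masking just the prefix bytes.
         */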

        if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
                dprintf("VIA in mismatch (%s vs %s).%s\n",
                        indev, ipinfo->iniface,
                        ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
                return 0;
        }

        for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
                ret |= (((const unsigned long *)outdev)[i]
                        ^ ((const unsigned long *)ipinfo->outiface)[i])
                        & ((const unsigned long *)ipinfo->outiface_mask)[i];
        }

        if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
                dprintf("VIA out mismatch (%s vs %s).%s\n",
                        outdev, ipinfo->outiface,
                        ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
                return 0;
        }

        /* Check specific protocol */
        if (ipinfo->proto
            && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
                dprintf("Packet protocol %hi does not match %hi.%s\n",
                        ip->protocol, ipinfo->proto,
                        ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
                return 0;
        }

        /* If we have a fragment rule but the packet is not a fragment
         * then we return zero */
        if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
                dprintf("Fragment rule but not fragment.%s\n",
                        ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
                return 0;
        }

        return 1;
}

static inline int
ip_checkentry(const struct ipt_ip *ip)
{
        if (ip->flags & ~IPT_F_MASK) {
                duprintf("Unknown flag bits set: %08X\n",
                         ip->flags & ~IPT_F_MASK);
                return 0;
        }
        if (ip->invflags & ~IPT_INV_MASK) {
                duprintf("Unknown invflag bits set: %08X\n",
                         ip->invflags & ~IPT_INV_MASK);
                return 0;
        }
        return 1;
}

static unsigned int
ipt_error(struct sk_buff **pskb,
          const struct net_device *in,
          const struct net_device *out,
          unsigned int hooknum,
          const void *targinfo,
          void *userinfo)
{
        if (net_ratelimit())
                printk("ip_tables: error: `%s'\n", (char *)targinfo);

        return NF_DROP;
}

static inline
int do_match(struct ipt_entry_match *m,
             const struct sk_buff *skb,
             const struct net_device *in,
             const struct net_device *out,
             int offset,
             int *hotdrop)
{
        /* Stop iteration if it doesn't match */
        if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
                                      skb->nh.iph->ihl*4, hotdrop))
                return 1;
        else
                return 0;
}

static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        return (struct ipt_entry *)(base + offset);
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
             unsigned int hook,
             const struct net_device *in,
             const struct net_device *out,
             struct ipt_table *table,
             void *userdata)
{
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        u_int16_t offset;
        struct iphdr *ip;
        u_int16_t datalen;
        int hotdrop = 0;
        /* Initializing verdict to NF_DROP keeps gcc happy. */
        unsigned int verdict = NF_DROP;
        const char *indev, *outdev;
        void *table_base;
        struct ipt_entry *e, *back;
        struct xt_table_info *private = table->private;

        /* Initialization */
        ip = (*pskb)->nh.iph;
        datalen = (*pskb)->len - ip->ihl * 4;
        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;
        /* We handle fragments by dealing with the first fragment as
         * if it was a normal packet.  All other fragments are treated
         * normally, except that they will NEVER match rules that ask
         * things we don't know (ie. tcp syn flag or ports).  If the
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
        offset = ntohs(ip->frag_off) & IP_OFFSET;
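        /*
         * Explanatory note: frag_off packs flags and the fragment offset;
         * IP_OFFSET (0x1FFF) keeps just the offset, measured in 8-byte
         * units.  So offset == 0 both for unfragmented packets and for the
         * first fragment; only later fragments get offset != 0 and are
         * thereby kept away from port/flag matches.
         */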

        read_lock_bh(&table->lock);
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        table_base = (void *)private->entries[smp_processor_id()];
        e = get_entry(table_base, private->hook_entry[hook]);

        /* For return from builtin chain */
        back = get_entry(table_base, private->underflow[hook]);

        do {
                IP_NF_ASSERT(e);
                IP_NF_ASSERT(back);
                if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
                        struct ipt_entry_target *t;

                        if (IPT_MATCH_ITERATE(e, do_match,
                                              *pskb, in, out,
                                              offset, &hotdrop) != 0)
                                goto no_match;

                        ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

                        t = ipt_get_target(e);
                        IP_NF_ASSERT(t->u.kernel.target);
                        /* Standard target? */
                        if (!t->u.kernel.target->target) {
                                int v;

                                v = ((struct ipt_standard_target *)t)->verdict;
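                                /*
                                 * Explanatory note: standard verdicts are
                                 * encoded by userspace as -verdict - 1, so
                                 * v == -1 means NF_DROP (0) and v == -2
                                 * means NF_ACCEPT (1); IPT_RETURN is a
                                 * further reserved negative value.  A
                                 * non-negative v is instead a jump offset
                                 * into the table.
                                 */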
                                if (v < 0) {
                                        /* Pop from stack? */
                                        if (v != IPT_RETURN) {
                                                verdict = (unsigned)(-v) - 1;
                                                break;
                                        }
                                        e = back;
                                        back = get_entry(table_base,
                                                         back->comefrom);
                                        continue;
                                }
                                if (table_base + v != (void *)e + e->next_offset
                                    && !(e->ip.flags & IPT_F_GOTO)) {
                                        /* Save old back ptr in next entry */
                                        struct ipt_entry *next
                                                = (void *)e + e->next_offset;
                                        next->comefrom
                                                = (void *)back - table_base;
                                        /* set back pointer to next entry */
                                        back = next;
                                }

                                e = get_entry(table_base, v);
                        } else {
                                /* Targets which reenter must return
                                   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
                                ((struct ipt_entry *)table_base)->comefrom
                                        = 0xeeeeeeec;
#endif
                                verdict = t->u.kernel.target->target(pskb,
                                                                     in, out,
                                                                     hook,
                                                                     t->data,
                                                                     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
                                if (((struct ipt_entry *)table_base)->comefrom
                                    != 0xeeeeeeec
                                    && verdict == IPT_CONTINUE) {
                                        printk("Target %s reentered!\n",
                                               t->u.kernel.target->name);
                                        verdict = NF_DROP;
                                }
                                ((struct ipt_entry *)table_base)->comefrom
                                        = 0x57acc001;
#endif
                                /* Target might have changed stuff. */
                                ip = (*pskb)->nh.iph;
                                datalen = (*pskb)->len - ip->ihl * 4;

                                if (verdict == IPT_CONTINUE)
                                        e = (void *)e + e->next_offset;
                                else
                                        /* Verdict */
                                        break;
                        }
                } else {

                no_match:
                        e = (void *)e + e->next_offset;
                }
        } while (!hotdrop);

        read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
#else
        if (hotdrop)
                return NF_DROP;
        else
                return verdict;
#endif
}
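
/*
 * Usage sketch (illustrative only, hence #if 0): a table module wires
 * ipt_do_table() into a netfilter hook roughly the way iptable_filter.c
 * does.  "packet_filter" below stands for the hypothetical struct
 * ipt_table such a module would define and register via
 * ipt_register_table().
 */
#if 0
static unsigned int
example_hook(unsigned int hook,
             struct sk_buff **pskb,
             const struct net_device *in,
             const struct net_device *out,
             int (*okfn)(struct sk_buff *))
{
        /* The verdict (NF_ACCEPT, NF_DROP, ...) comes straight from the table. */
        return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
}
#endif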

/* All zeroes == unconditional rule. */
static inline int
unconditional(const struct ipt_ip *ip)
{
        unsigned int i;

        for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
                if (((__u32 *)ip)[i])
                        return 0;

        return 1;
}

/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
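/*
 * Explanatory note: the walk below chases every jump from every hook
 * entry point, using comefrom bit (1 << NF_IP_NUMHOOKS) as a "currently
 * on the path" marker.  Revisiting an entry whose marker is still set
 * means a cycle, e.g. a user chain A jumping to B while B jumps back to
 * A, and the ruleset is rejected (-ELOOP in translate_table()).
 */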
static int
mark_source_chains(struct xt_table_info *newinfo,
                   unsigned int valid_hooks, void *entry0)
{
        unsigned int hook;

        /* No recursion; use packet counter to save back ptrs (reset
           to 0 as we leave), and comefrom to save source hook bitmask */
        for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
                unsigned int pos = newinfo->hook_entry[hook];
                struct ipt_entry *e
                        = (struct ipt_entry *)(entry0 + pos);

                if (!(valid_hooks & (1 << hook)))
                        continue;

                /* Set initial back pointer. */
                e->counters.pcnt = pos;

                for (;;) {
                        struct ipt_standard_target *t
                                = (void *)ipt_get_target(e);

                        if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
                                printk("iptables: loop hook %u pos %u %08X.\n",
                                       hook, pos, e->comefrom);
                                return 0;
                        }
                        e->comefrom
                                |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

                        /* Unconditional return/END. */
                        if (e->target_offset == sizeof(struct ipt_entry)
                            && (strcmp(t->target.u.user.name,
                                       IPT_STANDARD_TARGET) == 0)
                            && t->verdict < 0
                            && unconditional(&e->ip)) {
                                unsigned int oldpos, size;

                                /* Return: backtrack through the last
                                   big jump. */
                                do {
                                        e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
                                        if (e->comefrom
                                            & (1 << NF_IP_NUMHOOKS)) {
                                                duprintf("Back unset "
                                                         "on hook %u "
                                                         "rule %u\n",
                                                         hook, pos);
                                        }
#endif
                                        oldpos = pos;
                                        pos = e->counters.pcnt;
                                        e->counters.pcnt = 0;

                                        /* We're at the start. */
                                        if (pos == oldpos)
                                                goto next;

                                        e = (struct ipt_entry *)
                                                (entry0 + pos);
                                } while (oldpos == pos + e->next_offset);

                                /* Move along one */
                                size = e->next_offset;
                                e = (struct ipt_entry *)
                                        (entry0 + pos + size);
                                e->counters.pcnt = pos;
                                pos += size;
                        } else {
                                int newpos = t->verdict;

                                if (strcmp(t->target.u.user.name,
                                           IPT_STANDARD_TARGET) == 0
                                    && newpos >= 0) {
                                        /* This is a jump; chase it. */
                                        duprintf("Jump rule %u -> %u\n",
                                                 pos, newpos);
                                } else {
                                        /* ... this is a fallthru */
                                        newpos = pos + e->next_offset;
                                }
                                e = (struct ipt_entry *)
                                        (entry0 + newpos);
                                e->counters.pcnt = pos;
                                pos = newpos;
                        }
                }
                next:
                duprintf("Finished chain %u\n", hook);
        }
        return 1;
}

static inline int
cleanup_match(struct ipt_entry_match *m, unsigned int *i)
{
        if (i && (*i)-- == 0)
                return 1;

        if (m->u.kernel.match->destroy)
                m->u.kernel.match->destroy(m->data,
                                           m->u.match_size - sizeof(*m));
        module_put(m->u.kernel.match->me);
        return 0;
}

static inline int
standard_check(const struct ipt_entry_target *t,
               unsigned int max_offset)
{
        struct ipt_standard_target *targ = (void *)t;

        /* Check standard info. */
        if (t->u.target_size
            != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
                duprintf("standard_check: target size %u != %u\n",
                         t->u.target_size,
                         IPT_ALIGN(sizeof(struct ipt_standard_target)));
                return 0;
        }

        if (targ->verdict >= 0
            && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
                duprintf("ipt_standard_check: bad verdict (%i)\n",
                         targ->verdict);
                return 0;
        }

        if (targ->verdict < -NF_MAX_VERDICT - 1) {
                duprintf("ipt_standard_check: bad negative verdict (%i)\n",
                         targ->verdict);
                return 0;
        }
        return 1;
}

static inline int
check_match(struct ipt_entry_match *m,
            const char *name,
            const struct ipt_ip *ip,
            unsigned int hookmask,
            unsigned int *i)
{
        struct ipt_match *match;

        match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
                                                      m->u.user.revision),
                                        "ipt_%s", m->u.user.name);
        if (IS_ERR(match) || !match) {
                duprintf("check_match: `%s' not found\n", m->u.user.name);
                return match ? PTR_ERR(match) : -ENOENT;
        }
        m->u.kernel.match = match;

        if (m->u.kernel.match->checkentry
            && !m->u.kernel.match->checkentry(name, ip, m->data,
                                              m->u.match_size - sizeof(*m),
                                              hookmask)) {
                module_put(m->u.kernel.match->me);
                duprintf("ip_tables: check failed for `%s'.\n",
                         m->u.kernel.match->name);
                return -EINVAL;
        }

        (*i)++;
        return 0;
}

static struct ipt_target ipt_standard_target;

static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
            unsigned int *i)
{
        struct ipt_entry_target *t;
        struct ipt_target *target;
        int ret;
        unsigned int j;

        if (!ip_checkentry(&e->ip)) {
                duprintf("ip_tables: ip check failed %p %s.\n", e, name);
                return -EINVAL;
        }

        j = 0;
        ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
        if (ret != 0)
                goto cleanup_matches;

        t = ipt_get_target(e);
        target = try_then_request_module(xt_find_target(AF_INET,
                                                        t->u.user.name,
                                                        t->u.user.revision),
                                         "ipt_%s", t->u.user.name);
        if (IS_ERR(target) || !target) {
                duprintf("check_entry: `%s' not found\n", t->u.user.name);
                ret = target ? PTR_ERR(target) : -ENOENT;
                goto cleanup_matches;
        }
        t->u.kernel.target = target;

        if (t->u.kernel.target == &ipt_standard_target) {
                if (!standard_check(t, size)) {
                        ret = -EINVAL;
                        goto cleanup_matches;
                }
        } else if (t->u.kernel.target->checkentry
                   && !t->u.kernel.target->checkentry(name, e, t->data,
                                                      t->u.target_size
                                                      - sizeof(*t),
                                                      e->comefrom)) {
                module_put(t->u.kernel.target->me);
                duprintf("ip_tables: check failed for `%s'.\n",
                         t->u.kernel.target->name);
                ret = -EINVAL;
                goto cleanup_matches;
        }

        (*i)++;
        return 0;

 cleanup_matches:
        IPT_MATCH_ITERATE(e, cleanup_match, &j);
        return ret;
}

static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
                           struct xt_table_info *newinfo,
                           unsigned char *base,
                           unsigned char *limit,
                           const unsigned int *hook_entries,
                           const unsigned int *underflows,
                           unsigned int *i)
{
        unsigned int h;

        if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
            || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
                duprintf("Bad offset %p\n", e);
                return -EINVAL;
        }

        if (e->next_offset
            < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
                duprintf("checking: element %p size %u\n",
                         e, e->next_offset);
                return -EINVAL;
        }

        /* Check hooks & underflows */
        for (h = 0; h < NF_IP_NUMHOOKS; h++) {
                if ((unsigned char *)e - base == hook_entries[h])
                        newinfo->hook_entry[h] = hook_entries[h];
                if ((unsigned char *)e - base == underflows[h])
                        newinfo->underflow[h] = underflows[h];
        }

        /* FIXME: underflows must be unconditional, standard verdicts
           < 0 (not IPT_RETURN). --RR */

        /* Clear counters and comefrom */
        e->counters = ((struct xt_counters) { 0, 0 });
        e->comefrom = 0;

        (*i)++;
        return 0;
}

static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
        struct ipt_entry_target *t;

        if (i && (*i)-- == 0)
                return 1;

        /* Cleanup all matches */
        IPT_MATCH_ITERATE(e, cleanup_match, NULL);
        t = ipt_get_target(e);
        if (t->u.kernel.target->destroy)
                t->u.kernel.target->destroy(t->data,
                                            t->u.target_size - sizeof(*t));
        module_put(t->u.kernel.target->me);
        return 0;
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(const char *name,
                unsigned int valid_hooks,
                struct xt_table_info *newinfo,
                void *entry0,
                unsigned int size,
                unsigned int number,
                const unsigned int *hook_entries,
                const unsigned int *underflows)
{
        unsigned int i;
        int ret;

        newinfo->size = size;
        newinfo->number = number;

        /* Init all hooks to impossible value. */
        for (i = 0; i < NF_IP_NUMHOOKS; i++) {
                newinfo->hook_entry[i] = 0xFFFFFFFF;
                newinfo->underflow[i] = 0xFFFFFFFF;
        }

        duprintf("translate_table: size %u\n", newinfo->size);
        i = 0;
        /* Walk through entries, checking offsets. */
        ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
                                check_entry_size_and_hooks,
                                newinfo,
                                entry0,
                                entry0 + size,
                                hook_entries, underflows, &i);
        if (ret != 0)
                return ret;

        if (i != number) {
                duprintf("translate_table: %u not %u entries\n",
                         i, number);
                return -EINVAL;
        }

        /* Check hooks all assigned */
        for (i = 0; i < NF_IP_NUMHOOKS; i++) {
                /* Only hooks which are valid */
                if (!(valid_hooks & (1 << i)))
                        continue;
                if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
                        duprintf("Invalid hook entry %u %u\n",
                                 i, hook_entries[i]);
                        return -EINVAL;
                }
                if (newinfo->underflow[i] == 0xFFFFFFFF) {
                        duprintf("Invalid underflow %u %u\n",
                                 i, underflows[i]);
                        return -EINVAL;
                }
        }

        if (!mark_source_chains(newinfo, valid_hooks, entry0))
                return -ELOOP;

        /* Finally, each sanity check must pass */
        i = 0;
        ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
                                check_entry, name, size, &i);

        if (ret != 0) {
                IPT_ENTRY_ITERATE(entry0, newinfo->size,
                                  cleanup_entry, &i);
                return ret;
        }

        /* And one copy for every other CPU */
        for_each_cpu(i) {
                if (newinfo->entries[i] && newinfo->entries[i] != entry0)
                        memcpy(newinfo->entries[i], entry0, newinfo->size);
        }

        return ret;
}

/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
                     struct xt_counters total[],
                     unsigned int *i)
{
        ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

        (*i)++;
        return 0;
}

static inline int
set_entry_to_counter(const struct ipt_entry *e,
                     struct ipt_counters total[],
                     unsigned int *i)
{
        SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

        (*i)++;
        return 0;
}

static void
get_counters(const struct xt_table_info *t,
             struct xt_counters counters[])
{
        unsigned int cpu;
        unsigned int i;
        unsigned int curcpu;

        /* Instead of clearing (by a previous call to memset())
         * the counters and using adds, we set the counters
         * with data used by the 'current' CPU.
         * We don't care about preemption here.
         */
        curcpu = raw_smp_processor_id();

        i = 0;
        IPT_ENTRY_ITERATE(t->entries[curcpu],
                          t->size,
                          set_entry_to_counter,
                          counters,
                          &i);

        for_each_cpu(cpu) {
                if (cpu == curcpu)
                        continue;
                i = 0;
                IPT_ENTRY_ITERATE(t->entries[cpu],
                                  t->size,
                                  add_entry_to_counter,
                                  counters,
                                  &i);
        }
}

static int
copy_entries_to_user(unsigned int total_size,
                     struct ipt_table *table,
                     void __user *userptr)
{
        unsigned int off, num, countersize;
        struct ipt_entry *e;
        struct xt_counters *counters;
        struct xt_table_info *private = table->private;
        int ret = 0;
        void *loc_cpu_entry;

        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
           about). */
        countersize = sizeof(struct xt_counters) * private->number;
        counters = vmalloc_node(countersize, numa_node_id());

        if (counters == NULL)
                return -ENOMEM;

        /* First, sum counters... */
        write_lock_bh(&table->lock);
        get_counters(private, counters);
        write_unlock_bh(&table->lock);

        /* choose the copy that is on our node/cpu, ...
         * This choice is lazy (because current thread is
         * allowed to migrate to another cpu)
         */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        /* ... then copy entire thing ... */
        if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
                ret = -EFAULT;
                goto free_counters;
        }

        /* FIXME: use iterator macros --RR */
        /* ... then go back and fix counters and names */
        for (off = 0, num = 0; off < total_size; off += e->next_offset, num++) {
                unsigned int i;
                struct ipt_entry_match *m;
                struct ipt_entry_target *t;

                e = (struct ipt_entry *)(loc_cpu_entry + off);
                if (copy_to_user(userptr + off
                                 + offsetof(struct ipt_entry, counters),
                                 &counters[num],
                                 sizeof(counters[num])) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }

                for (i = sizeof(struct ipt_entry);
                     i < e->target_offset;
                     i += m->u.match_size) {
                        m = (void *)e + i;

                        if (copy_to_user(userptr + off + i
                                         + offsetof(struct ipt_entry_match,
                                                    u.user.name),
                                         m->u.kernel.match->name,
                                         strlen(m->u.kernel.match->name)+1)
                            != 0) {
                                ret = -EFAULT;
                                goto free_counters;
                        }
                }

                t = ipt_get_target(e);
                if (copy_to_user(userptr + off + e->target_offset
                                 + offsetof(struct ipt_entry_target,
                                            u.user.name),
                                 t->u.kernel.target->name,
                                 strlen(t->u.kernel.target->name)+1) != 0) {
                        ret = -EFAULT;
                        goto free_counters;
                }
        }

 free_counters:
        vfree(counters);
        return ret;
}

static int
get_entries(const struct ipt_get_entries *entries,
            struct ipt_get_entries __user *uptr)
{
        int ret;
        struct ipt_table *t;

        t = xt_find_table_lock(AF_INET, entries->name);
        if (t && !IS_ERR(t)) {
                struct xt_table_info *private = t->private;
                duprintf("t->private->number = %u\n",
                         private->number);
                if (entries->size == private->size)
                        ret = copy_entries_to_user(private->size,
                                                   t, uptr->entrytable);
                else {
                        duprintf("get_entries: I've got %u not %u!\n",
                                 private->size,
                                 entries->size);
                        ret = -EINVAL;
                }
                module_put(t->me);
                xt_table_unlock(t);
        } else
                ret = t ? PTR_ERR(t) : -ENOENT;

        return ret;
}

static int
do_replace(void __user *user, unsigned int len)
{
        int ret;
        struct ipt_replace tmp;
        struct ipt_table *t;
        struct xt_table_info *newinfo, *oldinfo;
        struct xt_counters *counters;
        void *loc_cpu_entry, *loc_cpu_old_entry;

        if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
                return -EFAULT;

        /* Hack: Causes ipchains to give correct error msg --RR */
        if (len != sizeof(tmp) + tmp.size)
                return -ENOPROTOOPT;

        newinfo = xt_alloc_table_info(tmp.size);
        if (!newinfo)
                return -ENOMEM;

        /* choose the copy that is our node/cpu */
        loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
        if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
                           tmp.size) != 0) {
                ret = -EFAULT;
                goto free_newinfo;
        }

        counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
        if (!counters) {
                ret = -ENOMEM;
                goto free_newinfo;
        }

        ret = translate_table(tmp.name, tmp.valid_hooks,
                              newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
                              tmp.hook_entry, tmp.underflow);
        if (ret != 0)
                goto free_newinfo_counters;

        duprintf("ip_tables: Translated table\n");

        t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
                                    "iptable_%s", tmp.name);
        if (!t || IS_ERR(t)) {
                ret = t ? PTR_ERR(t) : -ENOENT;
                goto free_newinfo_counters_untrans;
        }

        /* You lied! */
        if (tmp.valid_hooks != t->valid_hooks) {
                duprintf("Valid hook crap: %08X vs %08X\n",
                         tmp.valid_hooks, t->valid_hooks);
                ret = -EINVAL;
                goto put_module;
        }

        oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
        if (!oldinfo)
                goto put_module;

        /* Update module usage count based on number of rules */
        duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
        if ((oldinfo->number > oldinfo->initial_entries) ||
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);
        if ((oldinfo->number > oldinfo->initial_entries) &&
            (newinfo->number <= oldinfo->initial_entries))
                module_put(t->me);
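        /*
         * Explanatory note: the table lookup above took one module
         * reference, and the invariant (per the 19 Jan 2002 note in the
         * header) is that one extra reference is held while a table
         * contains rules beyond its built-in initial_entries.  The two
         * conditionals drop zero, one, or two references so that exactly
         * that invariant holds for the *new* ruleset.
         */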

        /* Get the old counters. */
        get_counters(oldinfo, counters);
        /* Decrease module usage counts and free resource */
        loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
        IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, NULL);
        xt_free_table_info(oldinfo);
        if (copy_to_user(tmp.counters, counters,
                         sizeof(struct xt_counters) * tmp.num_counters) != 0)
                ret = -EFAULT;
        vfree(counters);
        xt_table_unlock(t);
        return ret;

 put_module:
        module_put(t->me);
        xt_table_unlock(t);
 free_newinfo_counters_untrans:
        IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo_counters:
        vfree(counters);
 free_newinfo:
        xt_free_table_info(newinfo);
        return ret;
}

/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
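/*
 * Explanatory note: "first CPU" here effectively means the per-CPU copy
 * belonging to the CPU running do_add_counters() below.  Since
 * get_counters() sums all per-CPU copies, adding everything to a single
 * copy keeps the visible totals correct, and the 64-bit xt_counters make
 * overflow a non-issue in practice.
 */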
static inline int
add_counter_to_entry(struct ipt_entry *e,
                     const struct xt_counters addme[],
                     unsigned int *i)
{
#if 0
        duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
                 *i,
                 (long unsigned int)e->counters.pcnt,
                 (long unsigned int)e->counters.bcnt,
                 (long unsigned int)addme[*i].pcnt,
                 (long unsigned int)addme[*i].bcnt);
#endif

        ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

        (*i)++;
        return 0;
}

static int
do_add_counters(void __user *user, unsigned int len)
{
        unsigned int i;
        struct xt_counters_info tmp, *paddc;
        struct ipt_table *t;
        struct xt_table_info *private;
        int ret = 0;
        void *loc_cpu_entry;

        if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
                return -EFAULT;

        if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
                return -EINVAL;

        paddc = vmalloc_node(len, numa_node_id());
        if (!paddc)
                return -ENOMEM;

        if (copy_from_user(paddc, user, len) != 0) {
                ret = -EFAULT;
                goto free;
        }

        t = xt_find_table_lock(AF_INET, tmp.name);
        if (!t || IS_ERR(t)) {
                ret = t ? PTR_ERR(t) : -ENOENT;
                goto free;
        }

        write_lock_bh(&t->lock);
        private = t->private;
        if (private->number != paddc->num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
        }

        i = 0;
        /* Choose the copy that is on our node */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        IPT_ENTRY_ITERATE(loc_cpu_entry,
                          private->size,
                          add_counter_to_entry,
                          paddc->counters,
                          &i);
 unlock_up_free:
        write_unlock_bh(&t->lock);
        xt_table_unlock(t);
        module_put(t->me);
 free:
        vfree(paddc);

        return ret;
}

static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        switch (cmd) {
        case IPT_SO_SET_REPLACE:
                ret = do_replace(user, len);
                break;

        case IPT_SO_SET_ADD_COUNTERS:
                ret = do_add_counters(user, len);
                break;

        default:
                duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
                ret = -EINVAL;
        }

        return ret;
}

static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
        int ret;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        switch (cmd) {
        case IPT_SO_GET_INFO: {
                char name[IPT_TABLE_MAXNAMELEN];
                struct ipt_table *t;

                if (*len != sizeof(struct ipt_getinfo)) {
                        duprintf("length %u != %u\n", *len,
                                 sizeof(struct ipt_getinfo));
                        ret = -EINVAL;
                        break;
                }

                if (copy_from_user(name, user, sizeof(name)) != 0) {
                        ret = -EFAULT;
                        break;
                }
                name[IPT_TABLE_MAXNAMELEN-1] = '\0';

                t = try_then_request_module(xt_find_table_lock(AF_INET, name),
                                            "iptable_%s", name);
                if (t && !IS_ERR(t)) {
                        struct ipt_getinfo info;
                        struct xt_table_info *private = t->private;

                        info.valid_hooks = t->valid_hooks;
                        memcpy(info.hook_entry, private->hook_entry,
                               sizeof(info.hook_entry));
                        memcpy(info.underflow, private->underflow,
                               sizeof(info.underflow));
                        info.num_entries = private->number;
                        info.size = private->size;
                        memcpy(info.name, name, sizeof(info.name));

                        if (copy_to_user(user, &info, *len) != 0)
                                ret = -EFAULT;
                        else
                                ret = 0;
                        xt_table_unlock(t);
                        module_put(t->me);
                } else
                        ret = t ? PTR_ERR(t) : -ENOENT;
        }
        break;

        case IPT_SO_GET_ENTRIES: {
                struct ipt_get_entries get;

                if (*len < sizeof(get)) {
                        duprintf("get_entries: %u < %u\n", *len, sizeof(get));
                        ret = -EINVAL;
                } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
                        ret = -EFAULT;
                } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
                        duprintf("get_entries: %u != %u\n", *len,
                                 sizeof(struct ipt_get_entries) + get.size);
                        ret = -EINVAL;
                } else
                        ret = get_entries(&get, user);
                break;
        }

        case IPT_SO_GET_REVISION_MATCH:
        case IPT_SO_GET_REVISION_TARGET: {
                struct ipt_get_revision rev;
                int target;

                if (*len != sizeof(rev)) {
                        ret = -EINVAL;
                        break;
                }
                if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
                        ret = -EFAULT;
                        break;
                }

                if (cmd == IPT_SO_GET_REVISION_TARGET)
                        target = 1;
                else
                        target = 0;

                try_then_request_module(xt_find_revision(AF_INET, rev.name,
                                                         rev.revision,
                                                         target, &ret),
                                        "ipt_%s", rev.name);
                break;
        }

        default:
                duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
                ret = -EINVAL;
        }

        return ret;
}

int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
{
        int ret;
        struct xt_table_info *newinfo;
        static struct xt_table_info bootstrap
                = { 0, 0, 0, { 0 }, { 0 }, { } };
        void *loc_cpu_entry;

        newinfo = xt_alloc_table_info(repl->size);
        if (!newinfo)
                return -ENOMEM;

        /* choose the copy on our node/cpu
         * but don't care about preemption
         */
        loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
        memcpy(loc_cpu_entry, repl->entries, repl->size);

        ret = translate_table(table->name, table->valid_hooks,
                              newinfo, loc_cpu_entry, repl->size,
                              repl->num_entries,
                              repl->hook_entry,
                              repl->underflow);
        if (ret != 0) {
                xt_free_table_info(newinfo);
                return ret;
        }

        ret = xt_register_table(table, &bootstrap, newinfo);
        if (ret != 0) {
                xt_free_table_info(newinfo);
                return ret;
        }

        return 0;
}
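
/*
 * Usage sketch (illustrative only, hence #if 0): a table module such as
 * iptable_filter builds its initial ruleset as a static ipt_replace-shaped
 * blob (one policy rule per hooked chain plus a trailing ERROR rule) and
 * registers it from its init path; "packet_filter" and "initial_table"
 * stand for that module's own objects and are named here only to show the
 * shape of the call.
 */
#if 0
static int __init example_table_init(void)
{
        /* initial_table.repl would be the module's built-in ruleset */
        return ipt_register_table(&packet_filter, &initial_table.repl);
}
#endif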

void ipt_unregister_table(struct ipt_table *table)
{
        struct xt_table_info *private;
        void *loc_cpu_entry;

        private = xt_unregister_table(table);

        /* Decrease module usage counts and free resources */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
        IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
        xt_free_table_info(private);
}

/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     int invert)
{
        return ((test_type == 0xFF)
                || (type == test_type && code >= min_code && code <= max_code))
                ^ invert;
}
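/*
 * Explanatory note: "-p icmp --icmp-type echo-request" arrives here as
 * test_type == 8 with code range 0..0xFF, so any echo-request matches;
 * test_type == 0xFF is the userspace wildcard meaning "any ICMP type".
 */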

static int
icmp_match(const struct sk_buff *skb,
           const struct net_device *in,
           const struct net_device *out,
           const void *matchinfo,
           int offset,
           unsigned int protoff,
           int *hotdrop)
{
        struct icmphdr _icmph, *ic;
        const struct ipt_icmp *icmpinfo = matchinfo;

        /* Must not be a fragment. */
        if (offset)
                return 0;

        ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
        if (ic == NULL) {
                /* We've been asked to examine this packet, and we
                 * can't.  Hence, no choice but to drop.
                 */
                duprintf("Dropping evil ICMP tinygram.\n");
                *hotdrop = 1;
                return 0;
        }

        return icmp_type_code_match(icmpinfo->type,
                                    icmpinfo->code[0],
                                    icmpinfo->code[1],
                                    ic->type, ic->code,
                                    !!(icmpinfo->invflags&IPT_ICMP_INV));
}

/* Called when user tries to insert an entry of this type. */
static int
icmp_checkentry(const char *tablename,
                const void *info,
                void *matchinfo,
                unsigned int matchsize,
                unsigned int hook_mask)
{
        const struct ipt_ip *ip = info;
        const struct ipt_icmp *icmpinfo = matchinfo;

        /* Must specify proto == ICMP, and no unknown invflags */
        return ip->proto == IPPROTO_ICMP
                && !(ip->invflags & IPT_INV_PROTO)
                && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
                && !(icmpinfo->invflags & ~IPT_ICMP_INV);
}

/* The built-in targets: standard (NULL) and error. */
static struct ipt_target ipt_standard_target = {
        .name           = IPT_STANDARD_TARGET,
};

static struct ipt_target ipt_error_target = {
        .name           = IPT_ERROR_TARGET,
        .target         = ipt_error,
};

static struct nf_sockopt_ops ipt_sockopts = {
        .pf             = PF_INET,
        .set_optmin     = IPT_BASE_CTL,
        .set_optmax     = IPT_SO_SET_MAX+1,
        .set            = do_ipt_set_ctl,
        .get_optmin     = IPT_BASE_CTL,
        .get_optmax     = IPT_SO_GET_MAX+1,
        .get            = do_ipt_get_ctl,
};
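
/*
 * Usage sketch (illustrative only, hence #if 0): the sockopt ops above
 * are what iptables(8) talks to.  A privileged userspace program opens
 * an ordinary IPv4 socket and issues get/setsockopt at SOL_IP with the
 * IPT_SO_* commands, roughly as below (userspace code, CAP_NET_ADMIN
 * required):
 */
#if 0
static int example_get_filter_info(struct ipt_getinfo *info)
{
        socklen_t size = sizeof(*info);
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

        strcpy(info->name, "filter");
        return getsockopt(fd, SOL_IP, IPT_SO_GET_INFO, info, &size);
}
#endif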

static struct ipt_match icmp_matchstruct = {
        .name           = "icmp",
        .match          = &icmp_match,
        .checkentry     = &icmp_checkentry,
};

static int __init init(void)
{
        int ret;

        xt_proto_init(AF_INET);

        /* No one else will be downing sem now, so we won't sleep */
        xt_register_target(AF_INET, &ipt_standard_target);
        xt_register_target(AF_INET, &ipt_error_target);
        xt_register_match(AF_INET, &icmp_matchstruct);

        /* Register setsockopt */
        ret = nf_register_sockopt(&ipt_sockopts);
        if (ret < 0) {
                duprintf("Unable to register sockopts.\n");
                return ret;
        }

        printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
        return 0;
}

static void __exit fini(void)
{
        nf_unregister_sockopt(&ipt_sockopts);

        xt_unregister_match(AF_INET, &icmp_matchstruct);
        xt_unregister_target(AF_INET, &ipt_error_target);
        xt_unregister_target(AF_INET, &ipt_standard_target);

        xt_proto_fini(AF_INET);
}

EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
module_init(init);
module_exit(fini);