/*
 * Packet matching code.
 *
 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
 * 	- increase module usage count as soon as we have rules inside
 * 	  a table
 */
#include <linux/config.h>
#include <linux/cache.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>

#include <linux/netfilter_ipv4/ip_tables.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");

/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
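
/*
 * Editorial example: SMP_ALIGN() rounds x up to the next multiple of
 * the cache line size. With SMP_CACHE_BYTES == 32 (an illustrative
 * value; it is architecture dependent), SMP_ALIGN(32) == 32 and
 * SMP_ALIGN(33) == 64.
 */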

static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
#define ASSERT_READ_LOCK(x)	IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x)	IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif

/*
   We keep a set of rules for each CPU, so we can avoid write-locking
   them in the softirq when updating the counters and therefore
   only need to read-lock in the softirq; doing a write_lock_bh() in user
   context stops packets coming through and allows user context to read
   the counters or update the rules.

   Hence the start of any table is given by get_table() below.  */

/* The table itself */
struct ipt_table_info
{
	/* Size per table */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU */
	void *entries[NR_CPUS];
};

static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

#if 0
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif

/* Returns whether the packet matches the rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

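	/*
	 * Editorial note: FWINV(cond, flag) evaluates to `cond'
	 * normally, and to `!cond' when the corresponding IPT_INV_*
	 * bit is set in ipinfo->invflags; the XOR against the
	 * normalized flag bit implements the usual iptables "!"
	 * semantics for each sub-test below.
	 */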
	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}

static inline int
ip_checkentry(const struct ipt_ip *ip)
{
	if (ip->flags & ~IPT_F_MASK) {
		duprintf("Unknown flag bits set: %08X\n",
			 ip->flags & ~IPT_F_MASK);
		return 0;
	}
	if (ip->invflags & ~IPT_INV_MASK) {
		duprintf("Unknown invflag bits set: %08X\n",
			 ip->invflags & ~IPT_INV_MASK);
		return 0;
	}
	return 1;
}

static unsigned int
ipt_error(struct sk_buff **pskb,
	  const struct net_device *in,
	  const struct net_device *out,
	  unsigned int hooknum,
	  const void *targinfo,
	  void *userinfo)
{
	if (net_ratelimit())
		printk("ip_tables: error: `%s'\n", (char *)targinfo);

	return NF_DROP;
}

static inline
int do_match(struct ipt_entry_match *m,
	     const struct sk_buff *skb,
	     const struct net_device *in,
	     const struct net_device *out,
	     int offset,
	     int *hotdrop)
{
	/* Stop iteration if it doesn't match */
	if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
		return 1;
	else
		return 0;
}

static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports.  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	table_base = (void *)table->private->entries[smp_processor_id()];
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check that no one else is using our table */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);

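	/*
	 * Traversal notes (editorial): a standard target's verdict
	 * v < 0 encodes either IPT_RETURN (pop the saved back pointer)
	 * or a final NF_* verdict as -v - 1, so the loop below
	 * recovers it with (unsigned)(-v) - 1.  A verdict v >= 0 is an
	 * offset to jump to; the back pointer for the eventual RETURN
	 * is stashed in the *next* entry's comefrom field.
	 */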
	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v != (void *)e + e->next_offset
				    && !(e->ip.flags & IPT_F_GOTO)) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}

/*
 * These are weird, but module loading must not be done with mutex
 * held (since they will register), and we have to have a single
 * function to use try_then_request_module().
 */

/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
static inline struct ipt_table *find_table_lock(const char *name)
{
	struct ipt_table *t;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_tables, list)
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;
	up(&ipt_mutex);
	return NULL;
}

/* Find match, grabs ref.  Returns ERR_PTR() on error. */
static inline struct ipt_match *find_match(const char *name, u8 revision)
{
	struct ipt_match *m;
	int err = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(m, &ipt_match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision == revision) {
				if (try_module_get(m->me)) {
					up(&ipt_mutex);
					return m;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	up(&ipt_mutex);
	return ERR_PTR(err);
}

/* Find target, grabs ref.  Returns ERR_PTR() on error. */
static inline struct ipt_target *find_target(const char *name, u8 revision)
{
	struct ipt_target *t;
	int err = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision == revision) {
				if (try_module_get(t->me)) {
					up(&ipt_mutex);
					return t;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	up(&ipt_mutex);
	return ERR_PTR(err);
}

struct ipt_target *ipt_find_target(const char *name, u8 revision)
{
	struct ipt_target *target;

	target = try_then_request_module(find_target(name, revision),
					 "ipt_%s", name);
	if (IS_ERR(target) || !target)
		return NULL;
	return target;
}

static int match_revfn(const char *name, u8 revision, int *bestp)
{
	struct ipt_match *m;
	int have_rev = 0;

	list_for_each_entry(m, &ipt_match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision > *bestp)
				*bestp = m->revision;
			if (m->revision == revision)
				have_rev = 1;
		}
	}
	return have_rev;
}

static int target_revfn(const char *name, u8 revision, int *bestp)
{
	struct ipt_target *t;
	int have_rev = 0;

	list_for_each_entry(t, &ipt_target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision > *bestp)
				*bestp = t->revision;
			if (t->revision == revision)
				have_rev = 1;
		}
	}
	return have_rev;
}

/* Returns true or false (if no such extension at all) */
static inline int find_revision(const char *name, u8 revision,
				int (*revfn)(const char *, u8, int *),
				int *err)
{
	int have_rev, best = -1;

	if (down_interruptible(&ipt_mutex) != 0) {
		*err = -EINTR;
		return 1;
	}
	have_rev = revfn(name, revision, &best);
	up(&ipt_mutex);

	/* Nothing at all?  Return 0 to try loading module. */
	if (best == -1) {
		*err = -ENOENT;
		return 0;
	}

	*err = best;
	if (!have_rev)
		*err = -EPROTONOSUPPORT;
	return 1;
}


/* All zeroes == unconditional rule. */
static inline int
unconditional(const struct ipt_ip *ip)
{
	unsigned int i;

	for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
		if (((__u32 *)ip)[i])
			return 0;

	return 1;
}

/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct ipt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(entry0 + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(entry0 + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(entry0 + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This is a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(entry0 + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
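
/*
 * Editorial summary: mark_source_chains() walks every path reachable
 * from each hook entry point, reusing counters.pcnt as a temporary
 * back pointer and bit NF_IP_NUMHOOKS of comefrom as a "currently on
 * this path" marker.  Revisiting an entry with that bit still set
 * means the ruleset jumps in a cycle, so table loading fails (see the
 * -ELOOP return in translate_table() below).
 */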

static inline int
cleanup_match(struct ipt_entry_match *m, unsigned int *i)
{
	if (i && (*i)-- == 0)
		return 1;

	if (m->u.kernel.match->destroy)
		m->u.kernel.match->destroy(m->data,
					   m->u.match_size - sizeof(*m));
	module_put(m->u.kernel.match->me);
	return 0;
}

static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}

static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;

	match = try_then_request_module(find_match(m->u.user.name,
						   m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		module_put(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}

static struct ipt_target ipt_standard_target;

static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(find_target(t->u.user.name,
						     t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		module_put(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}

static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}

static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
	struct ipt_entry_target *t;

	if (i && (*i)-- == 0)
		return 1;

	/* Cleanup all matches */
	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
	t = ipt_get_target(e);
	if (t->u.kernel.target->destroy)
		t->u.kernel.target->destroy(t->data,
					    t->u.target_size - sizeof(*t));
	module_put(t->u.kernel.target->me);
	return 0;
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		void *entry0,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				entry0,
				entry0 + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks, entry0))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		IPT_ENTRY_ITERATE(entry0, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_cpu(i) {
		if (newinfo->entries[i] && newinfo->entries[i] != entry0)
			memcpy(newinfo->entries[i], entry0, newinfo->size);
	}

	return ret;
}

static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		int cpu;

		for_each_cpu(cpu) {
			struct ipt_entry *table_base = newinfo->entries[cpu];
			if (table_base)
				table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct? */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}

/* Gets counters. */
static inline int
add_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static inline int
set_entry_to_counter(const struct ipt_entry *e,
		     struct ipt_counters total[],
		     unsigned int *i)
{
	SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);

	(*i)++;
	return 0;
}

static void
get_counters(const struct ipt_table_info *t,
	     struct ipt_counters counters[])
{
	unsigned int cpu;
	unsigned int i;
	unsigned int curcpu;

	/* Instead of clearing (by a previous call to memset())
	 * the counters and using adds, we set the counters
	 * with the data of the 'current' CPU.
	 * We don't care about preemption here.
	 */
	curcpu = raw_smp_processor_id();

	i = 0;
	IPT_ENTRY_ITERATE(t->entries[curcpu],
			  t->size,
			  set_entry_to_counter,
			  counters,
			  &i);

	for_each_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		i = 0;
		IPT_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
	}
}

static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;
	void *loc_cpu_entry;

	/* We need an atomic snapshot of the counters: the rest doesn't
	   change (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc_node(countersize, numa_node_id());

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* choose the copy that is on our node/cpu, ...
	 * This choice is lazy (because the current thread is
	 * allowed to migrate to another cpu)
	 */
	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
	/* ... then copy entire thing ... */
	if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(loc_cpu_entry + off);
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}

static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name);
	if (t && !IS_ERR(t)) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		module_put(t->me);
		up(&ipt_mutex);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}

static void free_table_info(struct ipt_table_info *info)
{
	int cpu;
	for_each_cpu(cpu) {
		if (info->size <= PAGE_SIZE)
			kfree(info->entries[cpu]);
		else
			vfree(info->entries[cpu]);
	}
	kfree(info);
}

static struct ipt_table_info *alloc_table_info(unsigned int size)
{
	struct ipt_table_info *newinfo;
	int cpu;

	newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
	if (!newinfo)
		return NULL;

	newinfo->size = size;

	for_each_cpu(cpu) {
		if (size <= PAGE_SIZE)
			newinfo->entries[cpu] = kmalloc_node(size,
							     GFP_KERNEL,
							     cpu_to_node(cpu));
		else
			newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
		if (newinfo->entries[cpu] == NULL) {
			free_table_info(newinfo);
			return NULL;
		}
	}

	return newinfo;
}
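
/*
 * Note (editorial): per-CPU blobs of at most PAGE_SIZE come from
 * kmalloc_node() (physically contiguous, cheap); larger tables fall
 * back to vmalloc_node(), which only needs virtually contiguous
 * memory.  free_table_info() above mirrors the same size test when
 * choosing kfree() vs. vfree().
 */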

static int
do_replace(void __user *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct ipt_table_info *newinfo, *oldinfo;
	struct ipt_counters *counters;
	void *loc_cpu_entry, *loc_cpu_old_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
		return -ENOMEM;

	newinfo = alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy that is on our node/cpu */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = try_then_request_module(find_table_lock(tmp.name),
				    "iptable_%s", tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		 oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, NULL);
	free_table_info(oldinfo);
	if (copy_to_user(tmp.counters, counters,
			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	up(&ipt_mutex);
	return ret;

 put_module:
	module_put(t->me);
	up(&ipt_mutex);
 free_newinfo_counters_untrans:
	IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	free_table_info(newinfo);
	return ret;
}

/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct ipt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}

static int
do_add_counters(void __user *user, unsigned int len)
{
	unsigned int i;
	struct ipt_counters_info tmp, *paddc;
	struct ipt_table *t;
	int ret = 0;
	void *loc_cpu_entry;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
		return -EINVAL;

	paddc = vmalloc_node(len, numa_node_id());
	if (!paddc)
		return -ENOMEM;

	if (copy_from_user(paddc, user, len) != 0) {
		ret = -EFAULT;
		goto free;
	}

	t = find_table_lock(tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free;
	}

	write_lock_bh(&t->lock);
	if (t->private->number != paddc->num_counters) {
		ret = -EINVAL;
		goto unlock_up_free;
	}

	i = 0;
	/* Choose the copy that is on our node */
	loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry,
			  t->private->size,
			  add_counter_to_entry,
			  paddc->counters,
			  &i);
 unlock_up_free:
	write_unlock_bh(&t->lock);
	up(&ipt_mutex);
	module_put(t->me);
 free:
	vfree(paddc);

	return ret;
}

static int
do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_SET_REPLACE:
		ret = do_replace(user, len);
		break;

	case IPT_SO_SET_ADD_COUNTERS:
		ret = do_add_counters(user, len);
		break;

	default:
		duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		t = try_then_request_module(find_table_lock(name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			up(&ipt_mutex);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		int (*revfn)(const char *, u8, int *);

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			revfn = target_revfn;
		else
			revfn = match_revfn;

		try_then_request_module(find_revision(rev.name, rev.revision,
						      revfn, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}

/* Registration hooks for targets. */
int
ipt_register_target(struct ipt_target *target)
{
	int ret;

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0)
		return ret;
	list_add(&target->list, &ipt_target);
	up(&ipt_mutex);
	return ret;
}

void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
}

int
ipt_register_match(struct ipt_match *match)
{
	int ret;

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0)
		return ret;

	list_add(&match->list, &ipt_match);
	up(&ipt_mutex);

	return ret;
}

void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
}

int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct ipt_table_info *newinfo;
	static struct ipt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };
	void *loc_cpu_entry;

	newinfo = alloc_table_info(repl->size);
	if (!newinfo)
		return -ENOMEM;

	/* choose the copy on our node/cpu,
	 * but don't care about preemption
	 */
	loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
	memcpy(loc_cpu_entry, repl->entries, repl->size);

	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, loc_cpu_entry, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		free_table_info(newinfo);
		return ret;
	}

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0) {
		free_table_info(newinfo);
		return ret;
	}

	/* Don't autoload: we'd eat our tail... */
	if (list_named_find(&ipt_tables, table->name)) {
		ret = -EEXIST;
		goto free_unlock;
	}

	/* Simplifies replace_table code. */
	table->private = &bootstrap;
	if (!replace_table(table, 0, newinfo, &ret))
		goto free_unlock;

	duprintf("table->private->number = %u\n",
		 table->private->number);

	/* save number of initial entries */
	table->private->initial_entries = table->private->number;

	rwlock_init(&table->lock);
	list_prepend(&ipt_tables, table);

 unlock:
	up(&ipt_mutex);
	return ret;

 free_unlock:
	free_table_info(newinfo);
	goto unlock;
}

void ipt_unregister_table(struct ipt_table *table)
{
	void *loc_cpu_entry;

	down(&ipt_mutex);
	LIST_DELETE(&ipt_tables, table);
	up(&ipt_mutex);

	/* Decrease module usage counts and free resources */
	loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
	IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
			  cleanup_entry, NULL);
	free_table_info(table->private);
}

/* Returns 1 if the port is matched by the range, 0 otherwise */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int ret;

	ret = (port >= min && port <= max) ^ invert;
	return ret;
}
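
/*
 * Editorial example: port_match(1024, 65535, 8080, 0) is true, and
 * the same call with invert == 1 is false; the XOR simply flips the
 * in-range test when the rule carries one of the IPT_*_INV_*PT flags.
 */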

static int
tcp_find_option(u_int8_t option,
		const struct sk_buff *skb,
		unsigned int optlen,
		int invert,
		int *hotdrop)
{
	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
	unsigned int i;

	duprintf("tcp_match: finding option\n");

	if (!optlen)
		return invert;

	/* If we don't have the whole header, drop packet. */
	op = skb_header_pointer(skb,
				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
				optlen, _opt);
	if (op == NULL) {
		*hotdrop = 1;
		return 0;
	}

	for (i = 0; i < optlen; ) {
		if (op[i] == option) return !invert;
		if (op[i] < 2) i++;
		else i += op[i+1]?:1;
	}

	return invert;
}
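
/*
 * Editorial note: the loop above walks TCP options in kind/length
 * form.  Kinds 0 (EOL) and 1 (NOP) are single bytes, hence the
 * op[i] < 2 special case; anything else advances by the option's
 * length byte, with the `?: 1' guarding against a bogus zero length
 * that would otherwise loop forever.
 */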

static int
tcp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct tcphdr _tcph, *th;
	const struct ipt_tcp *tcpinfo = matchinfo;

	if (offset) {
		/* To quote Alan:

		   Don't allow a fragment of TCP 8 bytes in.  Nobody normal
		   causes this.  It's a cracker trying to break in by doing a
		   flag overwrite to pass the direction checks.
		*/
		if (offset == 1) {
			duprintf("Dropping evil TCP offset=1 frag.\n");
			*hotdrop = 1;
		}
		/* Must not be a fragment. */
		return 0;
	}

#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL) {
		/* We've been asked to examine this packet, and we
		   can't.  Hence, no choice but to drop. */
		duprintf("Dropping evil TCP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
			ntohs(th->source),
			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
		return 0;
	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
			ntohs(th->dest),
			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
		return 0;
	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
		      == tcpinfo->flg_cmp,
		      IPT_TCP_INV_FLAGS))
		return 0;
	if (tcpinfo->option) {
		if (th->doff * 4 < sizeof(_tcph)) {
			*hotdrop = 1;
			return 0;
		}
		if (!tcp_find_option(tcpinfo->option, skb,
				     th->doff*4 - sizeof(_tcph),
				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
				     hotdrop))
			return 0;
	}
	return 1;
}

/* Called when user tries to insert an entry of this type. */
static int
tcp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchsize,
	       unsigned int hook_mask)
{
	const struct ipt_tcp *tcpinfo = matchinfo;

	/* Must specify proto == TCP, and no unknown invflags */
	return ip->proto == IPPROTO_TCP
		&& !(ip->invflags & IPT_INV_PROTO)
		&& matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
		&& !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
}

static int
udp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct udphdr _udph, *uh;
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_udph), &_udph);
	if (uh == NULL) {
		/* We've been asked to examine this packet, and we
		   can't.  Hence, no choice but to drop. */
		duprintf("Dropping evil UDP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return port_match(udpinfo->spts[0], udpinfo->spts[1],
			  ntohs(uh->source),
			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
			      ntohs(uh->dest),
			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
}

/* Called when user tries to insert an entry of this type. */
static int
udp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchinfosize,
	       unsigned int hook_mask)
{
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must specify proto == UDP, and no unknown invflags */
	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
			 IPPROTO_UDP);
		return 0;
	}
	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
		duprintf("ipt_udp: matchsize %u != %u\n",
			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
		return 0;
	}
	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
		duprintf("ipt_udp: unknown flags %X\n",
			 udpinfo->invflags);
		return 0;
	}

	return 1;
}

/* Returns 1 if the type and code are matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
		^ invert;
}
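
/*
 * Editorial note: a test_type of 0xFF acts as a wildcard for the ICMP
 * type (the first clause short-circuits true regardless of type and
 * code), so only the invert bit decides the result in that case.
 */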

static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't.  Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}

/* Called when user tries to insert an entry of this type. */
static int
icmp_checkentry(const char *tablename,
		const struct ipt_ip *ip,
		void *matchinfo,
		unsigned int matchsize,
		unsigned int hook_mask)
{
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must specify proto == ICMP, and no unknown invflags */
	return ip->proto == IPPROTO_ICMP
		&& !(ip->invflags & IPT_INV_PROTO)
		&& matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
		&& !(icmpinfo->invflags & ~IPT_ICMP_INV);
}

/* The built-in targets: standard (NULL) and error. */
static struct ipt_target ipt_standard_target = {
	.name		= IPT_STANDARD_TARGET,
};

static struct ipt_target ipt_error_target = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
};

static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};
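
/*
 * Userspace sketch (editorial, not part of this file): the sockopt
 * interface registered above is driven from an IPv4 socket at level
 * IPPROTO_IP, and needs CAP_NET_ADMIN (see do_ipt_get_ctl() above).
 * Roughly what iptables(8) does via libiptc:
 *
 *	struct ipt_getinfo info;
 *	socklen_t len = sizeof(info);
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *
 *	memset(&info, 0, sizeof(info));
 *	strncpy(info.name, "filter", sizeof(info.name));
 *	if (getsockopt(fd, IPPROTO_IP, IPT_SO_GET_INFO, &info, &len) == 0)
 *		printf("%u entries, %u bytes\n", info.num_entries, info.size);
 *
 * Note that *len must be exactly sizeof(struct ipt_getinfo) or the
 * IPT_SO_GET_INFO handler returns -EINVAL.
 */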

static struct ipt_match tcp_matchstruct = {
	.name		= "tcp",
	.match		= &tcp_match,
	.checkentry	= &tcp_checkentry,
};

static struct ipt_match udp_matchstruct = {
	.name		= "udp",
	.match		= &udp_match,
	.checkentry	= &udp_checkentry,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};

#ifdef CONFIG_PROC_FS
static inline int print_name(const char *i,
			     off_t start_offset, char *buffer, int length,
			     off_t *pos, unsigned int *count)
{
	if ((*count)++ >= start_offset) {
		unsigned int namelen;

		namelen = sprintf(buffer + *pos, "%s\n",
				  i + sizeof(struct list_head));
		if (*pos + namelen > length) {
			/* Stop iterating */
			return 1;
		}
		*pos += namelen;
	}
	return 0;
}

static inline int print_target(const struct ipt_target *t,
			       off_t start_offset, char *buffer, int length,
			       off_t *pos, unsigned int *count)
{
	if (t == &ipt_standard_target || t == &ipt_error_target)
		return 0;
	return print_name((char *)t, start_offset, buffer, length, pos, count);
}

static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_tables, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}

static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	*start = (char *)((unsigned long)count - offset);
	return pos;
}

static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_match, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	*start = (char *)((unsigned long)count - offset);
	return pos;
}

static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
  { "ip_tables_targets", ipt_get_targets },
  { "ip_tables_matches", ipt_get_matches },
  { NULL, NULL } };
#endif /* CONFIG_PROC_FS */

static int __init init(void)
{
	int ret;

	/* No one else will be taking the semaphore now, so we won't sleep */
	down(&ipt_mutex);
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
		struct proc_dir_entry *proc;
		int i;

		for (i = 0; ipt_proc_entry[i].name; i++) {
			proc = proc_net_create(ipt_proc_entry[i].name, 0,
					       ipt_proc_entry[i].get_info);
			if (!proc) {
				while (--i >= 0)
					proc_net_remove(ipt_proc_entry[i].name);
				nf_unregister_sockopt(&ipt_sockopts);
				return -ENOMEM;
			}
			proc->owner = THIS_MODULE;
		}
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}

static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);
#ifdef CONFIG_PROC_FS
	{
		int i;
		for (i = 0; ipt_proc_entry[i].name; i++)
			proc_net_remove(ipt_proc_entry[i].name);
	}
#endif
}

EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

module_init(init);
module_exit(fini);