/*
 * net/ipv4/netfilter/ip_tables.c (Linux 2.6.12-rc2)
 */
1 /*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
13 * a table
14 */
#include <linux/config.h>
#include <linux/kernel.h>	/* INT_MAX, for userspace-size overflow checks */
#include <linux/cache.h>
#include <linux/skbuff.h>
#include <linux/kmod.h>
#include <linux/vmalloc.h>
#include <linux/netdevice.h>
#include <linux/module.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/ip.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/proc_fs.h>
#include <linux/err.h>

#include <linux/netfilter_ipv4/ip_tables.h>
32
/* Module metadata exported to modinfo/modprobe. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("IPv4 packet filter");
36
/* Compile-time debug switches; all disabled by default. */
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

/* dprintf(): packet-path debug output; compiles to nothing unless
 * DEBUG_IP_FIREWALL is defined. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...) printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf(): userspace-interface (table load/dump) debug output;
 * compiles to nothing unless DEBUG_IP_FIREWALL_USER is defined. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif
52
/* IP_NF_ASSERT(): report (but do not halt on) a violated invariant when
 * netfilter debugging is configured; a no-op otherwise. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)						\
do {								\
	if (!(x))						\
		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
		       __FUNCTION__, __FILE__, __LINE__);	\
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
/* Round x up to the next multiple of the cacheline size. */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64
/* Serializes all match/target/table registration and table replacement. */
static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
/* Read and write "locks" are the same mutex here; listhelp.h just wants
 * the two assertion macros.  NOTE(review): if nobody held the mutex, a
 * successful down_trylock() would acquire it as a side effect — this
 * appears to rely on the mutex always being held when asserted. */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
/* All the better to debug you with... */
#define static
#define inline
#endif
78
79 /*
80 We keep a set of rules for each CPU, so we can avoid write-locking
81 them in the softirq when updating the counters and therefore
82 only need to read-lock in the softirq; doing a write_lock_bh() in user
83 context stops packets coming through and allows user context to read
84 the counters or update the rules.
85
86 To be cache friendly on SMP, we arrange them like so:
87 [ n-entries ]
88 ... cache-align padding ...
89 [ n-entries ]
90
91 Hence the start of any table is given by get_table() below. */
92
/* The table itself.  One of these per registered table; the rule blob in
 * entries[] is replicated once per CPU (see TABLE_OFFSET below). */
struct ipt_table_info
{
	/* Size per table: bytes in one CPU's copy of the rule blob */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows: byte offsets into entries[],
	 * indexed by netfilter hook number */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU */
	char entries[0] ____cacheline_aligned;
};
110
/* Registered targets, matches and tables; all protected by ipt_mutex. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
/* Bump a struct ipt_counters by b bytes / p packets. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset of CPU p's copy of the rule blob within entries[]. */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif

#if 0
/* Debug wrappers tracing semaphore operations; normally compiled out. */
#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
#endif
127
/* Returns whether matches rule or not. */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

	/* Evaluate bool, then flip the result if the rule's matching
	 * IPT_INV_* inversion flag is set. */
#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

	/* Source/destination address, each masked before comparison. */
	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely.
	 * Compares the IFNAMSIZ name buffers word-at-a-time under the
	 * rule's per-byte mask.  NOTE(review): assumes the buffers are
	 * unsigned-long aligned (cf. nulldevname in ipt_do_table). */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	/* Same word-at-a-time comparison for the output device. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol (proto == 0 means "any"). */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}
206
207 static inline int
208 ip_checkentry(const struct ipt_ip *ip)
209 {
210 if (ip->flags & ~IPT_F_MASK) {
211 duprintf("Unknown flag bits set: %08X\n",
212 ip->flags & ~IPT_F_MASK);
213 return 0;
214 }
215 if (ip->invflags & ~IPT_INV_MASK) {
216 duprintf("Unknown invflag bits set: %08X\n",
217 ip->invflags & ~IPT_INV_MASK);
218 return 0;
219 }
220 return 1;
221 }
222
223 static unsigned int
224 ipt_error(struct sk_buff **pskb,
225 const struct net_device *in,
226 const struct net_device *out,
227 unsigned int hooknum,
228 const void *targinfo,
229 void *userinfo)
230 {
231 if (net_ratelimit())
232 printk("ip_tables: error: `%s'\n", (char *)targinfo);
233
234 return NF_DROP;
235 }
236
237 static inline
238 int do_match(struct ipt_entry_match *m,
239 const struct sk_buff *skb,
240 const struct net_device *in,
241 const struct net_device *out,
242 int offset,
243 int *hotdrop)
244 {
245 /* Stop iteration if it doesn't match */
246 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
247 return 1;
248 else
249 return 0;
250 }
251
/* Translate a byte offset within a rule blob into an entry pointer. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *blob = base;

	return (struct ipt_entry *)(blob + offset);
}
257
/* Returns one of the generic firewall policies, like NF_ACCEPT.
 *
 * Core rule-traversal engine, called from the netfilter hooks.  Walks
 * this CPU's copy of the table's rule blob starting at the entry point
 * for @hook, matching each rule against the packet and executing its
 * target until a verdict is reached.  back/comefrom implement a
 * non-recursive call stack for user-defined chains. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	/* Aligned stand-in name when in/out device is absent, so the
	 * word-wise compare in ip_packet_match is safe. */
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	/* Read side of the table lock; table replacement takes the
	 * write side (see replace_table). */
	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	/* Each CPU traverses its own copy of the rule blob. */
	table_base = (void *)table->private->entries
		+ TABLE_OFFSET(table->private, smp_processor_id());
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check noone else using our table */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		(*pskb)->nfcache |= e->nfcache;
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			/* Nonzero means a match extension rejected
			 * the packet; skip to the next rule. */
			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target?  (NULL target function means
			 * the verdict/jump is stored inline.) */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						/* Negative verdicts encode
						 * NF_* values as -v - 1. */
						verdict = (unsigned)(-v) - 1;
						break;
					}
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				/* A real jump (not a fallthrough to the
				 * adjacent rule): push return address. */
				if (table_base + v
				    != (void *)e + e->next_offset) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}
411
412 /*
413 * These are weird, but module loading must not be done with mutex
414 * held (since they will register), and we have to have a single
415 * function to use try_then_request_module().
416 */
417
/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
/* On success this returns with ipt_mutex HELD and a module reference on
 * the table; the caller must up(&ipt_mutex) and module_put() when done.
 * On failure (NULL or ERR_PTR) the mutex has been released. */
static inline struct ipt_table *find_table_lock(const char *name)
{
	struct ipt_table *t;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_tables, list)
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;
	up(&ipt_mutex);
	return NULL;
}
432
433 /* Find match, grabs ref. Returns ERR_PTR() on error. */
434 static inline struct ipt_match *find_match(const char *name, u8 revision)
435 {
436 struct ipt_match *m;
437 int err = 0;
438
439 if (down_interruptible(&ipt_mutex) != 0)
440 return ERR_PTR(-EINTR);
441
442 list_for_each_entry(m, &ipt_match, list) {
443 if (strcmp(m->name, name) == 0) {
444 if (m->revision == revision) {
445 if (try_module_get(m->me)) {
446 up(&ipt_mutex);
447 return m;
448 }
449 } else
450 err = -EPROTOTYPE; /* Found something. */
451 }
452 }
453 up(&ipt_mutex);
454 return ERR_PTR(err);
455 }
456
457 /* Find target, grabs ref. Returns ERR_PTR() on error. */
458 static inline struct ipt_target *find_target(const char *name, u8 revision)
459 {
460 struct ipt_target *t;
461 int err = 0;
462
463 if (down_interruptible(&ipt_mutex) != 0)
464 return ERR_PTR(-EINTR);
465
466 list_for_each_entry(t, &ipt_target, list) {
467 if (strcmp(t->name, name) == 0) {
468 if (t->revision == revision) {
469 if (try_module_get(t->me)) {
470 up(&ipt_mutex);
471 return t;
472 }
473 } else
474 err = -EPROTOTYPE; /* Found something. */
475 }
476 }
477 up(&ipt_mutex);
478 return ERR_PTR(err);
479 }
480
481 struct ipt_target *ipt_find_target(const char *name, u8 revision)
482 {
483 struct ipt_target *target;
484
485 target = try_then_request_module(find_target(name, revision),
486 "ipt_%s", name);
487 if (IS_ERR(target) || !target)
488 return NULL;
489 return target;
490 }
491
492 static int match_revfn(const char *name, u8 revision, int *bestp)
493 {
494 struct ipt_match *m;
495 int have_rev = 0;
496
497 list_for_each_entry(m, &ipt_match, list) {
498 if (strcmp(m->name, name) == 0) {
499 if (m->revision > *bestp)
500 *bestp = m->revision;
501 if (m->revision == revision)
502 have_rev = 1;
503 }
504 }
505 return have_rev;
506 }
507
508 static int target_revfn(const char *name, u8 revision, int *bestp)
509 {
510 struct ipt_target *t;
511 int have_rev = 0;
512
513 list_for_each_entry(t, &ipt_target, list) {
514 if (strcmp(t->name, name) == 0) {
515 if (t->revision > *bestp)
516 *bestp = t->revision;
517 if (t->revision == revision)
518 have_rev = 1;
519 }
520 }
521 return have_rev;
522 }
523
524 /* Returns true or false (if no such extension at all) */
525 static inline int find_revision(const char *name, u8 revision,
526 int (*revfn)(const char *, u8, int *),
527 int *err)
528 {
529 int have_rev, best = -1;
530
531 if (down_interruptible(&ipt_mutex) != 0) {
532 *err = -EINTR;
533 return 1;
534 }
535 have_rev = revfn(name, revision, &best);
536 up(&ipt_mutex);
537
538 /* Nothing at all? Return 0 to try loading module. */
539 if (best == -1) {
540 *err = -ENOENT;
541 return 0;
542 }
543
544 *err = best;
545 if (!have_rev)
546 *err = -EPROTONOSUPPORT;
547 return 1;
548 }
549
550
551 /* All zeroes == unconditional rule. */
552 static inline int
553 unconditional(const struct ipt_ip *ip)
554 {
555 unsigned int i;
556
557 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
558 if (((__u32 *)ip)[i])
559 return 0;
560
561 return 1;
562 }
563
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom. */
static int
mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(newinfo->entries + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			/* Bit NF_IP_NUMHOOKS marks "currently on the
			 * traversal path"; meeting it again means the
			 * rule set contains a chain loop. */
			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					/* Leaving this rule: clear the
					 * on-path bit set above. */
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					oldpos = pos;
					/* pcnt temporarily held the back
					 * pointer; reset it on the way out. */
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(newinfo->entries + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(newinfo->entries + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(newinfo->entries + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
	next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
659
660 static inline int
661 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
662 {
663 if (i && (*i)-- == 0)
664 return 1;
665
666 if (m->u.kernel.match->destroy)
667 m->u.kernel.match->destroy(m->data,
668 m->u.match_size - sizeof(*m));
669 module_put(m->u.kernel.match->me);
670 return 0;
671 }
672
673 static inline int
674 standard_check(const struct ipt_entry_target *t,
675 unsigned int max_offset)
676 {
677 struct ipt_standard_target *targ = (void *)t;
678
679 /* Check standard info. */
680 if (t->u.target_size
681 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
682 duprintf("standard_check: target size %u != %u\n",
683 t->u.target_size,
684 IPT_ALIGN(sizeof(struct ipt_standard_target)));
685 return 0;
686 }
687
688 if (targ->verdict >= 0
689 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
690 duprintf("ipt_standard_check: bad verdict (%i)\n",
691 targ->verdict);
692 return 0;
693 }
694
695 if (targ->verdict < -NF_MAX_VERDICT - 1) {
696 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
697 targ->verdict);
698 return 0;
699 }
700 return 1;
701 }
702
/* Resolve one match by name/revision (loading "ipt_<name>" on demand),
 * take a module reference and run its checkentry() hook.  On success
 * the reference is kept (released later via cleanup_match) and *i,
 * the count of successfully-checked matches, is bumped. */
static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;

	match = try_then_request_module(find_match(m->u.user.name,
						   m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		/* Extension vetoed the rule: drop the ref we just took. */
		module_put(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}
734
735 static struct ipt_target ipt_standard_target;
736
/* Validate one complete rule: the IP-match part, every match extension
 * and the target.  On success every extension holds a module reference
 * and *i is bumped; on any failure all references taken so far for this
 * rule are rolled back before returning the error. */
static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	/* j counts matches that passed check_match, so a later failure
	 * can unwind exactly that many in cleanup_matches. */
	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(find_target(t->u.user.name,
						     t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	/* Standard target verdicts are validated here; other targets
	 * get their own checkentry() hook. */
	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		module_put(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}
791
792 static inline int
793 check_entry_size_and_hooks(struct ipt_entry *e,
794 struct ipt_table_info *newinfo,
795 unsigned char *base,
796 unsigned char *limit,
797 const unsigned int *hook_entries,
798 const unsigned int *underflows,
799 unsigned int *i)
800 {
801 unsigned int h;
802
803 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
804 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
805 duprintf("Bad offset %p\n", e);
806 return -EINVAL;
807 }
808
809 if (e->next_offset
810 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
811 duprintf("checking: element %p size %u\n",
812 e, e->next_offset);
813 return -EINVAL;
814 }
815
816 /* Check hooks & underflows */
817 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
818 if ((unsigned char *)e - base == hook_entries[h])
819 newinfo->hook_entry[h] = hook_entries[h];
820 if ((unsigned char *)e - base == underflows[h])
821 newinfo->underflow[h] = underflows[h];
822 }
823
824 /* FIXME: underflows must be unconditional, standard verdicts
825 < 0 (not IPT_RETURN). --RR */
826
827 /* Clear counters and comefrom */
828 e->counters = ((struct ipt_counters) { 0, 0 });
829 e->comefrom = 0;
830
831 (*i)++;
832 return 0;
833 }
834
835 static inline int
836 cleanup_entry(struct ipt_entry *e, unsigned int *i)
837 {
838 struct ipt_entry_target *t;
839
840 if (i && (*i)-- == 0)
841 return 1;
842
843 /* Cleanup all matches */
844 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
845 t = ipt_get_target(e);
846 if (t->u.kernel.target->destroy)
847 t->u.kernel.target->destroy(t->data,
848 t->u.target_size - sizeof(*t));
849 module_put(t->u.kernel.target->me);
850 return 0;
851 }
852
/* Checks and translates the user-supplied table segment (held in
   newinfo) */
/* Pipeline: structural checks on every entry, hook/underflow
 * assignment, loop detection, per-extension checkentry, and finally
 * replication of CPU 0's blob to every other CPU.  Returns 0 or a
 * negative errno; on failure no module references are left held. */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				newinfo->entries,
				newinfo->entries + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	/* The iterator must have visited exactly the claimed count. */
	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	/* Reject rule sets containing chain loops. */
	if (!mark_source_chains(newinfo, valid_hooks))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		/* i holds how many entries passed check_entry;
		 * unwind exactly those. */
		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for (i = 1; i < num_possible_cpus(); i++) {
		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
		       newinfo->entries,
		       SMP_ALIGN(newinfo->size));
	}

	return ret;
}
934
/* Swap @newinfo in as @table's rule set under the table write lock.
 * Returns the old ipt_table_info (caller harvests its counters and
 * frees it), or NULL with *error = -EAGAIN if @num_counters does not
 * match the live table's rule count. */
static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		struct ipt_entry *table_base;
		unsigned int i;

		/* Mark each per-CPU copy "not in use" for the
		 * comefrom cross-checks in ipt_do_table. */
		for (i = 0; i < num_possible_cpus(); i++) {
			table_base =
				(void *)newinfo->entries
				+ TABLE_OFFSET(newinfo, i);

			table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct? */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}
975
976 /* Gets counters. */
977 static inline int
978 add_entry_to_counter(const struct ipt_entry *e,
979 struct ipt_counters total[],
980 unsigned int *i)
981 {
982 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
983
984 (*i)++;
985 return 0;
986 }
987
988 static void
989 get_counters(const struct ipt_table_info *t,
990 struct ipt_counters counters[])
991 {
992 unsigned int cpu;
993 unsigned int i;
994
995 for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
996 i = 0;
997 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
998 t->size,
999 add_entry_to_counter,
1000 counters,
1001 &i);
1002 }
1003 }
1004
/* Copy the table's rules plus summed counters back to userspace.
 * The kernel blob stores kernel pointers inside each match/target;
 * after the bulk copy those are overwritten in the user copy with
 * the extension names.  Returns 0 or a negative errno. */
static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc(countersize);

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	memset(counters, 0, countersize);
	/* Write lock so no CPU is traversing/updating while we sum. */
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* ... then copy entire thing from CPU 0... */
	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(table->private->entries + off);
		/* Patch in the summed counters for this rule. */
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* Replace each match's kernel pointer with its name. */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		/* Same for the target. */
		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}
1083
/* Dump a table's rules to userspace (IPT_SO_GET_ENTRIES).  On success
 * find_table_lock() returned with ipt_mutex held and a module ref on
 * the table; both are released here.  Returns 0 or a negative errno. */
static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name);
	if (t && !IS_ERR(t)) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		/* Userspace must request exactly the current blob size. */
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		module_put(t->me);
		up(&ipt_mutex);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}
1111
1112 static int
1113 do_replace(void __user *user, unsigned int len)
1114 {
1115 int ret;
1116 struct ipt_replace tmp;
1117 struct ipt_table *t;
1118 struct ipt_table_info *newinfo, *oldinfo;
1119 struct ipt_counters *counters;
1120
1121 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1122 return -EFAULT;
1123
1124 /* Hack: Causes ipchains to give correct error msg --RR */
1125 if (len != sizeof(tmp) + tmp.size)
1126 return -ENOPROTOOPT;
1127
1128 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1129 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1130 return -ENOMEM;
1131
1132 newinfo = vmalloc(sizeof(struct ipt_table_info)
1133 + SMP_ALIGN(tmp.size) * num_possible_cpus());
1134 if (!newinfo)
1135 return -ENOMEM;
1136
1137 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1138 tmp.size) != 0) {
1139 ret = -EFAULT;
1140 goto free_newinfo;
1141 }
1142
1143 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1144 if (!counters) {
1145 ret = -ENOMEM;
1146 goto free_newinfo;
1147 }
1148 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1149
1150 ret = translate_table(tmp.name, tmp.valid_hooks,
1151 newinfo, tmp.size, tmp.num_entries,
1152 tmp.hook_entry, tmp.underflow);
1153 if (ret != 0)
1154 goto free_newinfo_counters;
1155
1156 duprintf("ip_tables: Translated table\n");
1157
1158 t = try_then_request_module(find_table_lock(tmp.name),
1159 "iptable_%s", tmp.name);
1160 if (!t || IS_ERR(t)) {
1161 ret = t ? PTR_ERR(t) : -ENOENT;
1162 goto free_newinfo_counters_untrans;
1163 }
1164
1165 /* You lied! */
1166 if (tmp.valid_hooks != t->valid_hooks) {
1167 duprintf("Valid hook crap: %08X vs %08X\n",
1168 tmp.valid_hooks, t->valid_hooks);
1169 ret = -EINVAL;
1170 goto put_module;
1171 }
1172
1173 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1174 if (!oldinfo)
1175 goto put_module;
1176
1177 /* Update module usage count based on number of rules */
1178 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1179 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1180 if ((oldinfo->number > oldinfo->initial_entries) ||
1181 (newinfo->number <= oldinfo->initial_entries))
1182 module_put(t->me);
1183 if ((oldinfo->number > oldinfo->initial_entries) &&
1184 (newinfo->number <= oldinfo->initial_entries))
1185 module_put(t->me);
1186
1187 /* Get the old counters. */
1188 get_counters(oldinfo, counters);
1189 /* Decrease module usage counts and free resource */
1190 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1191 vfree(oldinfo);
1192 if (copy_to_user(tmp.counters, counters,
1193 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1194 ret = -EFAULT;
1195 vfree(counters);
1196 up(&ipt_mutex);
1197 return ret;
1198
1199 put_module:
1200 module_put(t->me);
1201 up(&ipt_mutex);
1202 free_newinfo_counters_untrans:
1203 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1204 free_newinfo_counters:
1205 vfree(counters);
1206 free_newinfo:
1207 vfree(newinfo);
1208 return ret;
1209 }
1210
1211 /* We're lazy, and add to the first CPU; overflow works its fey magic
1212 * and everything is OK. */
1213 static inline int
1214 add_counter_to_entry(struct ipt_entry *e,
1215 const struct ipt_counters addme[],
1216 unsigned int *i)
1217 {
1218 #if 0
1219 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1220 *i,
1221 (long unsigned int)e->counters.pcnt,
1222 (long unsigned int)e->counters.bcnt,
1223 (long unsigned int)addme[*i].pcnt,
1224 (long unsigned int)addme[*i].bcnt);
1225 #endif
1226
1227 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1228
1229 (*i)++;
1230 return 0;
1231 }
1232
1233 static int
1234 do_add_counters(void __user *user, unsigned int len)
1235 {
1236 unsigned int i;
1237 struct ipt_counters_info tmp, *paddc;
1238 struct ipt_table *t;
1239 int ret = 0;
1240
1241 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1242 return -EFAULT;
1243
1244 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1245 return -EINVAL;
1246
1247 paddc = vmalloc(len);
1248 if (!paddc)
1249 return -ENOMEM;
1250
1251 if (copy_from_user(paddc, user, len) != 0) {
1252 ret = -EFAULT;
1253 goto free;
1254 }
1255
1256 t = find_table_lock(tmp.name);
1257 if (!t || IS_ERR(t)) {
1258 ret = t ? PTR_ERR(t) : -ENOENT;
1259 goto free;
1260 }
1261
1262 write_lock_bh(&t->lock);
1263 if (t->private->number != paddc->num_counters) {
1264 ret = -EINVAL;
1265 goto unlock_up_free;
1266 }
1267
1268 i = 0;
1269 IPT_ENTRY_ITERATE(t->private->entries,
1270 t->private->size,
1271 add_counter_to_entry,
1272 paddc->counters,
1273 &i);
1274 unlock_up_free:
1275 write_unlock_bh(&t->lock);
1276 up(&ipt_mutex);
1277 module_put(t->me);
1278 free:
1279 vfree(paddc);
1280
1281 return ret;
1282 }
1283
1284 static int
1285 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1286 {
1287 int ret;
1288
1289 if (!capable(CAP_NET_ADMIN))
1290 return -EPERM;
1291
1292 switch (cmd) {
1293 case IPT_SO_SET_REPLACE:
1294 ret = do_replace(user, len);
1295 break;
1296
1297 case IPT_SO_SET_ADD_COUNTERS:
1298 ret = do_add_counters(user, len);
1299 break;
1300
1301 default:
1302 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1303 ret = -EINVAL;
1304 }
1305
1306 return ret;
1307 }
1308
/* getsockopt() entry point: read-only iptables queries.
 * Requires CAP_NET_ADMIN since table contents reveal firewall policy. */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		/* Return table metadata: hook offsets, rule count, size. */
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* name came from userspace: force NUL termination before
		   using it as a lookup key / modprobe argument. */
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		/* Autoload "iptable_<name>" if needed; on success the
		   table comes back with ipt_mutex held and a module
		   reference taken. */
		t = try_then_request_module(find_table_lock(name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;

			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			up(&ipt_mutex);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		/* Copy a table's rules out to userspace; buffer must be
		   sized exactly as advertised by IPT_SO_GET_INFO. */
		struct ipt_get_entries get;

		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		/* Query the best supported revision of a match/target
		   extension, autoloading "ipt_<name>" if necessary. */
		struct ipt_get_revision rev;
		int (*revfn)(const char *, u8, int *);

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* NOTE(review): rev.name is not explicitly NUL-terminated
		   here before being used as a format argument — confirm
		   userspace/struct layout guarantees termination. */

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			revfn = target_revfn;
		else
			revfn = match_revfn;

		/* find_revision() deposits its result in ret. */
		try_then_request_module(find_revision(rev.name, rev.revision,
						      revfn, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
1409
1410 /* Registration hooks for targets. */
1411 int
1412 ipt_register_target(struct ipt_target *target)
1413 {
1414 int ret;
1415
1416 ret = down_interruptible(&ipt_mutex);
1417 if (ret != 0)
1418 return ret;
1419 list_add(&target->list, &ipt_target);
1420 up(&ipt_mutex);
1421 return ret;
1422 }
1423
/* Remove a target extension from the global list.
 * May sleep on ipt_mutex; caller must not hold it. */
void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
}
1431
1432 int
1433 ipt_register_match(struct ipt_match *match)
1434 {
1435 int ret;
1436
1437 ret = down_interruptible(&ipt_mutex);
1438 if (ret != 0)
1439 return ret;
1440
1441 list_add(&match->list, &ipt_match);
1442 up(&ipt_mutex);
1443
1444 return ret;
1445 }
1446
/* Remove a match extension from the global list.
 * May sleep on ipt_mutex; caller must not hold it. */
void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
}
1454
/* Register a table together with its initial ruleset (repl).
 * Returns 0 on success, -EEXIST if the name is already registered, or
 * a negative errno from allocation/translation/interruption. */
int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct ipt_table_info *newinfo;
	/* Static dummy "old" info so replace_table() always has
	   something to swap out; it holds no entries and is never
	   freed. */
	static struct ipt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };

	/* One cache-aligned copy of the entries per possible CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(repl->size) * num_possible_cpus());
	if (!newinfo)
		return -ENOMEM;

	memcpy(newinfo->entries, repl->entries, repl->size);

	/* Validate hook offsets/underflows and fix up the entries. */
	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		vfree(newinfo);
		return ret;
	}

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0) {
		vfree(newinfo);
		return ret;
	}

	/* Don't autoload: we'd eat our tail... */
	if (list_named_find(&ipt_tables, table->name)) {
		ret = -EEXIST;
		goto free_unlock;
	}

	/* Simplifies replace_table code. */
	table->private = &bootstrap;
	if (!replace_table(table, 0, newinfo, &ret))
		goto free_unlock;

	duprintf("table->private->number = %u\n",
		 table->private->number);

	/* save number of initial entries */
	table->private->initial_entries = table->private->number;

	rwlock_init(&table->lock);
	list_prepend(&ipt_tables, table);

 unlock:
	up(&ipt_mutex);
	return ret;

 free_unlock:
	vfree(newinfo);
	goto unlock;
}
1513
/* Unregister a table and free its ruleset.
 * Caller must guarantee no packets can still traverse the table. */
void ipt_unregister_table(struct ipt_table *table)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_tables, table);
	up(&ipt_mutex);

	/* Decrease module usage counts and free resources */
	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
			  cleanup_entry, NULL);
	vfree(table->private);
}
1525
/* Returns 1 if the port is matched by the [min,max] range, 0 otherwise;
 * invert (0 or 1) flips the result. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port && port <= max);

	return in_range ^ invert;
}
1535
/* Scan the TCP options area of skb for option kind `option'.
 * Returns !invert if found, invert if not; on an unreadable options
 * area sets *hotdrop and returns 0 (packet will be dropped). */
static int
tcp_find_option(u_int8_t option,
		const struct sk_buff *skb,
		unsigned int optlen,
		int invert,
		int *hotdrop)
{
	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
	unsigned int i;

	duprintf("tcp_match: finding option\n");

	/* No options present: "match" only when inverted. */
	if (!optlen)
		return invert;

	/* If we don't have the whole header, drop packet. */
	op = skb_header_pointer(skb,
				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
				optlen, _opt);
	if (op == NULL) {
		*hotdrop = 1;
		return 0;
	}

	/* Walk the TLV-encoded options: kinds 0 (EOL) and 1 (NOP) are
	   single bytes, everything else carries a length octet; the
	   `?: 1' guards against a zero length causing an endless loop. */
	for (i = 0; i < optlen; ) {
		if (op[i] == option) return !invert;
		if (op[i] < 2) i++;
		else i += op[i+1]?:1;
		/* NOTE(review): if op[i] >= 2 at i == optlen-1, op[i+1]
		   reads one byte past the copied option data — confirm
		   this is acceptable for the on-stack buffer. */
	}

	return invert;
}
1569
/* Built-in "-p tcp" match: ports, flags and a single option kind.
 * Returns 1 on match, 0 otherwise; sets *hotdrop to force a drop of
 * malformed/evil packets. */
static int
tcp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct tcphdr _tcph, *th;
	const struct ipt_tcp *tcpinfo = matchinfo;

	if (offset) {
		/* To quote Alan:

		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
		   causes this. Its a cracker trying to break in by doing a
		   flag overwrite to pass the direction checks.
		*/
		if (offset == 1) {
			duprintf("Dropping evil TCP offset=1 frag.\n");
			*hotdrop = 1;
		}
		/* Must not be a fragment. */
		return 0;
	}

/* XOR the condition with the corresponding inversion flag bit. */
#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil TCP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
			ntohs(th->source),
			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
		return 0;
	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
			ntohs(th->dest),
			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
		return 0;
	/* Octet 13 of the TCP header holds the flag bits (FIN..CWR);
	   masked-compare against the configured flag set. */
	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
		      == tcpinfo->flg_cmp,
		      IPT_TCP_INV_FLAGS))
		return 0;
	if (tcpinfo->option) {
		/* Data offset smaller than the base header is bogus. */
		if (th->doff * 4 < sizeof(_tcph)) {
			*hotdrop = 1;
			return 0;
		}
		if (!tcp_find_option(tcpinfo->option, skb,
				     th->doff*4 - sizeof(_tcph),
				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
				     hotdrop))
			return 0;
	}
	return 1;
}
1633
1634 /* Called when user tries to insert an entry of this type. */
1635 static int
1636 tcp_checkentry(const char *tablename,
1637 const struct ipt_ip *ip,
1638 void *matchinfo,
1639 unsigned int matchsize,
1640 unsigned int hook_mask)
1641 {
1642 const struct ipt_tcp *tcpinfo = matchinfo;
1643
1644 /* Must specify proto == TCP, and no unknown invflags */
1645 return ip->proto == IPPROTO_TCP
1646 && !(ip->invflags & IPT_INV_PROTO)
1647 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1648 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1649 }
1650
/* Built-in "-p udp" match: source/destination port ranges.
 * Returns 1 on match, 0 otherwise; sets *hotdrop on a truncated
 * header. */
static int
udp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct udphdr _udph, *uh;
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_udph), &_udph);
	if (uh == NULL) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil UDP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* Both port ranges must match (each individually invertible). */
	return port_match(udpinfo->spts[0], udpinfo->spts[1],
			  ntohs(uh->source),
			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
			      ntohs(uh->dest),
			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
}
1683
/* Called when user tries to insert an entry of this type.
 * Returns 1 when the rule is acceptable for the udp match. */
static int
udp_checkentry(const char *tablename,
	       const struct ipt_ip *ip,
	       void *matchinfo,
	       unsigned int matchinfosize,
	       unsigned int hook_mask)
{
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must specify proto == UDP, and no unknown invflags */
	if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
		duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
			 IPPROTO_UDP);
		return 0;
	}
	/* Match payload must be exactly one struct ipt_udp. */
	if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
		duprintf("ipt_udp: matchsize %u != %u\n",
			 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
		return 0;
	}
	/* No inversion bits outside the supported set. */
	if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
		duprintf("ipt_udp: unknown flags %X\n",
			 udpinfo->invflags);
		return 0;
	}

	return 1;
}
1713
/* Returns 1 if the type and code is matched by the range, 0 otherwise.
 * test_type 0xFF is the wildcard (any type/code); invert (0 or 1)
 * flips the result. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type
		       && code >= min_code && code <= max_code);

	return hit ^ invert;
}
1723
/* Built-in "-p icmp" match: ICMP type and code range.
 * Returns 1 on match, 0 otherwise; sets *hotdrop on a truncated
 * header. */
static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't. Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* code[0]/code[1] are the configured min/max code values. */
	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
1756
1757 /* Called when user tries to insert an entry of this type. */
1758 static int
1759 icmp_checkentry(const char *tablename,
1760 const struct ipt_ip *ip,
1761 void *matchinfo,
1762 unsigned int matchsize,
1763 unsigned int hook_mask)
1764 {
1765 const struct ipt_icmp *icmpinfo = matchinfo;
1766
1767 /* Must specify proto == ICMP, and no unknown invflags */
1768 return ip->proto == IPPROTO_ICMP
1769 && !(ip->invflags & IPT_INV_PROTO)
1770 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1771 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1772 }
1773
/* The built-in targets: standard (NULL) and error. */
/* The standard target deliberately has no ->target function; a NULL
   target is presumably handled specially by the rule traverser as the
   plain verdict/jump case — confirm against ipt_do_table(). */
static struct ipt_target ipt_standard_target = {
	.name = IPT_STANDARD_TARGET,
};

/* Target used for rules whose real target could not be resolved. */
static struct ipt_target ipt_error_target = {
	.name = IPT_ERROR_TARGET,
	.target = ipt_error,
};
1783
/* Hook the IPT_SO_SET_*/IPT_SO_GET_* sockopt ranges into the IPv4
   netfilter sockopt dispatcher. */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};
1793
/* Built-in protocol matches (tcp/udp/icmp), appended to ipt_match at
   module init so "-p tcp/udp/icmp" rules work without extra modules. */
static struct ipt_match tcp_matchstruct = {
	.name		= "tcp",
	.match		= &tcp_match,
	.checkentry	= &tcp_checkentry,
};

static struct ipt_match udp_matchstruct = {
	.name		= "udp",
	.match		= &udp_match,
	.checkentry	= &udp_checkentry,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};
1811
1812 #ifdef CONFIG_PROC_FS
1813 static inline int print_name(const char *i,
1814 off_t start_offset, char *buffer, int length,
1815 off_t *pos, unsigned int *count)
1816 {
1817 if ((*count)++ >= start_offset) {
1818 unsigned int namelen;
1819
1820 namelen = sprintf(buffer + *pos, "%s\n",
1821 i + sizeof(struct list_head));
1822 if (*pos + namelen > length) {
1823 /* Stop iterating */
1824 return 1;
1825 }
1826 *pos += namelen;
1827 }
1828 return 0;
1829 }
1830
1831 static inline int print_target(const struct ipt_target *t,
1832 off_t start_offset, char *buffer, int length,
1833 off_t *pos, unsigned int *count)
1834 {
1835 if (t == &ipt_standard_target || t == &ipt_error_target)
1836 return 0;
1837 return print_name((char *)t, start_offset, buffer, length, pos, count);
1838 }
1839
/* /proc/net/ip_tables_names read handler: one table name per line. */
static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	/* Return an empty read rather than block uninterruptibly. */
	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_tables, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start=(char *)((unsigned long)count-offset);
	return pos;
}
1857
/* /proc/net/ip_tables_targets read handler: one extension target name
   per line (built-ins are filtered out by print_target). */
static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see ipt_get_tables(). */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}
1874
/* /proc/net/ip_tables_matches read handler: one match name per line. */
static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_match, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see ipt_get_tables(). */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}
1891
/* The /proc/net files this module creates, with their read handlers.
   NULL-terminated; init() and fini() iterate until the sentinel. */
static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
  { "ip_tables_targets", ipt_get_targets },
  { "ip_tables_matches", ipt_get_matches },
  { NULL, NULL} };
1897 #endif /*CONFIG_PROC_FS*/
1898
/* Module init: register built-in targets/matches, the sockopt
 * interface and the /proc files.  Rolls everything back on failure. */
static int __init init(void)
{
	int ret;

	/* Noone else will be downing sem now, so we won't sleep */
	down(&ipt_mutex);
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
		struct proc_dir_entry *proc;
		int i;

		for (i = 0; ipt_proc_entry[i].name; i++) {
			proc = proc_net_create(ipt_proc_entry[i].name, 0,
					       ipt_proc_entry[i].get_info);
			if (!proc) {
				/* Unwind the proc files created so far,
				   then the sockopt registration. */
				while (--i >= 0)
					proc_net_remove(ipt_proc_entry[i].name);
				nf_unregister_sockopt(&ipt_sockopts);
				return -ENOMEM;
			}
			proc->owner = THIS_MODULE;
		}
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}
1941
1942 static void __exit fini(void)
1943 {
1944 nf_unregister_sockopt(&ipt_sockopts);
1945 #ifdef CONFIG_PROC_FS
1946 {
1947 int i;
1948 for (i = 0; ipt_proc_entry[i].name; i++)
1949 proc_net_remove(ipt_proc_entry[i].name);
1950 }
1951 #endif
1952 }
1953
/* Public API exported to iptables table and extension modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

module_init(init);
module_exit(fini);