]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - net/ipv4/netfilter/ip_tables.c
Merge HEAD from ../scsi-misc-2.6-old
[mirror_ubuntu-bionic-kernel.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
13 * a table
14 */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30 #include <linux/cpumask.h>
31
32 #include <linux/netfilter_ipv4/ip_tables.h>
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36 MODULE_DESCRIPTION("IPv4 packet filter");
37
38 /*#define DEBUG_IP_FIREWALL*/
39 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
40 /*#define DEBUG_IP_FIREWALL_USER*/
41
42 #ifdef DEBUG_IP_FIREWALL
43 #define dprintf(format, args...) printk(format , ## args)
44 #else
45 #define dprintf(format, args...)
46 #endif
47
48 #ifdef DEBUG_IP_FIREWALL_USER
49 #define duprintf(format, args...) printk(format , ## args)
50 #else
51 #define duprintf(format, args...)
52 #endif
53
54 #ifdef CONFIG_NETFILTER_DEBUG
55 #define IP_NF_ASSERT(x) \
56 do { \
57 if (!(x)) \
58 printk("IP_NF_ASSERT: %s:%s:%u\n", \
59 __FUNCTION__, __FILE__, __LINE__); \
60 } while(0)
61 #else
62 #define IP_NF_ASSERT(x)
63 #endif
64 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
65
66 static DECLARE_MUTEX(ipt_mutex);
67
68 /* Must have mutex */
69 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
71 #include <linux/netfilter_ipv4/listhelp.h>
72
73 #if 0
74 /* All the better to debug you with... */
75 #define static
76 #define inline
77 #endif
78
79 /*
80 We keep a set of rules for each CPU, so we can avoid write-locking
81 them in the softirq when updating the counters and therefore
82 only need to read-lock in the softirq; doing a write_lock_bh() in user
83 context stops packets coming through and allows user context to read
84 the counters or update the rules.
85
86 To be cache friendly on SMP, we arrange them like so:
87 [ n-entries ]
88 ... cache-align padding ...
89 [ n-entries ]
90
91 Hence the start of any table is given by get_table() below. */
92
93 /* The table itself */
/* The table itself: header followed by one rule blob per CPU so the
 * packet path never write-contends counters across CPUs. */
struct ipt_table_info
{
	/* Size per table */
	unsigned int size;
	/* Number of entries: FIXME. --RR */
	unsigned int number;
	/* Initial number of entries. Needed for module usage count */
	unsigned int initial_entries;

	/* Entry points and underflows: byte offsets into 'entries' for
	 * each netfilter hook this table services. */
	unsigned int hook_entry[NF_IP_NUMHOOKS];
	unsigned int underflow[NF_IP_NUMHOOKS];

	/* ipt_entry tables: one per CPU; CPU n's copy starts at
	 * entries + SMP_ALIGN(size) * n (see TABLE_OFFSET). */
	char entries[0] ____cacheline_aligned;
};
110
111 static LIST_HEAD(ipt_target);
112 static LIST_HEAD(ipt_match);
113 static LIST_HEAD(ipt_tables);
114 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
115
116 #ifdef CONFIG_SMP
117 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
118 #else
119 #define TABLE_OFFSET(t,p) 0
120 #endif
121
122 #if 0
123 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
125 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
126 #endif
127
128 /* Returns whether matches rule or not. */
/* Returns whether the IP header matches the rule's address/interface/
 * protocol/fragment criteria (1 = match, 0 = no match).
 * @indev/@outdev must be IFNAMSIZ buffers aligned for unsigned long
 * access (see nulldevname in ipt_do_table). */
static inline int
ip_packet_match(const struct iphdr *ip,
		const char *indev,
		const char *outdev,
		const struct ipt_ip *ipinfo,
		int isfrag)
{
	size_t i;
	unsigned long ret;

/* Evaluate condition 'bool', XORed with the rule's inversion flag, so a
 * set IPT_INV_* bit flips the sense of the test. */
#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))

	if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
		  IPT_INV_SRCIP)
	    || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
		     IPT_INV_DSTIP)) {
		dprintf("Source or dest mismatch.\n");

		dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->saddr),
			NIPQUAD(ipinfo->smsk.s_addr),
			NIPQUAD(ipinfo->src.s_addr),
			ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
		dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
			NIPQUAD(ip->daddr),
			NIPQUAD(ipinfo->dmsk.s_addr),
			NIPQUAD(ipinfo->dst.s_addr),
			ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
		return 0;
	}

	/* Look for ifname matches; this should unroll nicely.
	 * Compares IFNAMSIZ bytes one machine word at a time under the
	 * rule's per-word mask; ret ends up nonzero on any difference. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)indev)[i]
			^ ((const unsigned long *)ipinfo->iniface)[i])
			& ((const unsigned long *)ipinfo->iniface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
		dprintf("VIA in mismatch (%s vs %s).%s\n",
			indev, ipinfo->iniface,
			ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
		return 0;
	}

	/* Same masked word-wise compare for the output interface. */
	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
		ret |= (((const unsigned long *)outdev)[i]
			^ ((const unsigned long *)ipinfo->outiface)[i])
			& ((const unsigned long *)ipinfo->outiface_mask)[i];
	}

	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
		dprintf("VIA out mismatch (%s vs %s).%s\n",
			outdev, ipinfo->outiface,
			ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
		return 0;
	}

	/* Check specific protocol; proto == 0 means "any". */
	if (ipinfo->proto
	    && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
		dprintf("Packet protocol %hi does not match %hi.%s\n",
			ip->protocol, ipinfo->proto,
			ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
		return 0;
	}

	/* If we have a fragment rule but the packet is not a fragment
	 * then we return zero */
	if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
		dprintf("Fragment rule but not fragment.%s\n",
			ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
		return 0;
	}

	return 1;
}
206
207 static inline int
208 ip_checkentry(const struct ipt_ip *ip)
209 {
210 if (ip->flags & ~IPT_F_MASK) {
211 duprintf("Unknown flag bits set: %08X\n",
212 ip->flags & ~IPT_F_MASK);
213 return 0;
214 }
215 if (ip->invflags & ~IPT_INV_MASK) {
216 duprintf("Unknown invflag bits set: %08X\n",
217 ip->invflags & ~IPT_INV_MASK);
218 return 0;
219 }
220 return 1;
221 }
222
/* Target handler that logs (rate-limited) the error-target's name string
 * and unconditionally drops the packet. */
static unsigned int
ipt_error(struct sk_buff **pskb,
	  const struct net_device *in,
	  const struct net_device *out,
	  unsigned int hooknum,
	  const void *targinfo,
	  void *userinfo)
{
	if (net_ratelimit())
		printk("ip_tables: error: `%s'\n", (char *)targinfo);

	return NF_DROP;
}
236
237 static inline
238 int do_match(struct ipt_entry_match *m,
239 const struct sk_buff *skb,
240 const struct net_device *in,
241 const struct net_device *out,
242 int offset,
243 int *hotdrop)
244 {
245 /* Stop iteration if it doesn't match */
246 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
247 return 1;
248 else
249 return 0;
250 }
251
/* Convert a byte offset within a rule blob into an entry pointer. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)((char *)base + offset);
}
257
/* Returns one of the generic firewall policies, like NF_ACCEPT.
 *
 * Main packet-path entry point: walks this CPU's copy of the rule blob
 * starting at the hook's entry point, evaluating matches and targets,
 * until a rule produces an absolute verdict or hotdrop is raised.
 * Runs under the table's read lock (bh-disabled). */
unsigned int
ipt_do_table(struct sk_buff **pskb,
	     unsigned int hook,
	     const struct net_device *in,
	     const struct net_device *out,
	     struct ipt_table *table,
	     void *userdata)
{
	/* Long-aligned so ip_packet_match can compare it word-wise. */
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	u_int16_t offset;
	struct iphdr *ip;
	u_int16_t datalen;
	int hotdrop = 0;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	void *table_base;
	struct ipt_entry *e, *back;

	/* Initialization */
	ip = (*pskb)->nh.iph;
	datalen = (*pskb)->len - ip->ihl * 4;
	indev = in ? in->name : nulldevname;
	outdev = out ? out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.  All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports).  If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	offset = ntohs(ip->frag_off) & IP_OFFSET;

	read_lock_bh(&table->lock);
	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
	/* Each CPU traverses its own copy of the entries blob. */
	table_base = (void *)table->private->entries
		+ TABLE_OFFSET(table->private, smp_processor_id());
	e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
	/* Check noone else using our table */
	if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
	    && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
		printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
		       smp_processor_id(),
		       table->name,
		       &((struct ipt_entry *)table_base)->comefrom,
		       ((struct ipt_entry *)table_base)->comefrom);
	}
	((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif

	/* For return from builtin chain */
	back = get_entry(table_base, table->private->underflow[hook]);

	do {
		IP_NF_ASSERT(e);
		IP_NF_ASSERT(back);
		if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
			struct ipt_entry_target *t;

			if (IPT_MATCH_ITERATE(e, do_match,
					      *pskb, in, out,
					      offset, &hotdrop) != 0)
				goto no_match;

			ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);

			t = ipt_get_target(e);
			IP_NF_ASSERT(t->u.kernel.target);
			/* Standard target? (no target function => verdict
			 * or jump encoded directly in the entry) */
			if (!t->u.kernel.target->target) {
				int v;

				v = ((struct ipt_standard_target *)t)->verdict;
				if (v < 0) {
					/* Pop from stack? */
					if (v != IPT_RETURN) {
						/* Negative verdict encodes
						 * -verdict - 1 (e.g. NF_DROP). */
						verdict = (unsigned)(-v) - 1;
						break;
					}
					/* RETURN: resume at saved back
					 * pointer, restore its caller. */
					e = back;
					back = get_entry(table_base,
							 back->comefrom);
					continue;
				}
				if (table_base + v != (void *)e + e->next_offset
				    && !(e->ip.flags & IPT_F_GOTO)) {
					/* Save old back ptr in next entry */
					struct ipt_entry *next
						= (void *)e + e->next_offset;
					next->comefrom
						= (void *)back - table_base;
					/* set back pointer to next entry */
					back = next;
				}

				e = get_entry(table_base, v);
			} else {
				/* Targets which reenter must return
				   abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
				((struct ipt_entry *)table_base)->comefrom
					= 0xeeeeeeec;
#endif
				verdict = t->u.kernel.target->target(pskb,
								     in, out,
								     hook,
								     t->data,
								     userdata);

#ifdef CONFIG_NETFILTER_DEBUG
				if (((struct ipt_entry *)table_base)->comefrom
				    != 0xeeeeeeec
				    && verdict == IPT_CONTINUE) {
					printk("Target %s reentered!\n",
					       t->u.kernel.target->name);
					verdict = NF_DROP;
				}
				((struct ipt_entry *)table_base)->comefrom
					= 0x57acc001;
#endif
				/* Target might have changed stuff. */
				ip = (*pskb)->nh.iph;
				datalen = (*pskb)->len - ip->ihl * 4;

				if (verdict == IPT_CONTINUE)
					e = (void *)e + e->next_offset;
				else
					/* Verdict */
					break;
			}
		} else {

		no_match:
			e = (void *)e + e->next_offset;
		}
	} while (!hotdrop);

#ifdef CONFIG_NETFILTER_DEBUG
	((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
	read_unlock_bh(&table->lock);

#ifdef DEBUG_ALLOW_ALL
	return NF_ACCEPT;
#else
	if (hotdrop)
		return NF_DROP;
	else return verdict;
#endif
}
410
411 /*
412 * These are weird, but module loading must not be done with mutex
413 * held (since they will register), and we have to have a single
414 * function to use try_then_request_module().
415 */
416
/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error.
 * NOTE: on success this returns with ipt_mutex still HELD and a module
 * reference taken; the caller must module_put() and up(&ipt_mutex).
 * On failure the mutex is released before returning. */
static inline struct ipt_table *find_table_lock(const char *name)
{
	struct ipt_table *t;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_tables, list)
		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
			return t;
	up(&ipt_mutex);
	return NULL;
}
431
/* Find match, grabs ref.  Returns ERR_PTR() on error.
 * Unlike find_table_lock(), the mutex is always released before return.
 * Result: the match on exact name+revision hit; ERR_PTR(-EPROTOTYPE) if
 * the name exists but no revision matched; NULL (ERR_PTR(0)) if the
 * name was not found at all. */
static inline struct ipt_match *find_match(const char *name, u8 revision)
{
	struct ipt_match *m;
	int err = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(m, &ipt_match, list) {
		if (strcmp(m->name, name) == 0) {
			if (m->revision == revision) {
				if (try_module_get(m->me)) {
					up(&ipt_mutex);
					return m;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	up(&ipt_mutex);
	return ERR_PTR(err);
}
455
/* Find target, grabs ref.  Returns ERR_PTR() on error.
 * Mirror image of find_match() over the ipt_target list: mutex released
 * before return; ERR_PTR(-EPROTOTYPE) when the name exists but no
 * revision matched; NULL when the name is unknown. */
static inline struct ipt_target *find_target(const char *name, u8 revision)
{
	struct ipt_target *t;
	int err = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return ERR_PTR(-EINTR);

	list_for_each_entry(t, &ipt_target, list) {
		if (strcmp(t->name, name) == 0) {
			if (t->revision == revision) {
				if (try_module_get(t->me)) {
					up(&ipt_mutex);
					return t;
				}
			} else
				err = -EPROTOTYPE; /* Found something. */
		}
	}
	up(&ipt_mutex);
	return ERR_PTR(err);
}
479
480 struct ipt_target *ipt_find_target(const char *name, u8 revision)
481 {
482 struct ipt_target *target;
483
484 target = try_then_request_module(find_target(name, revision),
485 "ipt_%s", name);
486 if (IS_ERR(target) || !target)
487 return NULL;
488 return target;
489 }
490
491 static int match_revfn(const char *name, u8 revision, int *bestp)
492 {
493 struct ipt_match *m;
494 int have_rev = 0;
495
496 list_for_each_entry(m, &ipt_match, list) {
497 if (strcmp(m->name, name) == 0) {
498 if (m->revision > *bestp)
499 *bestp = m->revision;
500 if (m->revision == revision)
501 have_rev = 1;
502 }
503 }
504 return have_rev;
505 }
506
507 static int target_revfn(const char *name, u8 revision, int *bestp)
508 {
509 struct ipt_target *t;
510 int have_rev = 0;
511
512 list_for_each_entry(t, &ipt_target, list) {
513 if (strcmp(t->name, name) == 0) {
514 if (t->revision > *bestp)
515 *bestp = t->revision;
516 if (t->revision == revision)
517 have_rev = 1;
518 }
519 }
520 return have_rev;
521 }
522
/* Returns true or false (if no such extension at all).
 * Runs @revfn (match_revfn/target_revfn) under ipt_mutex and reports
 * through *err: the best available revision on success, -EPROTONOSUPPORT
 * when the name exists but not at @revision, -ENOENT when unknown
 * (return 0 so the caller can try loading a module), -EINTR when the
 * mutex wait was interrupted. */
static inline int find_revision(const char *name, u8 revision,
				int (*revfn)(const char *, u8, int *),
				int *err)
{
	int have_rev, best = -1;

	if (down_interruptible(&ipt_mutex) != 0) {
		*err = -EINTR;
		return 1;
	}
	have_rev = revfn(name, revision, &best);
	up(&ipt_mutex);

	/* Nothing at all?  Return 0 to try loading module. */
	if (best == -1) {
		*err = -ENOENT;
		return 0;
	}

	*err = best;
	if (!have_rev)
		*err = -EPROTONOSUPPORT;
	return 1;
}
548
549
550 /* All zeroes == unconditional rule. */
551 static inline int
552 unconditional(const struct ipt_ip *ip)
553 {
554 unsigned int i;
555
556 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
557 if (((__u32 *)ip)[i])
558 return 0;
559
560 return 1;
561 }
562
/* Figures out from what hook each rule can be called: returns 0 if
   there are loops.  Puts hook bitmask in comefrom.
   Iterative depth-first walk of the rule graph from each valid hook's
   entry point.  Instead of recursing, it temporarily stores the
   back-pointer (position of the jump source) in e->counters.pcnt and
   restores it to 0 on the way back.  Bit NF_IP_NUMHOOKS of e->comefrom
   marks "on the current walk path"; seeing it set again means a loop. */
static int
mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e
			= (struct ipt_entry *)(newinfo->entries + pos);

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			struct ipt_standard_target *t
				= (void *)ipt_get_target(e);

			if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
				printk("iptables: loop hook %u pos %u %08X.\n",
				       hook, pos, e->comefrom);
				return 0;
			}
			e->comefrom
				|= ((1 << hook) | (1 << NF_IP_NUMHOOKS));

			/* Unconditional return/END. */
			if (e->target_offset == sizeof(struct ipt_entry)
			    && (strcmp(t->target.u.user.name,
				       IPT_STANDARD_TARGET) == 0)
			    && t->verdict < 0
			    && unconditional(&e->ip)) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump. */
				do {
					/* Leaving this entry: clear the
					   on-path marker. */
					e->comefrom ^= (1<<NF_IP_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
					if (e->comefrom
					    & (1 << NF_IP_NUMHOOKS)) {
						duprintf("Back unset "
							 "on hook %u "
							 "rule %u\n",
							 hook, pos);
					}
#endif
					/* Pop the saved back pointer and
					   restore pcnt for real use. */
					oldpos = pos;
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = (struct ipt_entry *)
						(newinfo->entries + pos);
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = (struct ipt_entry *)
					(newinfo->entries + pos + size);
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   IPT_STANDARD_TARGET) == 0
				    && newpos >= 0) {
					/* This a jump; chase it. */
					duprintf("Jump rule %u -> %u\n",
						 pos, newpos);
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
				}
				e = (struct ipt_entry *)
					(newinfo->entries + newpos);
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
		next:
		duprintf("Finished chain %u\n", hook);
	}
	return 1;
}
658
659 static inline int
660 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
661 {
662 if (i && (*i)-- == 0)
663 return 1;
664
665 if (m->u.kernel.match->destroy)
666 m->u.kernel.match->destroy(m->data,
667 m->u.match_size - sizeof(*m));
668 module_put(m->u.kernel.match->me);
669 return 0;
670 }
671
/* Validate a standard target coming from userspace: size must be the
 * aligned size of ipt_standard_target, and the verdict must be either a
 * plausible jump offset within the blob (non-negative) or a sane
 * negative verdict code.  Returns 1 on success, 0 on failure. */
static inline int
standard_check(const struct ipt_entry_target *t,
	       unsigned int max_offset)
{
	struct ipt_standard_target *targ = (void *)t;

	/* Check standard info. */
	if (t->u.target_size
	    != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
		duprintf("standard_check: target size %u != %u\n",
			 t->u.target_size,
			 IPT_ALIGN(sizeof(struct ipt_standard_target)));
		return 0;
	}

	/* Non-negative verdicts are jump offsets; they must land within
	 * the table blob. */
	if (targ->verdict >= 0
	    && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
		duprintf("ipt_standard_check: bad verdict (%i)\n",
			 targ->verdict);
		return 0;
	}

	/* Negative verdicts encode -verdict - 1; bound-check the code. */
	if (targ->verdict < -NF_MAX_VERDICT - 1) {
		duprintf("ipt_standard_check: bad negative verdict (%i)\n",
			 targ->verdict);
		return 0;
	}
	return 1;
}
701
/* Resolve and validate one match of a new rule: look up the extension
 * (auto-loading module "ipt_<name>" if needed), store the kernel pointer
 * in m->u.kernel.match, and run the extension's checkentry hook.
 * On success increments *i (count of matches successfully set up, used
 * for unwinding) and returns 0; on failure returns -errno with no
 * reference left held. */
static inline int
check_match(struct ipt_entry_match *m,
	    const char *name,
	    const struct ipt_ip *ip,
	    unsigned int hookmask,
	    unsigned int *i)
{
	struct ipt_match *match;

	match = try_then_request_module(find_match(m->u.user.name,
						   m->u.user.revision),
					"ipt_%s", m->u.user.name);
	if (IS_ERR(match) || !match) {
		duprintf("check_match: `%s' not found\n", m->u.user.name);
		return match ? PTR_ERR(match) : -ENOENT;
	}
	m->u.kernel.match = match;

	if (m->u.kernel.match->checkentry
	    && !m->u.kernel.match->checkentry(name, ip, m->data,
					      m->u.match_size - sizeof(*m),
					      hookmask)) {
		/* checkentry rejected the rule: drop the ref we took. */
		module_put(m->u.kernel.match->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 m->u.kernel.match->name);
		return -EINVAL;
	}

	(*i)++;
	return 0;
}
733
734 static struct ipt_target ipt_standard_target;
735
/* Fully validate one rule: IP selector, all matches, then the target.
 * Takes module references on each match/target it resolves; on any
 * failure it unwinds exactly the @j matches already set up via
 * cleanup_match and returns -errno.  On success increments *i. */
static inline int
check_entry(struct ipt_entry *e, const char *name, unsigned int size,
	    unsigned int *i)
{
	struct ipt_entry_target *t;
	struct ipt_target *target;
	int ret;
	unsigned int j;	/* number of matches successfully checked */

	if (!ip_checkentry(&e->ip)) {
		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
		return -EINVAL;
	}

	j = 0;
	ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
	if (ret != 0)
		goto cleanup_matches;

	t = ipt_get_target(e);
	target = try_then_request_module(find_target(t->u.user.name,
						     t->u.user.revision),
					 "ipt_%s", t->u.user.name);
	if (IS_ERR(target) || !target) {
		duprintf("check_entry: `%s' not found\n", t->u.user.name);
		ret = target ? PTR_ERR(target) : -ENOENT;
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	/* Standard target gets the structural verdict check; others run
	 * their own checkentry hook. */
	if (t->u.kernel.target == &ipt_standard_target) {
		if (!standard_check(t, size)) {
			ret = -EINVAL;
			goto cleanup_matches;
		}
	} else if (t->u.kernel.target->checkentry
		   && !t->u.kernel.target->checkentry(name, e, t->data,
						      t->u.target_size
						      - sizeof(*t),
						      e->comefrom)) {
		module_put(t->u.kernel.target->me);
		duprintf("ip_tables: check failed for `%s'.\n",
			 t->u.kernel.target->name);
		ret = -EINVAL;
		goto cleanup_matches;
	}

	(*i)++;
	return 0;

 cleanup_matches:
	/* Undo only the matches that were successfully checked. */
	IPT_MATCH_ITERATE(e, cleanup_match, &j);
	return ret;
}
790
/* Structural pass over one entry of a userspace blob: verify alignment
 * and that the entry header fits before @limit, that next_offset can at
 * least hold an entry plus a target, record hook entry/underflow offsets
 * that land exactly on this entry, and zero the kernel-owned counters
 * and comefrom fields.  Increments *i (entry count) on success. */
static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Check hooks & underflows */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
           < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	(*i)++;
	return 0;
}
833
/* Release one whole rule: tear down every match, then run the target's
 * destructor and drop its module reference.  When @i is non-NULL, stop
 * (return 1) once *i entries have been processed — used to unwind only
 * the entries that were successfully checked. */
static inline int
cleanup_entry(struct ipt_entry *e, unsigned int *i)
{
	struct ipt_entry_target *t;

	if (i && (*i)-- == 0)
		return 1;

	/* Cleanup all matches (NULL count => all of them). */
	IPT_MATCH_ITERATE(e, cleanup_match, NULL);
	t = ipt_get_target(e);
	if (t->u.kernel.target->destroy)
		t->u.kernel.target->destroy(t->data,
					    t->u.target_size - sizeof(*t));
	module_put(t->u.kernel.target->me);
	return 0;
}
851
/* Checks and translates the user-supplied table segment (held in
   newinfo).
   Pipeline: (1) structural pass (sizes, hook/underflow offsets),
   (2) verify every valid hook got an entry and underflow,
   (3) loop detection via mark_source_chains, (4) semantic pass
   resolving matches/targets (check_entry), unwinding on failure,
   (5) replicate CPU 0's verified blob to every other CPU.
   Returns 0 on success or -errno; on success module references for all
   extensions are held (released later by cleanup_entry). */
static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int i;
	int ret;

	newinfo->size = size;
	newinfo->number = number;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	duprintf("translate_table: size %u\n", newinfo->size);
	i = 0;
	/* Walk through entries, checking offsets. */
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				newinfo->entries,
				newinfo->entries + size,
				hook_entries, underflows, &i);
	if (ret != 0)
		return ret;

	if (i != number) {
		duprintf("translate_table: %u not %u entries\n",
			 i, number);
		return -EINVAL;
	}

	/* Check hooks all assigned */
	for (i = 0; i < NF_IP_NUMHOOKS; i++) {
		/* Only hooks which are valid */
		if (!(valid_hooks & (1 << i)))
			continue;
		if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
			duprintf("Invalid hook entry %u %u\n",
				 i, hook_entries[i]);
			return -EINVAL;
		}
		if (newinfo->underflow[i] == 0xFFFFFFFF) {
			duprintf("Invalid underflow %u %u\n",
				 i, underflows[i]);
			return -EINVAL;
		}
	}

	if (!mark_source_chains(newinfo, valid_hooks))
		return -ELOOP;

	/* Finally, each sanity check must pass */
	i = 0;
	ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry, name, size, &i);

	if (ret != 0) {
		/* Unwind only the i entries that passed check_entry. */
		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				  cleanup_entry, &i);
		return ret;
	}

	/* And one copy for every other CPU */
	for_each_cpu(i) {
		if (i == 0)
			continue;
		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size) * i,
		       newinfo->entries,
		       SMP_ALIGN(newinfo->size));
	}

	return ret;
}
935
/* Atomically swap a table's private rule blob for @newinfo under the
 * table's write lock.  Fails with -EAGAIN (via *error) if the caller's
 * counter count no longer matches the live table — i.e. someone raced a
 * replace in between.  Returns the old ipt_table_info (for the caller
 * to drain counters and free) or NULL on error. */
static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *oldinfo;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		/* Mark every CPU's first entry "not in use" for the
		 * comefrom debug checks in ipt_do_table. */
		struct ipt_entry *table_base;
		unsigned int i;

		for_each_cpu(i) {
			table_base =
				(void *)newinfo->entries
				+ TABLE_OFFSET(newinfo, i);

			table_base->comefrom = 0xdead57ac;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);
	/* Check inside lock: is the old number correct? */
	if (num_counters != table->private->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, table->private->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}
	oldinfo = table->private;
	table->private = newinfo;
	newinfo->initial_entries = oldinfo->initial_entries;
	write_unlock_bh(&table->lock);

	return oldinfo;
}
976
977 /* Gets counters. */
978 static inline int
979 add_entry_to_counter(const struct ipt_entry *e,
980 struct ipt_counters total[],
981 unsigned int *i)
982 {
983 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
984
985 (*i)++;
986 return 0;
987 }
988
/* Sum each rule's counters across every CPU's copy of the blob into the
 * caller-provided (pre-zeroed) @counters array, one slot per rule.
 * Caller is responsible for any locking against concurrent updates. */
static void
get_counters(const struct ipt_table_info *t,
	     struct ipt_counters counters[])
{
	unsigned int cpu;
	unsigned int i;

	for_each_cpu(cpu) {
		i = 0;	/* rule index restarts for each CPU's copy */
		IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
	}
}
1005
/* Copy the table's rules out to userspace: snapshot the summed per-CPU
 * counters under the write lock, bulk-copy CPU 0's blob, then patch each
 * copied entry in place — its counters with the summed values, and each
 * match/target's kernel union back to the user-visible extension name.
 * Returns 0 or -EFAULT/-ENOMEM. */
static int
copy_entries_to_user(unsigned int total_size,
		     struct ipt_table *table,
		     void __user *userptr)
{
	unsigned int off, num, countersize;
	struct ipt_entry *e;
	struct ipt_counters *counters;
	int ret = 0;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct ipt_counters) * table->private->number;
	counters = vmalloc(countersize);

	if (counters == NULL)
		return -ENOMEM;

	/* First, sum counters... */
	memset(counters, 0, countersize);
	write_lock_bh(&table->lock);
	get_counters(table->private, counters);
	write_unlock_bh(&table->lock);

	/* ... then copy entire thing from CPU 0... */
	if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
		ret = -EFAULT;
		goto free_counters;
	}

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		struct ipt_entry_match *m;
		struct ipt_entry_target *t;

		e = (struct ipt_entry *)(table->private->entries + off);
		/* Overwrite the copied entry's counters with the sum. */
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* Restore each match's user-visible name (the kernel
		 * union holds a pointer, meaningless to userspace). */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (copy_to_user(userptr + off + i
					 + offsetof(struct ipt_entry_match,
						    u.user.name),
					 m->u.kernel.match->name,
					 strlen(m->u.kernel.match->name)+1)
			    != 0) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		/* Likewise for the target's name. */
		t = ipt_get_target(e);
		if (copy_to_user(userptr + off + e->target_offset
				 + offsetof(struct ipt_entry_target,
					    u.user.name),
				 t->u.kernel.target->name,
				 strlen(t->u.kernel.target->name)+1) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}
1084
/* IPT_SO_GET_ENTRIES handler: locate the table (find_table_lock returns
 * with ipt_mutex held and a module ref on success), verify the user's
 * expected size matches the live table, and dump the rules.  Releases
 * the ref and mutex before returning. */
static int
get_entries(const struct ipt_get_entries *entries,
	    struct ipt_get_entries __user *uptr)
{
	int ret;
	struct ipt_table *t;

	t = find_table_lock(entries->name);
	if (t && !IS_ERR(t)) {
		duprintf("t->private->number = %u\n",
			 t->private->number);
		if (entries->size == t->private->size)
			ret = copy_entries_to_user(t->private->size,
						   t, uptr->entrytable);
		else {
			duprintf("get_entries: I've got %u not %u!\n",
				 t->private->size,
				 entries->size);
			ret = -EINVAL;
		}
		module_put(t->me);
		up(&ipt_mutex);
	} else
		ret = t ? PTR_ERR(t) : -ENOENT;

	return ret;
}
1112
/* IPT_SO_SET_REPLACE handler: copy the ipt_replace header and rule blob
 * from userspace, validate/translate it, swap it into the named table
 * (auto-loading "iptable_<name>" if needed), adjust the table module's
 * refcount to reflect whether user rules remain, return the old rules'
 * counters to userspace, and free the old blob.  Error paths unwind via
 * the labels at the bottom in reverse order of acquisition. */
static int
do_replace(void __user *user, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct ipt_table *t;
	struct ipt_table_info *newinfo, *oldinfo;
	struct ipt_counters *counters;

	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
		return -EFAULT;

	/* Hack: Causes ipchains to give correct error msg --RR */
	if (len != sizeof(tmp) + tmp.size)
		return -ENOPROTOOPT;

	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
		return -ENOMEM;

	/* One SMP_ALIGNed copy of the blob per possible CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(tmp.size) *
			  	(highest_possible_processor_id()+1));
	if (!newinfo)
		return -ENOMEM;

	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
			   tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
	if (!counters) {
		ret = -ENOMEM;
		goto free_newinfo;
	}
	memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));

	ret = translate_table(tmp.name, tmp.valid_hooks,
			      newinfo, tmp.size, tmp.num_entries,
			      tmp.hook_entry, tmp.underflow);
	if (ret != 0)
		goto free_newinfo_counters;

	duprintf("ip_tables: Translated table\n");

	t = try_then_request_module(find_table_lock(tmp.name),
				    "iptable_%s", tmp.name);
	if (!t || IS_ERR(t)) {
		ret = t ? PTR_ERR(t) : -ENOENT;
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (tmp.valid_hooks != t->valid_hooks) {
		duprintf("Valid hook crap: %08X vs %08X\n",
			 tmp.valid_hooks, t->valid_hooks);
		ret = -EINVAL;
		goto put_module;
	}

	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules: the module
	 * holds one extra self-reference while user rules (beyond the
	 * built-in initial entries) exist; drop it when they go away.
	 * Two puts when the replace both removes user rules AND releases
	 * the reference find_table_lock took. */
	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
		oldinfo->number, oldinfo->initial_entries, newinfo->number);
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	/* Get the old counters. */
	get_counters(oldinfo, counters);
	/* Decrease module usage counts and free resource */
	IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
	vfree(oldinfo);
	if (copy_to_user(tmp.counters, counters,
			 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
		ret = -EFAULT;
	vfree(counters);
	up(&ipt_mutex);
	return ret;

 put_module:
	module_put(t->me);
	up(&ipt_mutex);
 free_newinfo_counters_untrans:
	/* translate_table succeeded: drop the refs it took. */
	IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
 free_newinfo_counters:
	vfree(counters);
 free_newinfo:
	vfree(newinfo);
	return ret;
}
1212
/* We're lazy, and add to the first CPU; overflow works its fey magic
 * and everything is OK. */
/* IPT_ENTRY_ITERATE() callback for do_add_counters(): fold the
 * user-supplied byte/packet counts addme[*i] into entry @e's
 * counters and advance the index.  Always returns 0 so the
 * iteration visits every entry. */
static inline int
add_counter_to_entry(struct ipt_entry *e,
		     const struct ipt_counters addme[],
		     unsigned int *i)
{
#if 0
	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
		 *i,
		 (long unsigned int)e->counters.pcnt,
		 (long unsigned int)e->counters.bcnt,
		 (long unsigned int)addme[*i].pcnt,
		 (long unsigned int)addme[*i].bcnt);
#endif

	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);

	(*i)++;
	return 0;
}
1234
1235 static int
1236 do_add_counters(void __user *user, unsigned int len)
1237 {
1238 unsigned int i;
1239 struct ipt_counters_info tmp, *paddc;
1240 struct ipt_table *t;
1241 int ret = 0;
1242
1243 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1244 return -EFAULT;
1245
1246 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1247 return -EINVAL;
1248
1249 paddc = vmalloc(len);
1250 if (!paddc)
1251 return -ENOMEM;
1252
1253 if (copy_from_user(paddc, user, len) != 0) {
1254 ret = -EFAULT;
1255 goto free;
1256 }
1257
1258 t = find_table_lock(tmp.name);
1259 if (!t || IS_ERR(t)) {
1260 ret = t ? PTR_ERR(t) : -ENOENT;
1261 goto free;
1262 }
1263
1264 write_lock_bh(&t->lock);
1265 if (t->private->number != paddc->num_counters) {
1266 ret = -EINVAL;
1267 goto unlock_up_free;
1268 }
1269
1270 i = 0;
1271 IPT_ENTRY_ITERATE(t->private->entries,
1272 t->private->size,
1273 add_counter_to_entry,
1274 paddc->counters,
1275 &i);
1276 unlock_up_free:
1277 write_unlock_bh(&t->lock);
1278 up(&ipt_mutex);
1279 module_put(t->me);
1280 free:
1281 vfree(paddc);
1282
1283 return ret;
1284 }
1285
1286 static int
1287 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1288 {
1289 int ret;
1290
1291 if (!capable(CAP_NET_ADMIN))
1292 return -EPERM;
1293
1294 switch (cmd) {
1295 case IPT_SO_SET_REPLACE:
1296 ret = do_replace(user, len);
1297 break;
1298
1299 case IPT_SO_SET_ADD_COUNTERS:
1300 ret = do_add_counters(user, len);
1301 break;
1302
1303 default:
1304 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1305 ret = -EINVAL;
1306 }
1307
1308 return ret;
1309 }
1310
/* getsockopt() entry point for the IPT_SO_GET_* commands.
 * Requires CAP_NET_ADMIN.  *len is the userspace buffer length. */
static int
do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	switch (cmd) {
	case IPT_SO_GET_INFO: {
		char name[IPT_TABLE_MAXNAMELEN];
		struct ipt_table *t;

		if (*len != sizeof(struct ipt_getinfo)) {
			duprintf("length %u != %u\n", *len,
				 sizeof(struct ipt_getinfo));
			ret = -EINVAL;
			break;
		}

		if (copy_from_user(name, user, sizeof(name)) != 0) {
			ret = -EFAULT;
			break;
		}
		/* Userspace string: force NUL termination. */
		name[IPT_TABLE_MAXNAMELEN-1] = '\0';

		/* On success the table is returned with ipt_mutex held
		 * and a module reference taken; both dropped below. */
		t = try_then_request_module(find_table_lock(name),
					    "iptable_%s", name);
		if (t && !IS_ERR(t)) {
			struct ipt_getinfo info;

			/* Fill in a snapshot of the table's metadata. */
			info.valid_hooks = t->valid_hooks;
			memcpy(info.hook_entry, t->private->hook_entry,
			       sizeof(info.hook_entry));
			memcpy(info.underflow, t->private->underflow,
			       sizeof(info.underflow));
			info.num_entries = t->private->number;
			info.size = t->private->size;
			memcpy(info.name, name, sizeof(info.name));

			if (copy_to_user(user, &info, *len) != 0)
				ret = -EFAULT;
			else
				ret = 0;
			up(&ipt_mutex);
			module_put(t->me);
		} else
			ret = t ? PTR_ERR(t) : -ENOENT;
	}
	break;

	case IPT_SO_GET_ENTRIES: {
		struct ipt_get_entries get;

		/* Two-stage length check: header first, then header
		 * plus the entry blob size the header claims. */
		if (*len < sizeof(get)) {
			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
			ret = -EINVAL;
		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
			ret = -EFAULT;
		} else if (*len != sizeof(struct ipt_get_entries) + get.size) {
			duprintf("get_entries: %u != %u\n", *len,
				 sizeof(struct ipt_get_entries) + get.size);
			ret = -EINVAL;
		} else
			ret = get_entries(&get, user);
		break;
	}

	case IPT_SO_GET_REVISION_MATCH:
	case IPT_SO_GET_REVISION_TARGET: {
		struct ipt_get_revision rev;
		/* Revision lookup helper, chosen by command below. */
		int (*revfn)(const char *, u8, int *);

		if (*len != sizeof(rev)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
			ret = -EFAULT;
			break;
		}

		if (cmd == IPT_SO_GET_REVISION_TARGET)
			revfn = target_revfn;
		else
			revfn = match_revfn;

		/* ret is filled in by the revision function; autoload
		 * the extension module if the first lookup fails. */
		try_then_request_module(find_revision(rev.name, rev.revision,
						      revfn, &ret),
					"ipt_%s", rev.name);
		break;
	}

	default:
		duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
		ret = -EINVAL;
	}

	return ret;
}
1411
/* Registration hooks for targets. */
/* Add @target to the global target list under ipt_mutex.
 * Returns 0 on success, or nonzero (interrupted while waiting for
 * the mutex). */
int
ipt_register_target(struct ipt_target *target)
{
	int ret;

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0)
		return ret;
	list_add(&target->list, &ipt_target);
	up(&ipt_mutex);
	return ret;
}

/* Remove @target from the global target list. */
void
ipt_unregister_target(struct ipt_target *target)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_target, target);
	up(&ipt_mutex);
}

/* Add @match to the global match list under ipt_mutex.
 * Returns 0 on success, or nonzero (interrupted while waiting for
 * the mutex). */
int
ipt_register_match(struct ipt_match *match)
{
	int ret;

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0)
		return ret;

	list_add(&match->list, &ipt_match);
	up(&ipt_mutex);

	return ret;
}

/* Remove @match from the global match list. */
void
ipt_unregister_match(struct ipt_match *match)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_match, match);
	up(&ipt_mutex);
}
1456
/* Register a new table.  @repl supplies the initial ruleset, which
 * is copied, translated and installed under ipt_mutex.  Returns 0 on
 * success or a negative errno (-ENOMEM, -EEXIST, a translate_table()
 * error, or the down_interruptible() result). */
int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
{
	int ret;
	struct ipt_table_info *newinfo;
	/* Dummy table_info so replace_table() always has an "old"
	 * private area to look at (see "Simplifies" comment below). */
	static struct ipt_table_info bootstrap
		= { 0, 0, 0, { 0 }, { 0 }, { } };

	/* One SMP_ALIGN()ed copy of the entries per possible CPU. */
	newinfo = vmalloc(sizeof(struct ipt_table_info)
			  + SMP_ALIGN(repl->size) *
				(highest_possible_processor_id()+1));
	if (!newinfo)
		return -ENOMEM;

	memcpy(newinfo->entries, repl->entries, repl->size);

	/* Validate the ruleset and fix up its internal offsets. */
	ret = translate_table(table->name, table->valid_hooks,
			      newinfo, repl->size,
			      repl->num_entries,
			      repl->hook_entry,
			      repl->underflow);
	if (ret != 0) {
		vfree(newinfo);
		return ret;
	}

	ret = down_interruptible(&ipt_mutex);
	if (ret != 0) {
		vfree(newinfo);
		return ret;
	}

	/* Don't autoload: we'd eat our tail... */
	if (list_named_find(&ipt_tables, table->name)) {
		ret = -EEXIST;
		goto free_unlock;
	}

	/* Simplifies replace_table code. */
	table->private = &bootstrap;
	if (!replace_table(table, 0, newinfo, &ret))
		goto free_unlock;

	duprintf("table->private->number = %u\n",
		 table->private->number);

	/* save number of initial entries */
	table->private->initial_entries = table->private->number;

	rwlock_init(&table->lock);
	list_prepend(&ipt_tables, table);

 unlock:
	up(&ipt_mutex);
	return ret;

 free_unlock:
	vfree(newinfo);
	goto unlock;
}
1516
/* Unregister @table: unlink it from the global table list, run the
 * per-entry cleanup (dropping match/target module references) and
 * free its private ruleset storage. */
void ipt_unregister_table(struct ipt_table *table)
{
	down(&ipt_mutex);
	LIST_DELETE(&ipt_tables, table);
	up(&ipt_mutex);

	/* Decrease module usage counts and free resources */
	IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
			  cleanup_entry, NULL);
	vfree(table->private);
}
1528
/* Returns 1 if the port is matched by the range, 0 otherwise */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	/* XOR with the (0/1) invert flag flips the verdict. */
	int in_range = port >= min && port <= max;

	return in_range ^ invert;
}
1538
/* Scan the TCP option block of @skb for option number @option.
 * @optlen is the number of option bytes following the fixed header.
 * Returns !invert when the option is found, invert when it is not;
 * on a truncated option block sets *hotdrop and returns 0. */
static int
tcp_find_option(u_int8_t option,
		const struct sk_buff *skb,
		unsigned int optlen,
		int invert,
		int *hotdrop)
{
	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
	unsigned int i;

	duprintf("tcp_match: finding option\n");

	/* No options present: "found" iff the match is inverted. */
	if (!optlen)
		return invert;

	/* If we don't have the whole header, drop packet. */
	op = skb_header_pointer(skb,
				skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
				optlen, _opt);
	if (op == NULL) {
		*hotdrop = 1;
		return 0;
	}

	/* Walk the TLV-encoded options: kinds 0 (EOL) and 1 (NOP) are
	 * one byte, everything else carries a length in op[i+1]
	 * (treat a bogus zero length as 1 to guarantee progress). */
	for (i = 0; i < optlen; ) {
		if (op[i] == option) return !invert;
		if (op[i] < 2) i++;
		else i += op[i+1]?:1;
	}

	return invert;
}
1572
/* Match function for the built-in "tcp" match: returns 1 if the
 * packet's TCP header satisfies @matchinfo (struct ipt_tcp), 0
 * otherwise.  May set *hotdrop to force the packet dropped. */
static int
tcp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct tcphdr _tcph, *th;
	const struct ipt_tcp *tcpinfo = matchinfo;

	if (offset) {
		/* To quote Alan:

		   Don't allow a fragment of TCP 8 bytes in. Nobody normal
		   causes this. Its a cracker trying to break in by doing a
		   flag overwrite to pass the direction checks.
		*/
		if (offset == 1) {
			duprintf("Dropping evil TCP offset=1 frag.\n");
			*hotdrop = 1;
		}
		/* Must not be a fragment. */
		return 0;
	}

/* Apply the corresponding IPT_TCP_INV_* inversion flag to a test. */
#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))

	th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_tcph), &_tcph);
	if (th == NULL) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil TCP offset=0 tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* Source/destination port ranges, each independently
	 * invertible. */
	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
			ntohs(th->source),
			!!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
		return 0;
	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
			ntohs(th->dest),
			!!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
		return 0;
	/* Byte 13 of the TCP header holds the flag bits; the masked
	 * flags must equal the expected pattern. */
	if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
		      == tcpinfo->flg_cmp,
		      IPT_TCP_INV_FLAGS))
		return 0;
	if (tcpinfo->option) {
		/* Data offset smaller than the fixed header:
		 * malformed packet, drop it. */
		if (th->doff * 4 < sizeof(_tcph)) {
			*hotdrop = 1;
			return 0;
		}
		if (!tcp_find_option(tcpinfo->option, skb,
				     th->doff*4 - sizeof(_tcph),
				     tcpinfo->invflags & IPT_TCP_INV_OPTION,
				     hotdrop))
			return 0;
	}
	return 1;
}
1636
1637 /* Called when user tries to insert an entry of this type. */
1638 static int
1639 tcp_checkentry(const char *tablename,
1640 const struct ipt_ip *ip,
1641 void *matchinfo,
1642 unsigned int matchsize,
1643 unsigned int hook_mask)
1644 {
1645 const struct ipt_tcp *tcpinfo = matchinfo;
1646
1647 /* Must specify proto == TCP, and no unknown invflags */
1648 return ip->proto == IPPROTO_TCP
1649 && !(ip->invflags & IPT_INV_PROTO)
1650 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1651 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1652 }
1653
/* Match function for the built-in "udp" match: returns 1 if the
 * packet's UDP header satisfies @matchinfo (struct ipt_udp), 0
 * otherwise.  May set *hotdrop to force the packet dropped. */
static int
udp_match(const struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  const void *matchinfo,
	  int offset,
	  int *hotdrop)
{
	struct udphdr _udph, *uh;
	const struct ipt_udp *udpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_udph), &_udph);
	if (uh == NULL) {
		/* We've been asked to examine this packet, and we
		   can't. Hence, no choice but to drop. */
		duprintf("Dropping evil UDP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* Source and destination port ranges, each independently
	 * invertible via IPT_UDP_INV_*PT. */
	return port_match(udpinfo->spts[0], udpinfo->spts[1],
			  ntohs(uh->source),
			  !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
		&& port_match(udpinfo->dpts[0], udpinfo->dpts[1],
			      ntohs(uh->dest),
			      !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
}
1686
1687 /* Called when user tries to insert an entry of this type. */
1688 static int
1689 udp_checkentry(const char *tablename,
1690 const struct ipt_ip *ip,
1691 void *matchinfo,
1692 unsigned int matchinfosize,
1693 unsigned int hook_mask)
1694 {
1695 const struct ipt_udp *udpinfo = matchinfo;
1696
1697 /* Must specify proto == UDP, and no unknown invflags */
1698 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1699 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1700 IPPROTO_UDP);
1701 return 0;
1702 }
1703 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1704 duprintf("ipt_udp: matchsize %u != %u\n",
1705 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1706 return 0;
1707 }
1708 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1709 duprintf("ipt_udp: unknown flags %X\n",
1710 udpinfo->invflags);
1711 return 0;
1712 }
1713
1714 return 1;
1715 }
1716
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	/* 0xFF is the "any type" wildcard. */
	if (test_type == 0xFF)
		hit = 1;
	else
		hit = type == test_type
			&& code >= min_code && code <= max_code;

	return hit ^ invert;
}
1726
/* Match function for the built-in "icmp" match: returns 1 if the
 * packet's ICMP type/code satisfies @matchinfo (struct ipt_icmp), 0
 * otherwise.  May set *hotdrop to force the packet dropped. */
static int
icmp_match(const struct sk_buff *skb,
	   const struct net_device *in,
	   const struct net_device *out,
	   const void *matchinfo,
	   int offset,
	   int *hotdrop)
{
	struct icmphdr _icmph, *ic;
	const struct ipt_icmp *icmpinfo = matchinfo;

	/* Must not be a fragment. */
	if (offset)
		return 0;

	ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
				sizeof(_icmph), &_icmph);
	if (ic == NULL) {
		/* We've been asked to examine this packet, and we
		 * can't. Hence, no choice but to drop.
		 */
		duprintf("Dropping evil ICMP tinygram.\n");
		*hotdrop = 1;
		return 0;
	}

	/* code[0]..code[1] is the permitted code range for the
	 * requested type; IPT_ICMP_INV inverts the verdict. */
	return icmp_type_code_match(icmpinfo->type,
				    icmpinfo->code[0],
				    icmpinfo->code[1],
				    ic->type, ic->code,
				    !!(icmpinfo->invflags&IPT_ICMP_INV));
}
1759
1760 /* Called when user tries to insert an entry of this type. */
1761 static int
1762 icmp_checkentry(const char *tablename,
1763 const struct ipt_ip *ip,
1764 void *matchinfo,
1765 unsigned int matchsize,
1766 unsigned int hook_mask)
1767 {
1768 const struct ipt_icmp *icmpinfo = matchinfo;
1769
1770 /* Must specify proto == ICMP, and no unknown invflags */
1771 return ip->proto == IPPROTO_ICMP
1772 && !(ip->invflags & IPT_INV_PROTO)
1773 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1774 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1775 }
1776
/* The built-in targets: standard (NULL) and error. */
/* Standard target: no .target function — verdicts are handled
 * inline by the traversal code. */
static struct ipt_target ipt_standard_target = {
	.name		= IPT_STANDARD_TARGET,
};

/* Error target: placed at the end of each chain by userspace. */
static struct ipt_target ipt_error_target = {
	.name		= IPT_ERROR_TARGET,
	.target		= ipt_error,
};

/* Hook the IPT_SO_SET_*/IPT_SO_GET_* ranges into the PF_INET
 * get/setsockopt dispatch. */
static struct nf_sockopt_ops ipt_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IPT_BASE_CTL,
	.set_optmax	= IPT_SO_SET_MAX+1,
	.set		= do_ipt_set_ctl,
	.get_optmin	= IPT_BASE_CTL,
	.get_optmax	= IPT_SO_GET_MAX+1,
	.get		= do_ipt_get_ctl,
};

/* The three built-in protocol matches registered by init(). */
static struct ipt_match tcp_matchstruct = {
	.name		= "tcp",
	.match		= &tcp_match,
	.checkentry	= &tcp_checkentry,
};

static struct ipt_match udp_matchstruct = {
	.name		= "udp",
	.match		= &udp_match,
	.checkentry	= &udp_checkentry,
};

static struct ipt_match icmp_matchstruct = {
	.name		= "icmp",
	.match		= &icmp_match,
	.checkentry	= &icmp_checkentry,
};
1814
#ifdef CONFIG_PROC_FS
/* LIST_FIND() callback for the /proc listings: once *count has
 * reached @start_offset, append the item's name — the string stored
 * directly after its embedded struct list_head — to @buffer.
 * Returns 1 (stop iterating) when the buffer would overflow. */
static inline int print_name(const char *i,
			     off_t start_offset, char *buffer, int length,
			     off_t *pos, unsigned int *count)
{
	if ((*count)++ >= start_offset) {
		unsigned int namelen;

		namelen = sprintf(buffer + *pos, "%s\n",
				  i + sizeof(struct list_head));
		if (*pos + namelen > length) {
			/* Stop iterating */
			return 1;
		}
		*pos += namelen;
	}
	return 0;
}
1833
1834 static inline int print_target(const struct ipt_target *t,
1835 off_t start_offset, char *buffer, int length,
1836 off_t *pos, unsigned int *count)
1837 {
1838 if (t == &ipt_standard_target || t == &ipt_error_target)
1839 return 0;
1840 return print_name((char *)t, start_offset, buffer, length, pos, count);
1841 }
1842
/* /proc/net/ip_tables_names read handler: list registered tables,
 * one name per line, starting at @offset items in. */
static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_tables, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c line ~105 */
	*start=(char *)((unsigned long)count-offset);
	return pos;
}
1860
/* /proc/net/ip_tables_targets read handler: list registered targets
 * (built-ins excluded by print_target()). */
static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_target, print_target, struct ipt_target *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}
1877
/* /proc/net/ip_tables_matches read handler: list registered matches. */
static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
{
	off_t pos = 0;
	unsigned int count = 0;

	if (down_interruptible(&ipt_mutex) != 0)
		return 0;

	LIST_FIND(&ipt_match, print_name, void *,
		  offset, buffer, length, &pos, &count);

	up(&ipt_mutex);

	/* `start' hack - see fs/proc/generic.c */
	*start = (char *)((unsigned long)count - offset);
	return pos;
}
1894
/* Name -> read handler table for the /proc/net listing files created
 * by init(); terminated by a NULL sentinel. */
static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
{ { "ip_tables_names", ipt_get_tables },
  { "ip_tables_targets", ipt_get_targets },
  { "ip_tables_matches", ipt_get_matches },
  { NULL, NULL} };
#endif /*CONFIG_PROC_FS*/
1901
/* Module init: seed the target/match lists with the built-ins,
 * register the sockopt range, and create the /proc listing files.
 * Returns 0 on success or a negative errno. */
static int __init init(void)
{
	int ret;

	/* Noone else will be downing sem now, so we won't sleep */
	down(&ipt_mutex);
	list_append(&ipt_target, &ipt_standard_target);
	list_append(&ipt_target, &ipt_error_target);
	list_append(&ipt_match, &tcp_matchstruct);
	list_append(&ipt_match, &udp_matchstruct);
	list_append(&ipt_match, &icmp_matchstruct);
	up(&ipt_mutex);

	/* Register setsockopt */
	ret = nf_register_sockopt(&ipt_sockopts);
	if (ret < 0) {
		duprintf("Unable to register sockopts.\n");
		return ret;
	}

#ifdef CONFIG_PROC_FS
	{
		struct proc_dir_entry *proc;
		int i;

		for (i = 0; ipt_proc_entry[i].name; i++) {
			proc = proc_net_create(ipt_proc_entry[i].name, 0,
					       ipt_proc_entry[i].get_info);
			if (!proc) {
				/* Unwind: remove the /proc entries
				 * created so far, then the sockopt
				 * registration. */
				while (--i >= 0)
					proc_net_remove(ipt_proc_entry[i].name);
				nf_unregister_sockopt(&ipt_sockopts);
				return -ENOMEM;
			}
			proc->owner = THIS_MODULE;
		}
	}
#endif

	printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
	return 0;
}
1944
/* Module exit: undo init() — drop the sockopt registration and the
 * /proc entries. */
static void __exit fini(void)
{
	nf_unregister_sockopt(&ipt_sockopts);
#ifdef CONFIG_PROC_FS
	{
		int i;
		for (i = 0; ipt_proc_entry[i].name; i++)
			proc_net_remove(ipt_proc_entry[i].name);
	}
#endif
}
1956
/* Interface exported to table modules (iptable_filter etc.) and to
 * match/target extension modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

module_init(init);
module_exit(fini);