net/ipv4/netfilter/ip_tables.c
1 /*
2 * Packet matching code.
3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
13 * a table
14 */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30
31 #include <linux/netfilter_ipv4/ip_tables.h>
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
37 /*#define DEBUG_IP_FIREWALL*/
38 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
39 /*#define DEBUG_IP_FIREWALL_USER*/
40
41 #ifdef DEBUG_IP_FIREWALL
42 #define dprintf(format, args...) printk(format , ## args)
43 #else
44 #define dprintf(format, args...)
45 #endif
46
47 #ifdef DEBUG_IP_FIREWALL_USER
48 #define duprintf(format, args...) printk(format , ## args)
49 #else
50 #define duprintf(format, args...)
51 #endif
52
53 #ifdef CONFIG_NETFILTER_DEBUG
54 #define IP_NF_ASSERT(x) \
55 do { \
56 if (!(x)) \
57 printk("IP_NF_ASSERT: %s:%s:%u\n", \
58 __FUNCTION__, __FILE__, __LINE__); \
59 } while(0)
60 #else
61 #define IP_NF_ASSERT(x)
62 #endif
63 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
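/* Note (added for clarity): SMP_ALIGN() rounds a size up to a whole number
 * of cache lines (SMP_CACHE_BYTES); it is used below so that each CPU's
 * copy of the rule blob starts on its own cache-line boundary. */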
64
65 static DECLARE_MUTEX(ipt_mutex);
66
67 /* Must have mutex */
68 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #include <linux/netfilter_ipv4/listhelp.h>
71
72 #if 0
73 /* All the better to debug you with... */
74 #define static
75 #define inline
76 #endif
77
78 /*
79 We keep a set of rules for each CPU, so we can avoid write-locking
80 them in the softirq when updating the counters and therefore
81 only need to read-lock in the softirq; doing a write_lock_bh() in user
82 context stops packets coming through and allows user context to read
83 the counters or update the rules.
84
85 To be cache friendly on SMP, we arrange them like so:
86 [ n-entries ]
87 ... cache-align padding ...
88 [ n-entries ]
89
90 Hence the start of any table is given by get_table() below. */
91
92 /* The table itself */
93 struct ipt_table_info
94 {
95 /* Size per table */
96 unsigned int size;
97 /* Number of entries: FIXME. --RR */
98 unsigned int number;
99 /* Initial number of entries. Needed for module usage count */
100 unsigned int initial_entries;
101
102 /* Entry points and underflows */
103 unsigned int hook_entry[NF_IP_NUMHOOKS];
104 unsigned int underflow[NF_IP_NUMHOOKS];
105
106 /* ipt_entry tables: one per CPU */
107 char entries[0] ____cacheline_aligned;
108 };
109
110 static LIST_HEAD(ipt_target);
111 static LIST_HEAD(ipt_match);
112 static LIST_HEAD(ipt_tables);
113 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
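/* Note (added for clarity): ADD_COUNTER() bumps the byte (bcnt) and packet
 * (pcnt) counters of an ipt_counters pair in one place; callers pass the
 * packet's byte length and a packet count of 1 per matched packet. */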
114
115 #ifdef CONFIG_SMP
116 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
117 #else
118 #define TABLE_OFFSET(t,p) 0
119 #endif
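/*
 * Illustrative sketch (not part of the original file): given the per-CPU
 * layout described above, CPU 'cpu' finds its private copy of the rules at
 * a cache-aligned offset from the shared entries[] array.  The helper name
 * below is hypothetical; it merely restates SMP_ALIGN()/TABLE_OFFSET().
 */
static inline void *example_cpu_rules(struct ipt_table_info *info,
				      unsigned int cpu)
{
	/* offset 0 on UP; cpu * SMP_ALIGN(info->size) on SMP */
	return (void *)info->entries + TABLE_OFFSET(info, cpu);
}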
120
121 #if 0
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 #endif
126
127 /* Returns whether matches rule or not. */
128 static inline int
129 ip_packet_match(const struct iphdr *ip,
130 const char *indev,
131 const char *outdev,
132 const struct ipt_ip *ipinfo,
133 int isfrag)
134 {
135 size_t i;
136 unsigned long ret;
137
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
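	/* FWINV(cond, flag): XOR the match result with the corresponding
	 * IPT_INV_* bit, so setting that bit in invflags flips the test. */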
139
140 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
141 IPT_INV_SRCIP)
142 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
143 IPT_INV_DSTIP)) {
144 dprintf("Source or dest mismatch.\n");
145
146 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
147 NIPQUAD(ip->saddr),
148 NIPQUAD(ipinfo->smsk.s_addr),
149 NIPQUAD(ipinfo->src.s_addr),
150 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
152 NIPQUAD(ip->daddr),
153 NIPQUAD(ipinfo->dmsk.s_addr),
154 NIPQUAD(ipinfo->dst.s_addr),
155 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
156 return 0;
157 }
158
159 /* Look for ifname matches; this should unroll nicely. */
160 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161 ret |= (((const unsigned long *)indev)[i]
162 ^ ((const unsigned long *)ipinfo->iniface)[i])
163 & ((const unsigned long *)ipinfo->iniface_mask)[i];
164 }
165
166 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167 dprintf("VIA in mismatch (%s vs %s).%s\n",
168 indev, ipinfo->iniface,
169 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
170 return 0;
171 }
172
173 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174 ret |= (((const unsigned long *)outdev)[i]
175 ^ ((const unsigned long *)ipinfo->outiface)[i])
176 & ((const unsigned long *)ipinfo->outiface_mask)[i];
177 }
178
179 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180 dprintf("VIA out mismatch (%s vs %s).%s\n",
181 outdev, ipinfo->outiface,
182 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
183 return 0;
184 }
185
186 /* Check specific protocol */
187 if (ipinfo->proto
188 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189 dprintf("Packet protocol %hi does not match %hi.%s\n",
190 ip->protocol, ipinfo->proto,
191 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
192 return 0;
193 }
194
195 /* If we have a fragment rule but the packet is not a fragment
196 * then we return zero */
197 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198 dprintf("Fragment rule but not fragment.%s\n",
199 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
200 return 0;
201 }
202
203 return 1;
204 }
205
206 static inline int
207 ip_checkentry(const struct ipt_ip *ip)
208 {
209 if (ip->flags & ~IPT_F_MASK) {
210 duprintf("Unknown flag bits set: %08X\n",
211 ip->flags & ~IPT_F_MASK);
212 return 0;
213 }
214 if (ip->invflags & ~IPT_INV_MASK) {
215 duprintf("Unknown invflag bits set: %08X\n",
216 ip->invflags & ~IPT_INV_MASK);
217 return 0;
218 }
219 return 1;
220 }
221
222 static unsigned int
223 ipt_error(struct sk_buff **pskb,
224 const struct net_device *in,
225 const struct net_device *out,
226 unsigned int hooknum,
227 const void *targinfo,
228 void *userinfo)
229 {
230 if (net_ratelimit())
231 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233 return NF_DROP;
234 }
235
236 static inline
237 int do_match(struct ipt_entry_match *m,
238 const struct sk_buff *skb,
239 const struct net_device *in,
240 const struct net_device *out,
241 int offset,
242 int *hotdrop)
243 {
244 /* Stop iteration if it doesn't match */
245 if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246 return 1;
247 else
248 return 0;
249 }
250
251 static inline struct ipt_entry *
252 get_entry(void *base, unsigned int offset)
253 {
254 return (struct ipt_entry *)(base + offset);
255 }
256
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
258 unsigned int
259 ipt_do_table(struct sk_buff **pskb,
260 unsigned int hook,
261 const struct net_device *in,
262 const struct net_device *out,
263 struct ipt_table *table,
264 void *userdata)
265 {
266 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
267 u_int16_t offset;
268 struct iphdr *ip;
269 u_int16_t datalen;
270 int hotdrop = 0;
271 /* Initializing verdict to NF_DROP keeps gcc happy. */
272 unsigned int verdict = NF_DROP;
273 const char *indev, *outdev;
274 void *table_base;
275 struct ipt_entry *e, *back;
276
277 /* Initialization */
278 ip = (*pskb)->nh.iph;
279 datalen = (*pskb)->len - ip->ihl * 4;
280 indev = in ? in->name : nulldevname;
281 outdev = out ? out->name : nulldevname;
282 /* We handle fragments by dealing with the first fragment as
283 * if it was a normal packet. All other fragments are treated
284 * normally, except that they will NEVER match rules that ask
285 * things we don't know (i.e. tcp syn flag or ports). If the
286 * rule is also a fragment-specific rule, non-fragments won't
287 * match it. */
288 offset = ntohs(ip->frag_off) & IP_OFFSET;
289
290 read_lock_bh(&table->lock);
291 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292 table_base = (void *)table->private->entries
293 + TABLE_OFFSET(table->private, smp_processor_id());
294 e = get_entry(table_base, table->private->hook_entry[hook]);
295
296 #ifdef CONFIG_NETFILTER_DEBUG
297 /* Check that no one else is using our table */
298 if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299 && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301 smp_processor_id(),
302 table->name,
303 &((struct ipt_entry *)table_base)->comefrom,
304 ((struct ipt_entry *)table_base)->comefrom);
305 }
306 ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 #endif
308
309 /* For return from builtin chain */
310 back = get_entry(table_base, table->private->underflow[hook]);
311
312 do {
313 IP_NF_ASSERT(e);
314 IP_NF_ASSERT(back);
315 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
316 struct ipt_entry_target *t;
317
318 if (IPT_MATCH_ITERATE(e, do_match,
319 *pskb, in, out,
320 offset, &hotdrop) != 0)
321 goto no_match;
322
323 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
324
325 t = ipt_get_target(e);
326 IP_NF_ASSERT(t->u.kernel.target);
327 /* Standard target? */
328 if (!t->u.kernel.target->target) {
329 int v;
330
331 v = ((struct ipt_standard_target *)t)->verdict;
332 if (v < 0) {
333 /* Pop from stack? */
334 if (v != IPT_RETURN) {
335 verdict = (unsigned)(-v) - 1;
336 break;
337 }
338 e = back;
339 back = get_entry(table_base,
340 back->comefrom);
341 continue;
342 }
343 if (table_base + v != (void *)e + e->next_offset
344 && !(e->ip.flags & IPT_F_GOTO)) {
345 /* Save old back ptr in next entry */
346 struct ipt_entry *next
347 = (void *)e + e->next_offset;
348 next->comefrom
349 = (void *)back - table_base;
350 /* set back pointer to next entry */
351 back = next;
352 }
353
354 e = get_entry(table_base, v);
355 } else {
356 /* Targets which reenter must return
357 abs. verdicts */
358 #ifdef CONFIG_NETFILTER_DEBUG
359 ((struct ipt_entry *)table_base)->comefrom
360 = 0xeeeeeeec;
361 #endif
362 verdict = t->u.kernel.target->target(pskb,
363 in, out,
364 hook,
365 t->data,
366 userdata);
367
368 #ifdef CONFIG_NETFILTER_DEBUG
369 if (((struct ipt_entry *)table_base)->comefrom
370 != 0xeeeeeeec
371 && verdict == IPT_CONTINUE) {
372 printk("Target %s reentered!\n",
373 t->u.kernel.target->name);
374 verdict = NF_DROP;
375 }
376 ((struct ipt_entry *)table_base)->comefrom
377 = 0x57acc001;
378 #endif
379 /* Target might have changed stuff. */
380 ip = (*pskb)->nh.iph;
381 datalen = (*pskb)->len - ip->ihl * 4;
382
383 if (verdict == IPT_CONTINUE)
384 e = (void *)e + e->next_offset;
385 else
386 /* Verdict */
387 break;
388 }
389 } else {
390
391 no_match:
392 e = (void *)e + e->next_offset;
393 }
394 } while (!hotdrop);
395
396 #ifdef CONFIG_NETFILTER_DEBUG
397 ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
398 #endif
399 read_unlock_bh(&table->lock);
400
401 #ifdef DEBUG_ALLOW_ALL
402 return NF_ACCEPT;
403 #else
404 if (hotdrop)
405 return NF_DROP;
406 else return verdict;
407 #endif
408 }
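/*
 * Note on the standard-target verdict encoding used above (added for
 * clarity, not in the original file): userspace stores built-in verdicts as
 * negative numbers, and the kernel recovers the netfilter verdict with
 * (unsigned)(-v) - 1, e.g. v == -1 yields NF_DROP (0) and v == -2 yields
 * NF_ACCEPT (1).  IPT_RETURN is a distinct negative value that instead pops
 * the saved back pointer to return from a user-defined chain, while a
 * non-negative verdict is a byte offset into the table to jump to.
 */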
409
410 /*
411 * These are weird, but module loading must not be done with mutex
412 * held (since they will register), and we have to have a single
413 * function to use try_then_request_module().
414 */
415
416 /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
417 static inline struct ipt_table *find_table_lock(const char *name)
418 {
419 struct ipt_table *t;
420
421 if (down_interruptible(&ipt_mutex) != 0)
422 return ERR_PTR(-EINTR);
423
424 list_for_each_entry(t, &ipt_tables, list)
425 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
426 return t;
427 up(&ipt_mutex);
428 return NULL;
429 }
430
431 /* Find match, grabs ref. Returns ERR_PTR() on error. */
432 static inline struct ipt_match *find_match(const char *name, u8 revision)
433 {
434 struct ipt_match *m;
435 int err = 0;
436
437 if (down_interruptible(&ipt_mutex) != 0)
438 return ERR_PTR(-EINTR);
439
440 list_for_each_entry(m, &ipt_match, list) {
441 if (strcmp(m->name, name) == 0) {
442 if (m->revision == revision) {
443 if (try_module_get(m->me)) {
444 up(&ipt_mutex);
445 return m;
446 }
447 } else
448 err = -EPROTOTYPE; /* Found something. */
449 }
450 }
451 up(&ipt_mutex);
452 return ERR_PTR(err);
453 }
454
455 /* Find target, grabs ref. Returns ERR_PTR() on error. */
456 static inline struct ipt_target *find_target(const char *name, u8 revision)
457 {
458 struct ipt_target *t;
459 int err = 0;
460
461 if (down_interruptible(&ipt_mutex) != 0)
462 return ERR_PTR(-EINTR);
463
464 list_for_each_entry(t, &ipt_target, list) {
465 if (strcmp(t->name, name) == 0) {
466 if (t->revision == revision) {
467 if (try_module_get(t->me)) {
468 up(&ipt_mutex);
469 return t;
470 }
471 } else
472 err = -EPROTOTYPE; /* Found something. */
473 }
474 }
475 up(&ipt_mutex);
476 return ERR_PTR(err);
477 }
478
479 struct ipt_target *ipt_find_target(const char *name, u8 revision)
480 {
481 struct ipt_target *target;
482
483 target = try_then_request_module(find_target(name, revision),
484 "ipt_%s", name);
485 if (IS_ERR(target) || !target)
486 return NULL;
487 return target;
488 }
489
490 static int match_revfn(const char *name, u8 revision, int *bestp)
491 {
492 struct ipt_match *m;
493 int have_rev = 0;
494
495 list_for_each_entry(m, &ipt_match, list) {
496 if (strcmp(m->name, name) == 0) {
497 if (m->revision > *bestp)
498 *bestp = m->revision;
499 if (m->revision == revision)
500 have_rev = 1;
501 }
502 }
503 return have_rev;
504 }
505
506 static int target_revfn(const char *name, u8 revision, int *bestp)
507 {
508 struct ipt_target *t;
509 int have_rev = 0;
510
511 list_for_each_entry(t, &ipt_target, list) {
512 if (strcmp(t->name, name) == 0) {
513 if (t->revision > *bestp)
514 *bestp = t->revision;
515 if (t->revision == revision)
516 have_rev = 1;
517 }
518 }
519 return have_rev;
520 }
521
522 /* Returns true; false only if no such extension exists at all */
523 static inline int find_revision(const char *name, u8 revision,
524 int (*revfn)(const char *, u8, int *),
525 int *err)
526 {
527 int have_rev, best = -1;
528
529 if (down_interruptible(&ipt_mutex) != 0) {
530 *err = -EINTR;
531 return 1;
532 }
533 have_rev = revfn(name, revision, &best);
534 up(&ipt_mutex);
535
536 /* Nothing at all? Return 0 to try loading module. */
537 if (best == -1) {
538 *err = -ENOENT;
539 return 0;
540 }
541
542 *err = best;
543 if (!have_rev)
544 *err = -EPROTONOSUPPORT;
545 return 1;
546 }
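/*
 * Note (added for clarity): on return, *err carries the outcome of the
 * revision probe: the best available revision when the requested one is
 * present, -EPROTONOSUPPORT when the extension exists but not in that
 * revision, and -ENOENT (with a 0 return) when nothing was found at all, so
 * the caller can try loading "ipt_<name>" via try_then_request_module().
 */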
547
548
549 /* All zeroes == unconditional rule. */
550 static inline int
551 unconditional(const struct ipt_ip *ip)
552 {
553 unsigned int i;
554
555 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
556 if (((__u32 *)ip)[i])
557 return 0;
558
559 return 1;
560 }
561
562 /* Figures out from what hook each rule can be called: returns 0 if
563 there are loops. Puts hook bitmask in comefrom. */
564 static int
565 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
566 {
567 unsigned int hook;
568
569 /* No recursion; use packet counter to save back ptrs (reset
570 to 0 as we leave), and comefrom to save source hook bitmask */
571 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
572 unsigned int pos = newinfo->hook_entry[hook];
573 struct ipt_entry *e
574 = (struct ipt_entry *)(newinfo->entries + pos);
575
576 if (!(valid_hooks & (1 << hook)))
577 continue;
578
579 /* Set initial back pointer. */
580 e->counters.pcnt = pos;
581
582 for (;;) {
583 struct ipt_standard_target *t
584 = (void *)ipt_get_target(e);
585
586 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
587 printk("iptables: loop hook %u pos %u %08X.\n",
588 hook, pos, e->comefrom);
589 return 0;
590 }
591 e->comefrom
592 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
593
594 /* Unconditional return/END. */
595 if (e->target_offset == sizeof(struct ipt_entry)
596 && (strcmp(t->target.u.user.name,
597 IPT_STANDARD_TARGET) == 0)
598 && t->verdict < 0
599 && unconditional(&e->ip)) {
600 unsigned int oldpos, size;
601
602 /* Return: backtrack through the last
603 big jump. */
604 do {
605 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
606 #ifdef DEBUG_IP_FIREWALL_USER
607 if (e->comefrom
608 & (1 << NF_IP_NUMHOOKS)) {
609 duprintf("Back unset "
610 "on hook %u "
611 "rule %u\n",
612 hook, pos);
613 }
614 #endif
615 oldpos = pos;
616 pos = e->counters.pcnt;
617 e->counters.pcnt = 0;
618
619 /* We're at the start. */
620 if (pos == oldpos)
621 goto next;
622
623 e = (struct ipt_entry *)
624 (newinfo->entries + pos);
625 } while (oldpos == pos + e->next_offset);
626
627 /* Move along one */
628 size = e->next_offset;
629 e = (struct ipt_entry *)
630 (newinfo->entries + pos + size);
631 e->counters.pcnt = pos;
632 pos += size;
633 } else {
634 int newpos = t->verdict;
635
636 if (strcmp(t->target.u.user.name,
637 IPT_STANDARD_TARGET) == 0
638 && newpos >= 0) {
639 /* This a jump; chase it. */
640 duprintf("Jump rule %u -> %u\n",
641 pos, newpos);
642 } else {
643 /* ... this is a fallthru */
644 newpos = pos + e->next_offset;
645 }
646 e = (struct ipt_entry *)
647 (newinfo->entries + newpos);
648 e->counters.pcnt = pos;
649 pos = newpos;
650 }
651 }
652 next:
653 duprintf("Finished chain %u\n", hook);
654 }
655 return 1;
656 }
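/*
 * Note (added for clarity): the walk above is iterative rather than
 * recursive: e->counters.pcnt temporarily stores the position we jumped
 * from (and is reset to 0 while backtracking), while e->comefrom collects
 * the source-hook bitmask plus an "on the current path" bit
 * (1 << NF_IP_NUMHOOKS) whose reappearance signals a rule loop.
 */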
657
658 static inline int
659 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
660 {
661 if (i && (*i)-- == 0)
662 return 1;
663
664 if (m->u.kernel.match->destroy)
665 m->u.kernel.match->destroy(m->data,
666 m->u.match_size - sizeof(*m));
667 module_put(m->u.kernel.match->me);
668 return 0;
669 }
670
671 static inline int
672 standard_check(const struct ipt_entry_target *t,
673 unsigned int max_offset)
674 {
675 struct ipt_standard_target *targ = (void *)t;
676
677 /* Check standard info. */
678 if (t->u.target_size
679 != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
680 duprintf("standard_check: target size %u != %u\n",
681 t->u.target_size,
682 IPT_ALIGN(sizeof(struct ipt_standard_target)));
683 return 0;
684 }
685
686 if (targ->verdict >= 0
687 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
688 duprintf("ipt_standard_check: bad verdict (%i)\n",
689 targ->verdict);
690 return 0;
691 }
692
693 if (targ->verdict < -NF_MAX_VERDICT - 1) {
694 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
695 targ->verdict);
696 return 0;
697 }
698 return 1;
699 }
700
701 static inline int
702 check_match(struct ipt_entry_match *m,
703 const char *name,
704 const struct ipt_ip *ip,
705 unsigned int hookmask,
706 unsigned int *i)
707 {
708 struct ipt_match *match;
709
710 match = try_then_request_module(find_match(m->u.user.name,
711 m->u.user.revision),
712 "ipt_%s", m->u.user.name);
713 if (IS_ERR(match) || !match) {
714 duprintf("check_match: `%s' not found\n", m->u.user.name);
715 return match ? PTR_ERR(match) : -ENOENT;
716 }
717 m->u.kernel.match = match;
718
719 if (m->u.kernel.match->checkentry
720 && !m->u.kernel.match->checkentry(name, ip, m->data,
721 m->u.match_size - sizeof(*m),
722 hookmask)) {
723 module_put(m->u.kernel.match->me);
724 duprintf("ip_tables: check failed for `%s'.\n",
725 m->u.kernel.match->name);
726 return -EINVAL;
727 }
728
729 (*i)++;
730 return 0;
731 }
732
733 static struct ipt_target ipt_standard_target;
734
735 static inline int
736 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
737 unsigned int *i)
738 {
739 struct ipt_entry_target *t;
740 struct ipt_target *target;
741 int ret;
742 unsigned int j;
743
744 if (!ip_checkentry(&e->ip)) {
745 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
746 return -EINVAL;
747 }
748
749 j = 0;
750 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
751 if (ret != 0)
752 goto cleanup_matches;
753
754 t = ipt_get_target(e);
755 target = try_then_request_module(find_target(t->u.user.name,
756 t->u.user.revision),
757 "ipt_%s", t->u.user.name);
758 if (IS_ERR(target) || !target) {
759 duprintf("check_entry: `%s' not found\n", t->u.user.name);
760 ret = target ? PTR_ERR(target) : -ENOENT;
761 goto cleanup_matches;
762 }
763 t->u.kernel.target = target;
764
765 if (t->u.kernel.target == &ipt_standard_target) {
766 if (!standard_check(t, size)) {
767 ret = -EINVAL;
768 goto cleanup_matches;
769 }
770 } else if (t->u.kernel.target->checkentry
771 && !t->u.kernel.target->checkentry(name, e, t->data,
772 t->u.target_size
773 - sizeof(*t),
774 e->comefrom)) {
775 module_put(t->u.kernel.target->me);
776 duprintf("ip_tables: check failed for `%s'.\n",
777 t->u.kernel.target->name);
778 ret = -EINVAL;
779 goto cleanup_matches;
780 }
781
782 (*i)++;
783 return 0;
784
785 cleanup_matches:
786 IPT_MATCH_ITERATE(e, cleanup_match, &j);
787 return ret;
788 }
789
790 static inline int
791 check_entry_size_and_hooks(struct ipt_entry *e,
792 struct ipt_table_info *newinfo,
793 unsigned char *base,
794 unsigned char *limit,
795 const unsigned int *hook_entries,
796 const unsigned int *underflows,
797 unsigned int *i)
798 {
799 unsigned int h;
800
801 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
802 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
803 duprintf("Bad offset %p\n", e);
804 return -EINVAL;
805 }
806
807 if (e->next_offset
808 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
809 duprintf("checking: element %p size %u\n",
810 e, e->next_offset);
811 return -EINVAL;
812 }
813
814 /* Check hooks & underflows */
815 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
816 if ((unsigned char *)e - base == hook_entries[h])
817 newinfo->hook_entry[h] = hook_entries[h];
818 if ((unsigned char *)e - base == underflows[h])
819 newinfo->underflow[h] = underflows[h];
820 }
821
822 /* FIXME: underflows must be unconditional, standard verdicts
823 < 0 (not IPT_RETURN). --RR */
824
825 /* Clear counters and comefrom */
826 e->counters = ((struct ipt_counters) { 0, 0 });
827 e->comefrom = 0;
828
829 (*i)++;
830 return 0;
831 }
832
833 static inline int
834 cleanup_entry(struct ipt_entry *e, unsigned int *i)
835 {
836 struct ipt_entry_target *t;
837
838 if (i && (*i)-- == 0)
839 return 1;
840
841 /* Cleanup all matches */
842 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
843 t = ipt_get_target(e);
844 if (t->u.kernel.target->destroy)
845 t->u.kernel.target->destroy(t->data,
846 t->u.target_size - sizeof(*t));
847 module_put(t->u.kernel.target->me);
848 return 0;
849 }
850
851 /* Checks and translates the user-supplied table segment (held in
852 newinfo) */
853 static int
854 translate_table(const char *name,
855 unsigned int valid_hooks,
856 struct ipt_table_info *newinfo,
857 unsigned int size,
858 unsigned int number,
859 const unsigned int *hook_entries,
860 const unsigned int *underflows)
861 {
862 unsigned int i;
863 int ret;
864
865 newinfo->size = size;
866 newinfo->number = number;
867
868 /* Init all hooks to impossible value. */
869 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
870 newinfo->hook_entry[i] = 0xFFFFFFFF;
871 newinfo->underflow[i] = 0xFFFFFFFF;
872 }
873
874 duprintf("translate_table: size %u\n", newinfo->size);
875 i = 0;
876 /* Walk through entries, checking offsets. */
877 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
878 check_entry_size_and_hooks,
879 newinfo,
880 newinfo->entries,
881 newinfo->entries + size,
882 hook_entries, underflows, &i);
883 if (ret != 0)
884 return ret;
885
886 if (i != number) {
887 duprintf("translate_table: %u not %u entries\n",
888 i, number);
889 return -EINVAL;
890 }
891
892 /* Check hooks all assigned */
893 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
894 /* Only hooks which are valid */
895 if (!(valid_hooks & (1 << i)))
896 continue;
897 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
898 duprintf("Invalid hook entry %u %u\n",
899 i, hook_entries[i]);
900 return -EINVAL;
901 }
902 if (newinfo->underflow[i] == 0xFFFFFFFF) {
903 duprintf("Invalid underflow %u %u\n",
904 i, underflows[i]);
905 return -EINVAL;
906 }
907 }
908
909 if (!mark_source_chains(newinfo, valid_hooks))
910 return -ELOOP;
911
912 /* Finally, each sanity check must pass */
913 i = 0;
914 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
915 check_entry, name, size, &i);
916
917 if (ret != 0) {
918 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
919 cleanup_entry, &i);
920 return ret;
921 }
922
923 /* And one copy for every other CPU */
924 for (i = 1; i < num_possible_cpus(); i++) {
925 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
926 newinfo->entries,
927 SMP_ALIGN(newinfo->size));
928 }
929
930 return ret;
931 }
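/*
 * Note (added for clarity): translate_table() validates the user-supplied
 * blob in stages: every entry's alignment, bounds and hook/underflow
 * offsets are checked first, then the entry count, then that each valid
 * hook has both an entry point and an underflow, then mark_source_chains()
 * rejects rule loops, and finally each match and target checkentry hook
 * runs.  Only after all of that is the validated copy replicated for the
 * other CPUs.
 */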
932
933 static struct ipt_table_info *
934 replace_table(struct ipt_table *table,
935 unsigned int num_counters,
936 struct ipt_table_info *newinfo,
937 int *error)
938 {
939 struct ipt_table_info *oldinfo;
940
941 #ifdef CONFIG_NETFILTER_DEBUG
942 {
943 struct ipt_entry *table_base;
944 unsigned int i;
945
946 for (i = 0; i < num_possible_cpus(); i++) {
947 table_base =
948 (void *)newinfo->entries
949 + TABLE_OFFSET(newinfo, i);
950
951 table_base->comefrom = 0xdead57ac;
952 }
953 }
954 #endif
955
956 /* Do the substitution. */
957 write_lock_bh(&table->lock);
958 /* Check inside lock: is the old number correct? */
959 if (num_counters != table->private->number) {
960 duprintf("num_counters != table->private->number (%u/%u)\n",
961 num_counters, table->private->number);
962 write_unlock_bh(&table->lock);
963 *error = -EAGAIN;
964 return NULL;
965 }
966 oldinfo = table->private;
967 table->private = newinfo;
968 newinfo->initial_entries = oldinfo->initial_entries;
969 write_unlock_bh(&table->lock);
970
971 return oldinfo;
972 }
973
974 /* Gets counters. */
975 static inline int
976 add_entry_to_counter(const struct ipt_entry *e,
977 struct ipt_counters total[],
978 unsigned int *i)
979 {
980 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
981
982 (*i)++;
983 return 0;
984 }
985
986 static void
987 get_counters(const struct ipt_table_info *t,
988 struct ipt_counters counters[])
989 {
990 unsigned int cpu;
991 unsigned int i;
992
993 for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
994 i = 0;
995 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
996 t->size,
997 add_entry_to_counter,
998 counters,
999 &i);
1000 }
1001 }
1002
1003 static int
1004 copy_entries_to_user(unsigned int total_size,
1005 struct ipt_table *table,
1006 void __user *userptr)
1007 {
1008 unsigned int off, num, countersize;
1009 struct ipt_entry *e;
1010 struct ipt_counters *counters;
1011 int ret = 0;
1012
1013 /* We need atomic snapshot of counters: rest doesn't change
1014 (other than comefrom, which userspace doesn't care
1015 about). */
1016 countersize = sizeof(struct ipt_counters) * table->private->number;
1017 counters = vmalloc(countersize);
1018
1019 if (counters == NULL)
1020 return -ENOMEM;
1021
1022 /* First, sum counters... */
1023 memset(counters, 0, countersize);
1024 write_lock_bh(&table->lock);
1025 get_counters(table->private, counters);
1026 write_unlock_bh(&table->lock);
1027
1028 /* ... then copy entire thing from CPU 0... */
1029 if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1030 ret = -EFAULT;
1031 goto free_counters;
1032 }
1033
1034 /* FIXME: use iterator macros --RR */
1035 /* ... then go back and fix counters and names */
1036 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1037 unsigned int i;
1038 struct ipt_entry_match *m;
1039 struct ipt_entry_target *t;
1040
1041 e = (struct ipt_entry *)(table->private->entries + off);
1042 if (copy_to_user(userptr + off
1043 + offsetof(struct ipt_entry, counters),
1044 &counters[num],
1045 sizeof(counters[num])) != 0) {
1046 ret = -EFAULT;
1047 goto free_counters;
1048 }
1049
1050 for (i = sizeof(struct ipt_entry);
1051 i < e->target_offset;
1052 i += m->u.match_size) {
1053 m = (void *)e + i;
1054
1055 if (copy_to_user(userptr + off + i
1056 + offsetof(struct ipt_entry_match,
1057 u.user.name),
1058 m->u.kernel.match->name,
1059 strlen(m->u.kernel.match->name)+1)
1060 != 0) {
1061 ret = -EFAULT;
1062 goto free_counters;
1063 }
1064 }
1065
1066 t = ipt_get_target(e);
1067 if (copy_to_user(userptr + off + e->target_offset
1068 + offsetof(struct ipt_entry_target,
1069 u.user.name),
1070 t->u.kernel.target->name,
1071 strlen(t->u.kernel.target->name)+1) != 0) {
1072 ret = -EFAULT;
1073 goto free_counters;
1074 }
1075 }
1076
1077 free_counters:
1078 vfree(counters);
1079 return ret;
1080 }
1081
1082 static int
1083 get_entries(const struct ipt_get_entries *entries,
1084 struct ipt_get_entries __user *uptr)
1085 {
1086 int ret;
1087 struct ipt_table *t;
1088
1089 t = find_table_lock(entries->name);
1090 if (t && !IS_ERR(t)) {
1091 duprintf("t->private->number = %u\n",
1092 t->private->number);
1093 if (entries->size == t->private->size)
1094 ret = copy_entries_to_user(t->private->size,
1095 t, uptr->entrytable);
1096 else {
1097 duprintf("get_entries: I've got %u not %u!\n",
1098 t->private->size,
1099 entries->size);
1100 ret = -EINVAL;
1101 }
1102 module_put(t->me);
1103 up(&ipt_mutex);
1104 } else
1105 ret = t ? PTR_ERR(t) : -ENOENT;
1106
1107 return ret;
1108 }
1109
1110 static int
1111 do_replace(void __user *user, unsigned int len)
1112 {
1113 int ret;
1114 struct ipt_replace tmp;
1115 struct ipt_table *t;
1116 struct ipt_table_info *newinfo, *oldinfo;
1117 struct ipt_counters *counters;
1118
1119 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1120 return -EFAULT;
1121
1122 /* Hack: Causes ipchains to give correct error msg --RR */
1123 if (len != sizeof(tmp) + tmp.size)
1124 return -ENOPROTOOPT;
1125
1126 /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1127 if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1128 return -ENOMEM;
1129
1130 newinfo = vmalloc(sizeof(struct ipt_table_info)
1131 + SMP_ALIGN(tmp.size) * num_possible_cpus());
1132 if (!newinfo)
1133 return -ENOMEM;
1134
1135 if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1136 tmp.size) != 0) {
1137 ret = -EFAULT;
1138 goto free_newinfo;
1139 }
1140
1141 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1142 if (!counters) {
1143 ret = -ENOMEM;
1144 goto free_newinfo;
1145 }
1146 memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1147
1148 ret = translate_table(tmp.name, tmp.valid_hooks,
1149 newinfo, tmp.size, tmp.num_entries,
1150 tmp.hook_entry, tmp.underflow);
1151 if (ret != 0)
1152 goto free_newinfo_counters;
1153
1154 duprintf("ip_tables: Translated table\n");
1155
1156 t = try_then_request_module(find_table_lock(tmp.name),
1157 "iptable_%s", tmp.name);
1158 if (!t || IS_ERR(t)) {
1159 ret = t ? PTR_ERR(t) : -ENOENT;
1160 goto free_newinfo_counters_untrans;
1161 }
1162
1163 /* You lied! */
1164 if (tmp.valid_hooks != t->valid_hooks) {
1165 duprintf("Valid hook crap: %08X vs %08X\n",
1166 tmp.valid_hooks, t->valid_hooks);
1167 ret = -EINVAL;
1168 goto put_module;
1169 }
1170
1171 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1172 if (!oldinfo)
1173 goto put_module;
1174
1175 /* Update module usage count based on number of rules */
1176 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1177 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1178 if ((oldinfo->number > oldinfo->initial_entries) ||
1179 (newinfo->number <= oldinfo->initial_entries))
1180 module_put(t->me);
1181 if ((oldinfo->number > oldinfo->initial_entries) &&
1182 (newinfo->number <= oldinfo->initial_entries))
1183 module_put(t->me);
1184
1185 /* Get the old counters. */
1186 get_counters(oldinfo, counters);
1187 /* Decrease module usage counts and free resource */
1188 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1189 vfree(oldinfo);
1190 if (copy_to_user(tmp.counters, counters,
1191 sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1192 ret = -EFAULT;
1193 vfree(counters);
1194 up(&ipt_mutex);
1195 return ret;
1196
1197 put_module:
1198 module_put(t->me);
1199 up(&ipt_mutex);
1200 free_newinfo_counters_untrans:
1201 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1202 free_newinfo_counters:
1203 vfree(counters);
1204 free_newinfo:
1205 vfree(newinfo);
1206 return ret;
1207 }
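/*
 * Note (added for clarity): the two conditional module_put() calls above
 * keep the table module pinned exactly while the table holds rules beyond
 * its built-in initial ones.  find_table_lock() took one reference; it is
 * dropped again unless the new ruleset contains user-added entries, and a
 * previously held "has user rules" reference is dropped as well when the
 * replacement goes back down to the initial entries only.
 */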
1208
1209 /* We're lazy, and add to the first CPU; overflow works its fey magic
1210 * and everything is OK. */
1211 static inline int
1212 add_counter_to_entry(struct ipt_entry *e,
1213 const struct ipt_counters addme[],
1214 unsigned int *i)
1215 {
1216 #if 0
1217 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1218 *i,
1219 (long unsigned int)e->counters.pcnt,
1220 (long unsigned int)e->counters.bcnt,
1221 (long unsigned int)addme[*i].pcnt,
1222 (long unsigned int)addme[*i].bcnt);
1223 #endif
1224
1225 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1226
1227 (*i)++;
1228 return 0;
1229 }
1230
1231 static int
1232 do_add_counters(void __user *user, unsigned int len)
1233 {
1234 unsigned int i;
1235 struct ipt_counters_info tmp, *paddc;
1236 struct ipt_table *t;
1237 int ret = 0;
1238
1239 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1240 return -EFAULT;
1241
1242 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1243 return -EINVAL;
1244
1245 paddc = vmalloc(len);
1246 if (!paddc)
1247 return -ENOMEM;
1248
1249 if (copy_from_user(paddc, user, len) != 0) {
1250 ret = -EFAULT;
1251 goto free;
1252 }
1253
1254 t = find_table_lock(tmp.name);
1255 if (!t || IS_ERR(t)) {
1256 ret = t ? PTR_ERR(t) : -ENOENT;
1257 goto free;
1258 }
1259
1260 write_lock_bh(&t->lock);
1261 if (t->private->number != paddc->num_counters) {
1262 ret = -EINVAL;
1263 goto unlock_up_free;
1264 }
1265
1266 i = 0;
1267 IPT_ENTRY_ITERATE(t->private->entries,
1268 t->private->size,
1269 add_counter_to_entry,
1270 paddc->counters,
1271 &i);
1272 unlock_up_free:
1273 write_unlock_bh(&t->lock);
1274 up(&ipt_mutex);
1275 module_put(t->me);
1276 free:
1277 vfree(paddc);
1278
1279 return ret;
1280 }
1281
1282 static int
1283 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1284 {
1285 int ret;
1286
1287 if (!capable(CAP_NET_ADMIN))
1288 return -EPERM;
1289
1290 switch (cmd) {
1291 case IPT_SO_SET_REPLACE:
1292 ret = do_replace(user, len);
1293 break;
1294
1295 case IPT_SO_SET_ADD_COUNTERS:
1296 ret = do_add_counters(user, len);
1297 break;
1298
1299 default:
1300 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
1301 ret = -EINVAL;
1302 }
1303
1304 return ret;
1305 }
1306
1307 static int
1308 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1309 {
1310 int ret;
1311
1312 if (!capable(CAP_NET_ADMIN))
1313 return -EPERM;
1314
1315 switch (cmd) {
1316 case IPT_SO_GET_INFO: {
1317 char name[IPT_TABLE_MAXNAMELEN];
1318 struct ipt_table *t;
1319
1320 if (*len != sizeof(struct ipt_getinfo)) {
1321 duprintf("length %u != %u\n", *len,
1322 sizeof(struct ipt_getinfo));
1323 ret = -EINVAL;
1324 break;
1325 }
1326
1327 if (copy_from_user(name, user, sizeof(name)) != 0) {
1328 ret = -EFAULT;
1329 break;
1330 }
1331 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1332
1333 t = try_then_request_module(find_table_lock(name),
1334 "iptable_%s", name);
1335 if (t && !IS_ERR(t)) {
1336 struct ipt_getinfo info;
1337
1338 info.valid_hooks = t->valid_hooks;
1339 memcpy(info.hook_entry, t->private->hook_entry,
1340 sizeof(info.hook_entry));
1341 memcpy(info.underflow, t->private->underflow,
1342 sizeof(info.underflow));
1343 info.num_entries = t->private->number;
1344 info.size = t->private->size;
1345 memcpy(info.name, name, sizeof(info.name));
1346
1347 if (copy_to_user(user, &info, *len) != 0)
1348 ret = -EFAULT;
1349 else
1350 ret = 0;
1351 up(&ipt_mutex);
1352 module_put(t->me);
1353 } else
1354 ret = t ? PTR_ERR(t) : -ENOENT;
1355 }
1356 break;
1357
1358 case IPT_SO_GET_ENTRIES: {
1359 struct ipt_get_entries get;
1360
1361 if (*len < sizeof(get)) {
1362 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1363 ret = -EINVAL;
1364 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1365 ret = -EFAULT;
1366 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1367 duprintf("get_entries: %u != %u\n", *len,
1368 sizeof(struct ipt_get_entries) + get.size);
1369 ret = -EINVAL;
1370 } else
1371 ret = get_entries(&get, user);
1372 break;
1373 }
1374
1375 case IPT_SO_GET_REVISION_MATCH:
1376 case IPT_SO_GET_REVISION_TARGET: {
1377 struct ipt_get_revision rev;
1378 int (*revfn)(const char *, u8, int *);
1379
1380 if (*len != sizeof(rev)) {
1381 ret = -EINVAL;
1382 break;
1383 }
1384 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1385 ret = -EFAULT;
1386 break;
1387 }
1388
1389 if (cmd == IPT_SO_GET_REVISION_TARGET)
1390 revfn = target_revfn;
1391 else
1392 revfn = match_revfn;
1393
1394 try_then_request_module(find_revision(rev.name, rev.revision,
1395 revfn, &ret),
1396 "ipt_%s", rev.name);
1397 break;
1398 }
1399
1400 default:
1401 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1402 ret = -EINVAL;
1403 }
1404
1405 return ret;
1406 }
1407
1408 /* Registration hooks for targets. */
1409 int
1410 ipt_register_target(struct ipt_target *target)
1411 {
1412 int ret;
1413
1414 ret = down_interruptible(&ipt_mutex);
1415 if (ret != 0)
1416 return ret;
1417 list_add(&target->list, &ipt_target);
1418 up(&ipt_mutex);
1419 return ret;
1420 }
1421
1422 void
1423 ipt_unregister_target(struct ipt_target *target)
1424 {
1425 down(&ipt_mutex);
1426 LIST_DELETE(&ipt_target, target);
1427 up(&ipt_mutex);
1428 }
1429
1430 int
1431 ipt_register_match(struct ipt_match *match)
1432 {
1433 int ret;
1434
1435 ret = down_interruptible(&ipt_mutex);
1436 if (ret != 0)
1437 return ret;
1438
1439 list_add(&match->list, &ipt_match);
1440 up(&ipt_mutex);
1441
1442 return ret;
1443 }
1444
1445 void
1446 ipt_unregister_match(struct ipt_match *match)
1447 {
1448 down(&ipt_mutex);
1449 LIST_DELETE(&ipt_match, match);
1450 up(&ipt_mutex);
1451 }
1452
1453 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1454 {
1455 int ret;
1456 struct ipt_table_info *newinfo;
1457 static struct ipt_table_info bootstrap
1458 = { 0, 0, 0, { 0 }, { 0 }, { } };
1459
1460 newinfo = vmalloc(sizeof(struct ipt_table_info)
1461 + SMP_ALIGN(repl->size) * num_possible_cpus());
1462 if (!newinfo)
1463 return -ENOMEM;
1464
1465 memcpy(newinfo->entries, repl->entries, repl->size);
1466
1467 ret = translate_table(table->name, table->valid_hooks,
1468 newinfo, repl->size,
1469 repl->num_entries,
1470 repl->hook_entry,
1471 repl->underflow);
1472 if (ret != 0) {
1473 vfree(newinfo);
1474 return ret;
1475 }
1476
1477 ret = down_interruptible(&ipt_mutex);
1478 if (ret != 0) {
1479 vfree(newinfo);
1480 return ret;
1481 }
1482
1483 /* Don't autoload: we'd eat our tail... */
1484 if (list_named_find(&ipt_tables, table->name)) {
1485 ret = -EEXIST;
1486 goto free_unlock;
1487 }
1488
1489 /* Simplifies replace_table code. */
1490 table->private = &bootstrap;
1491 if (!replace_table(table, 0, newinfo, &ret))
1492 goto free_unlock;
1493
1494 duprintf("table->private->number = %u\n",
1495 table->private->number);
1496
1497 /* save number of initial entries */
1498 table->private->initial_entries = table->private->number;
1499
1500 rwlock_init(&table->lock);
1501 list_prepend(&ipt_tables, table);
1502
1503 unlock:
1504 up(&ipt_mutex);
1505 return ret;
1506
1507 free_unlock:
1508 vfree(newinfo);
1509 goto unlock;
1510 }
1511
1512 void ipt_unregister_table(struct ipt_table *table)
1513 {
1514 down(&ipt_mutex);
1515 LIST_DELETE(&ipt_tables, table);
1516 up(&ipt_mutex);
1517
1518 /* Decrease module usage counts and free resources */
1519 IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1520 cleanup_entry, NULL);
1521 vfree(table->private);
1522 }
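/*
 * Usage sketch (hypothetical, not part of this file): a table module such
 * as iptable_filter declares a struct ipt_table plus an initial rule blob
 * in struct ipt_replace form, then registers and later unregisters it.
 * The blob layout is omitted here; only the call pattern is shown, and the
 * names "example_table"/"initial_table" are illustrative.
 *
 *	static struct ipt_table example_table = {
 *		.name		= "example",
 *		.valid_hooks	= 1 << NF_IP_LOCAL_IN,
 *		.me		= THIS_MODULE,
 *	};
 *
 *	ret = ipt_register_table(&example_table, &initial_table.repl);
 *	...
 *	ipt_unregister_table(&example_table);
 */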
1523
1524 /* Returns 1 if the port is matched by the range, 0 otherwise */
1525 static inline int
1526 port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
1527 {
1528 int ret;
1529
1530 ret = (port >= min && port <= max) ^ invert;
1531 return ret;
1532 }
1533
1534 static int
1535 tcp_find_option(u_int8_t option,
1536 const struct sk_buff *skb,
1537 unsigned int optlen,
1538 int invert,
1539 int *hotdrop)
1540 {
1541 /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1542 u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1543 unsigned int i;
1544
1545 duprintf("tcp_match: finding option\n");
1546
1547 if (!optlen)
1548 return invert;
1549
1550 /* If we don't have the whole header, drop packet. */
1551 op = skb_header_pointer(skb,
1552 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1553 optlen, _opt);
1554 if (op == NULL) {
1555 *hotdrop = 1;
1556 return 0;
1557 }
1558
1559 for (i = 0; i < optlen; ) {
1560 if (op[i] == option) return !invert;
1561 if (op[i] < 2) i++;
1562 else i += op[i+1]?:1;
1563 }
1564
1565 return invert;
1566 }
1567
1568 static int
1569 tcp_match(const struct sk_buff *skb,
1570 const struct net_device *in,
1571 const struct net_device *out,
1572 const void *matchinfo,
1573 int offset,
1574 int *hotdrop)
1575 {
1576 struct tcphdr _tcph, *th;
1577 const struct ipt_tcp *tcpinfo = matchinfo;
1578
1579 if (offset) {
1580 /* To quote Alan:
1581
1582 Don't allow a fragment of TCP 8 bytes in. Nobody normal
1583 causes this. Its a cracker trying to break in by doing a
1584 flag overwrite to pass the direction checks.
1585 */
1586 if (offset == 1) {
1587 duprintf("Dropping evil TCP offset=1 frag.\n");
1588 *hotdrop = 1;
1589 }
1590 /* Must not be a fragment. */
1591 return 0;
1592 }
1593
1594 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1595
1596 th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1597 sizeof(_tcph), &_tcph);
1598 if (th == NULL) {
1599 /* We've been asked to examine this packet, and we
1600 can't. Hence, no choice but to drop. */
1601 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1602 *hotdrop = 1;
1603 return 0;
1604 }
1605
1606 if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1607 ntohs(th->source),
1608 !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1609 return 0;
1610 if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1611 ntohs(th->dest),
1612 !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1613 return 0;
1614 if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1615 == tcpinfo->flg_cmp,
1616 IPT_TCP_INV_FLAGS))
1617 return 0;
1618 if (tcpinfo->option) {
1619 if (th->doff * 4 < sizeof(_tcph)) {
1620 *hotdrop = 1;
1621 return 0;
1622 }
1623 if (!tcp_find_option(tcpinfo->option, skb,
1624 th->doff*4 - sizeof(_tcph),
1625 tcpinfo->invflags & IPT_TCP_INV_OPTION,
1626 hotdrop))
1627 return 0;
1628 }
1629 return 1;
1630 }
1631
1632 /* Called when user tries to insert an entry of this type. */
1633 static int
1634 tcp_checkentry(const char *tablename,
1635 const struct ipt_ip *ip,
1636 void *matchinfo,
1637 unsigned int matchsize,
1638 unsigned int hook_mask)
1639 {
1640 const struct ipt_tcp *tcpinfo = matchinfo;
1641
1642 /* Must specify proto == TCP, and no unknown invflags */
1643 return ip->proto == IPPROTO_TCP
1644 && !(ip->invflags & IPT_INV_PROTO)
1645 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1646 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1647 }
1648
1649 static int
1650 udp_match(const struct sk_buff *skb,
1651 const struct net_device *in,
1652 const struct net_device *out,
1653 const void *matchinfo,
1654 int offset,
1655 int *hotdrop)
1656 {
1657 struct udphdr _udph, *uh;
1658 const struct ipt_udp *udpinfo = matchinfo;
1659
1660 /* Must not be a fragment. */
1661 if (offset)
1662 return 0;
1663
1664 uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1665 sizeof(_udph), &_udph);
1666 if (uh == NULL) {
1667 /* We've been asked to examine this packet, and we
1668 can't. Hence, no choice but to drop. */
1669 duprintf("Dropping evil UDP tinygram.\n");
1670 *hotdrop = 1;
1671 return 0;
1672 }
1673
1674 return port_match(udpinfo->spts[0], udpinfo->spts[1],
1675 ntohs(uh->source),
1676 !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1677 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1678 ntohs(uh->dest),
1679 !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1680 }
1681
1682 /* Called when user tries to insert an entry of this type. */
1683 static int
1684 udp_checkentry(const char *tablename,
1685 const struct ipt_ip *ip,
1686 void *matchinfo,
1687 unsigned int matchinfosize,
1688 unsigned int hook_mask)
1689 {
1690 const struct ipt_udp *udpinfo = matchinfo;
1691
1692 /* Must specify proto == UDP, and no unknown invflags */
1693 if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1694 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1695 IPPROTO_UDP);
1696 return 0;
1697 }
1698 if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1699 duprintf("ipt_udp: matchsize %u != %u\n",
1700 matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1701 return 0;
1702 }
1703 if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1704 duprintf("ipt_udp: unknown flags %X\n",
1705 udpinfo->invflags);
1706 return 0;
1707 }
1708
1709 return 1;
1710 }
1711
1712 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
1713 static inline int
1714 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1715 u_int8_t type, u_int8_t code,
1716 int invert)
1717 {
1718 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
1719 ^ invert;
1720 }
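/* Note (added for clarity): a test_type of 0xFF matches any ICMP type and
 * code; userspace appears to use this as the "any type" wildcard when no
 * --icmp-type is given.  Otherwise both the type and the code range must
 * match, with the result optionally inverted. */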
1721
1722 static int
1723 icmp_match(const struct sk_buff *skb,
1724 const struct net_device *in,
1725 const struct net_device *out,
1726 const void *matchinfo,
1727 int offset,
1728 int *hotdrop)
1729 {
1730 struct icmphdr _icmph, *ic;
1731 const struct ipt_icmp *icmpinfo = matchinfo;
1732
1733 /* Must not be a fragment. */
1734 if (offset)
1735 return 0;
1736
1737 ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1738 sizeof(_icmph), &_icmph);
1739 if (ic == NULL) {
1740 /* We've been asked to examine this packet, and we
1741 * can't. Hence, no choice but to drop.
1742 */
1743 duprintf("Dropping evil ICMP tinygram.\n");
1744 *hotdrop = 1;
1745 return 0;
1746 }
1747
1748 return icmp_type_code_match(icmpinfo->type,
1749 icmpinfo->code[0],
1750 icmpinfo->code[1],
1751 ic->type, ic->code,
1752 !!(icmpinfo->invflags&IPT_ICMP_INV));
1753 }
1754
1755 /* Called when user tries to insert an entry of this type. */
1756 static int
1757 icmp_checkentry(const char *tablename,
1758 const struct ipt_ip *ip,
1759 void *matchinfo,
1760 unsigned int matchsize,
1761 unsigned int hook_mask)
1762 {
1763 const struct ipt_icmp *icmpinfo = matchinfo;
1764
1765 /* Must specify proto == ICMP, and no unknown invflags */
1766 return ip->proto == IPPROTO_ICMP
1767 && !(ip->invflags & IPT_INV_PROTO)
1768 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1769 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1770 }
1771
1772 /* The built-in targets: standard (NULL) and error. */
1773 static struct ipt_target ipt_standard_target = {
1774 .name = IPT_STANDARD_TARGET,
1775 };
1776
1777 static struct ipt_target ipt_error_target = {
1778 .name = IPT_ERROR_TARGET,
1779 .target = ipt_error,
1780 };
1781
1782 static struct nf_sockopt_ops ipt_sockopts = {
1783 .pf = PF_INET,
1784 .set_optmin = IPT_BASE_CTL,
1785 .set_optmax = IPT_SO_SET_MAX+1,
1786 .set = do_ipt_set_ctl,
1787 .get_optmin = IPT_BASE_CTL,
1788 .get_optmax = IPT_SO_GET_MAX+1,
1789 .get = do_ipt_get_ctl,
1790 };
1791
1792 static struct ipt_match tcp_matchstruct = {
1793 .name = "tcp",
1794 .match = &tcp_match,
1795 .checkentry = &tcp_checkentry,
1796 };
1797
1798 static struct ipt_match udp_matchstruct = {
1799 .name = "udp",
1800 .match = &udp_match,
1801 .checkentry = &udp_checkentry,
1802 };
1803
1804 static struct ipt_match icmp_matchstruct = {
1805 .name = "icmp",
1806 .match = &icmp_match,
1807 .checkentry = &icmp_checkentry,
1808 };
1809
1810 #ifdef CONFIG_PROC_FS
1811 static inline int print_name(const char *i,
1812 off_t start_offset, char *buffer, int length,
1813 off_t *pos, unsigned int *count)
1814 {
1815 if ((*count)++ >= start_offset) {
1816 unsigned int namelen;
1817
1818 namelen = sprintf(buffer + *pos, "%s\n",
1819 i + sizeof(struct list_head));
1820 if (*pos + namelen > length) {
1821 /* Stop iterating */
1822 return 1;
1823 }
1824 *pos += namelen;
1825 }
1826 return 0;
1827 }
1828
1829 static inline int print_target(const struct ipt_target *t,
1830 off_t start_offset, char *buffer, int length,
1831 off_t *pos, unsigned int *count)
1832 {
1833 if (t == &ipt_standard_target || t == &ipt_error_target)
1834 return 0;
1835 return print_name((char *)t, start_offset, buffer, length, pos, count);
1836 }
1837
1838 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1839 {
1840 off_t pos = 0;
1841 unsigned int count = 0;
1842
1843 if (down_interruptible(&ipt_mutex) != 0)
1844 return 0;
1845
1846 LIST_FIND(&ipt_tables, print_name, void *,
1847 offset, buffer, length, &pos, &count);
1848
1849 up(&ipt_mutex);
1850
1851 /* `start' hack - see fs/proc/generic.c line ~105 */
1852 *start=(char *)((unsigned long)count-offset);
1853 return pos;
1854 }
1855
1856 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1857 {
1858 off_t pos = 0;
1859 unsigned int count = 0;
1860
1861 if (down_interruptible(&ipt_mutex) != 0)
1862 return 0;
1863
1864 LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1865 offset, buffer, length, &pos, &count);
1866
1867 up(&ipt_mutex);
1868
1869 *start = (char *)((unsigned long)count - offset);
1870 return pos;
1871 }
1872
1873 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1874 {
1875 off_t pos = 0;
1876 unsigned int count = 0;
1877
1878 if (down_interruptible(&ipt_mutex) != 0)
1879 return 0;
1880
1881 LIST_FIND(&ipt_match, print_name, void *,
1882 offset, buffer, length, &pos, &count);
1883
1884 up(&ipt_mutex);
1885
1886 *start = (char *)((unsigned long)count - offset);
1887 return pos;
1888 }
1889
1890 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1891 { { "ip_tables_names", ipt_get_tables },
1892 { "ip_tables_targets", ipt_get_targets },
1893 { "ip_tables_matches", ipt_get_matches },
1894 { NULL, NULL} };
1895 #endif /*CONFIG_PROC_FS*/
1896
1897 static int __init init(void)
1898 {
1899 int ret;
1900
1901 /* No one else will be downing the sem now, so we won't sleep */
1902 down(&ipt_mutex);
1903 list_append(&ipt_target, &ipt_standard_target);
1904 list_append(&ipt_target, &ipt_error_target);
1905 list_append(&ipt_match, &tcp_matchstruct);
1906 list_append(&ipt_match, &udp_matchstruct);
1907 list_append(&ipt_match, &icmp_matchstruct);
1908 up(&ipt_mutex);
1909
1910 /* Register setsockopt */
1911 ret = nf_register_sockopt(&ipt_sockopts);
1912 if (ret < 0) {
1913 duprintf("Unable to register sockopts.\n");
1914 return ret;
1915 }
1916
1917 #ifdef CONFIG_PROC_FS
1918 {
1919 struct proc_dir_entry *proc;
1920 int i;
1921
1922 for (i = 0; ipt_proc_entry[i].name; i++) {
1923 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1924 ipt_proc_entry[i].get_info);
1925 if (!proc) {
1926 while (--i >= 0)
1927 proc_net_remove(ipt_proc_entry[i].name);
1928 nf_unregister_sockopt(&ipt_sockopts);
1929 return -ENOMEM;
1930 }
1931 proc->owner = THIS_MODULE;
1932 }
1933 }
1934 #endif
1935
1936 printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1937 return 0;
1938 }
1939
1940 static void __exit fini(void)
1941 {
1942 nf_unregister_sockopt(&ipt_sockopts);
1943 #ifdef CONFIG_PROC_FS
1944 {
1945 int i;
1946 for (i = 0; ipt_proc_entry[i].name; i++)
1947 proc_net_remove(ipt_proc_entry[i].name);
1948 }
1949 #endif
1950 }
1951
1952 EXPORT_SYMBOL(ipt_register_table);
1953 EXPORT_SYMBOL(ipt_unregister_table);
1954 EXPORT_SYMBOL(ipt_register_match);
1955 EXPORT_SYMBOL(ipt_unregister_match);
1956 EXPORT_SYMBOL(ipt_do_table);
1957 EXPORT_SYMBOL(ipt_register_target);
1958 EXPORT_SYMBOL(ipt_unregister_target);
1959 EXPORT_SYMBOL(ipt_find_target);
1960
1961 module_init(init);
1962 module_exit(fini);