]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* Cluster IP hashmark target |
2 | * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> | |
3 | * based on ideas of Fabio Olive Leite <olive@unixforge.org> | |
4 | * | |
5 | * Development of this code funded by SuSE Linux AG, http://www.suse.com/ | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | */ | |
12 | #include <linux/module.h> | |
13 | #include <linux/config.h> | |
14 | #include <linux/proc_fs.h> | |
15 | #include <linux/jhash.h> | |
136e92bb | 16 | #include <linux/bitops.h> |
1da177e4 LT |
17 | #include <linux/skbuff.h> |
18 | #include <linux/ip.h> | |
19 | #include <linux/tcp.h> | |
20 | #include <linux/udp.h> | |
21 | #include <linux/icmp.h> | |
22 | #include <linux/if_arp.h> | |
23 | #include <linux/proc_fs.h> | |
24 | #include <linux/seq_file.h> | |
25 | ||
26 | #include <net/checksum.h> | |
27 | ||
28 | #include <linux/netfilter_arp.h> | |
29 | ||
30 | #include <linux/netfilter_ipv4/ip_tables.h> | |
31 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | |
9fb9cbb1 | 32 | #include <net/netfilter/nf_conntrack_compat.h> |
1da177e4 | 33 | |
136e92bb | 34 | #define CLUSTERIP_VERSION "0.8" |
1da177e4 LT |
35 | |
/*
 * Debug output is off by default: DEBUGP() is used on the per-packet path,
 * so leaving it enabled floods the kernel log.  Uncomment the define below
 * to turn it back on.
 */
/* #define DEBUG_CLUSTERIP */

#ifdef DEBUG_CLUSTERIP
#define DEBUGP printk
#else
/* Swallow the arguments entirely so no stray expression is left behind. */
#define DEBUGP(format, args...)
#endif
43 | ||
e45b1be8 PM |
44 | #define ASSERT_READ_LOCK(x) |
45 | ||
1da177e4 LT |
46 | MODULE_LICENSE("GPL"); |
47 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
48 | MODULE_DESCRIPTION("iptables target for CLUSTERIP"); | |
49 | ||
50 | struct clusterip_config { | |
51 | struct list_head list; /* list of all configs */ | |
52 | atomic_t refcount; /* reference count */ | |
44513624 KK |
53 | atomic_t entries; /* number of entries/rules |
54 | * referencing us */ | |
1da177e4 LT |
55 | |
56 | u_int32_t clusterip; /* the IP address */ | |
57 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ | |
58 | struct net_device *dev; /* device */ | |
59 | u_int16_t num_total_nodes; /* total number of nodes */ | |
136e92bb | 60 | unsigned long local_nodes; /* node number array */ |
1da177e4 LT |
61 | |
62 | #ifdef CONFIG_PROC_FS | |
63 | struct proc_dir_entry *pde; /* proc dir entry */ | |
64 | #endif | |
65 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | |
66 | u_int32_t hash_initval; /* hash initialization */ | |
67 | }; | |
68 | ||
/* Every known cluster IP configuration is linked into this list. */
static LIST_HEAD(clusterip_configs);

/* Guards all accesses to the clusterip_configs list. */
static DEFINE_RWLOCK(clusterip_lock);

#ifdef CONFIG_PROC_FS
/* File operations of the per-config proc files; initialised further down. */
static struct file_operations clusterip_proc_fops;
/* Directory that holds one proc file per cluster IP. */
static struct proc_dir_entry *clusterip_procdir;
#endif
78 | ||
79 | static inline void | |
44513624 KK |
80 | clusterip_config_get(struct clusterip_config *c) |
81 | { | |
1da177e4 LT |
82 | atomic_inc(&c->refcount); |
83 | } | |
84 | ||
85 | static inline void | |
44513624 KK |
86 | clusterip_config_put(struct clusterip_config *c) |
87 | { | |
88 | if (atomic_dec_and_test(&c->refcount)) | |
89 | kfree(c); | |
90 | } | |
91 | ||
92 | /* increase the count of entries(rules) using/referencing this config */ | |
93 | static inline void | |
94 | clusterip_config_entry_get(struct clusterip_config *c) | |
95 | { | |
96 | atomic_inc(&c->entries); | |
97 | } | |
98 | ||
99 | /* decrease the count of entries using/referencing this config. If last | |
100 | * entry(rule) is removed, remove the config from lists, but don't free it | |
101 | * yet, since proc-files could still be holding references */ | |
102 | static inline void | |
103 | clusterip_config_entry_put(struct clusterip_config *c) | |
104 | { | |
105 | if (atomic_dec_and_test(&c->entries)) { | |
e45b1be8 | 106 | write_lock_bh(&clusterip_lock); |
1da177e4 | 107 | list_del(&c->list); |
e45b1be8 | 108 | write_unlock_bh(&clusterip_lock); |
44513624 | 109 | |
1da177e4 LT |
110 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); |
111 | dev_put(c->dev); | |
44513624 KK |
112 | |
113 | /* In case anyone still accesses the file, the open/close | |
114 | * functions are also incrementing the refcount on their own, | |
115 | * so it's safe to remove the entry even if it's in use. */ | |
116 | #ifdef CONFIG_PROC_FS | |
117 | remove_proc_entry(c->pde->name, c->pde->parent); | |
118 | #endif | |
1da177e4 LT |
119 | } |
120 | } | |
121 | ||
1da177e4 LT |
122 | static struct clusterip_config * |
123 | __clusterip_config_find(u_int32_t clusterip) | |
124 | { | |
125 | struct list_head *pos; | |
126 | ||
e45b1be8 | 127 | ASSERT_READ_LOCK(&clusterip_lock); |
1da177e4 LT |
128 | list_for_each(pos, &clusterip_configs) { |
129 | struct clusterip_config *c = list_entry(pos, | |
130 | struct clusterip_config, list); | |
131 | if (c->clusterip == clusterip) { | |
132 | return c; | |
133 | } | |
134 | } | |
135 | ||
136 | return NULL; | |
137 | } | |
138 | ||
139 | static inline struct clusterip_config * | |
44513624 | 140 | clusterip_config_find_get(u_int32_t clusterip, int entry) |
1da177e4 LT |
141 | { |
142 | struct clusterip_config *c; | |
143 | ||
e45b1be8 | 144 | read_lock_bh(&clusterip_lock); |
1da177e4 LT |
145 | c = __clusterip_config_find(clusterip); |
146 | if (!c) { | |
e45b1be8 | 147 | read_unlock_bh(&clusterip_lock); |
1da177e4 LT |
148 | return NULL; |
149 | } | |
150 | atomic_inc(&c->refcount); | |
44513624 KK |
151 | if (entry) |
152 | atomic_inc(&c->entries); | |
e45b1be8 | 153 | read_unlock_bh(&clusterip_lock); |
1da177e4 LT |
154 | |
155 | return c; | |
156 | } | |
157 | ||
136e92bb KK |
158 | static void |
159 | clusterip_config_init_nodelist(struct clusterip_config *c, | |
160 | const struct ipt_clusterip_tgt_info *i) | |
161 | { | |
162 | int n; | |
163 | ||
164 | for (n = 0; n < i->num_local_nodes; n++) { | |
165 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); | |
166 | } | |
167 | } | |
168 | ||
1da177e4 LT |
169 | static struct clusterip_config * |
170 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, | |
171 | struct net_device *dev) | |
172 | { | |
173 | struct clusterip_config *c; | |
174 | char buffer[16]; | |
175 | ||
176 | c = kmalloc(sizeof(*c), GFP_ATOMIC); | |
177 | if (!c) | |
178 | return NULL; | |
179 | ||
180 | memset(c, 0, sizeof(*c)); | |
181 | c->dev = dev; | |
182 | c->clusterip = ip; | |
183 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | |
184 | c->num_total_nodes = i->num_total_nodes; | |
136e92bb | 185 | clusterip_config_init_nodelist(c, i); |
1da177e4 LT |
186 | c->hash_mode = i->hash_mode; |
187 | c->hash_initval = i->hash_initval; | |
188 | atomic_set(&c->refcount, 1); | |
44513624 | 189 | atomic_set(&c->entries, 1); |
1da177e4 LT |
190 | |
191 | #ifdef CONFIG_PROC_FS | |
192 | /* create proc dir entry */ | |
193 | sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | |
194 | c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir); | |
195 | if (!c->pde) { | |
196 | kfree(c); | |
197 | return NULL; | |
198 | } | |
199 | c->pde->proc_fops = &clusterip_proc_fops; | |
200 | c->pde->data = c; | |
201 | #endif | |
202 | ||
e45b1be8 | 203 | write_lock_bh(&clusterip_lock); |
1da177e4 | 204 | list_add(&c->list, &clusterip_configs); |
e45b1be8 | 205 | write_unlock_bh(&clusterip_lock); |
1da177e4 LT |
206 | |
207 | return c; | |
208 | } | |
209 | ||
210 | static int | |
211 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | |
212 | { | |
1da177e4 | 213 | |
136e92bb KK |
214 | if (nodenum == 0 || |
215 | nodenum > c->num_total_nodes) | |
1da177e4 | 216 | return 1; |
1da177e4 | 217 | |
136e92bb KK |
218 | /* check if we already have this number in our bitfield */ |
219 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | |
220 | return 1; | |
1da177e4 | 221 | |
1da177e4 LT |
222 | return 0; |
223 | } | |
224 | ||
225 | static int | |
226 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) | |
227 | { | |
136e92bb KK |
228 | if (nodenum == 0 || |
229 | nodenum > c->num_total_nodes) | |
1da177e4 | 230 | return 1; |
1da177e4 | 231 | |
136e92bb KK |
232 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
233 | return 0; | |
1da177e4 | 234 | |
1da177e4 LT |
235 | return 1; |
236 | } | |
237 | ||
238 | static inline u_int32_t | |
239 | clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) | |
240 | { | |
241 | struct iphdr *iph = skb->nh.iph; | |
242 | unsigned long hashval; | |
243 | u_int16_t sport, dport; | |
244 | struct tcphdr *th; | |
245 | struct udphdr *uh; | |
246 | struct icmphdr *ih; | |
247 | ||
248 | switch (iph->protocol) { | |
249 | case IPPROTO_TCP: | |
250 | th = (void *)iph+iph->ihl*4; | |
251 | sport = ntohs(th->source); | |
252 | dport = ntohs(th->dest); | |
253 | break; | |
254 | case IPPROTO_UDP: | |
255 | uh = (void *)iph+iph->ihl*4; | |
256 | sport = ntohs(uh->source); | |
257 | dport = ntohs(uh->dest); | |
258 | break; | |
259 | case IPPROTO_ICMP: | |
260 | ih = (void *)iph+iph->ihl*4; | |
261 | sport = ntohs(ih->un.echo.id); | |
262 | dport = (ih->type<<8)|ih->code; | |
263 | break; | |
264 | default: | |
265 | if (net_ratelimit()) { | |
266 | printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", | |
267 | iph->protocol); | |
268 | } | |
269 | sport = dport = 0; | |
270 | } | |
271 | ||
272 | switch (config->hash_mode) { | |
273 | case CLUSTERIP_HASHMODE_SIP: | |
274 | hashval = jhash_1word(ntohl(iph->saddr), | |
275 | config->hash_initval); | |
276 | break; | |
277 | case CLUSTERIP_HASHMODE_SIP_SPT: | |
278 | hashval = jhash_2words(ntohl(iph->saddr), sport, | |
279 | config->hash_initval); | |
280 | break; | |
281 | case CLUSTERIP_HASHMODE_SIP_SPT_DPT: | |
282 | hashval = jhash_3words(ntohl(iph->saddr), sport, dport, | |
283 | config->hash_initval); | |
284 | break; | |
285 | default: | |
286 | /* to make gcc happy */ | |
287 | hashval = 0; | |
288 | /* This cannot happen, unless the check function wasn't called | |
289 | * at rule load time */ | |
290 | printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); | |
291 | BUG(); | |
292 | break; | |
293 | } | |
294 | ||
295 | /* node numbers are 1..n, not 0..n */ | |
296 | return ((hashval % config->num_total_nodes)+1); | |
297 | } | |
298 | ||
299 | static inline int | |
300 | clusterip_responsible(struct clusterip_config *config, u_int32_t hash) | |
301 | { | |
136e92bb | 302 | return test_bit(hash - 1, &config->local_nodes); |
1da177e4 LT |
303 | } |
304 | ||
305 | /*********************************************************************** | |
306 | * IPTABLES TARGET | |
307 | ***********************************************************************/ | |
308 | ||
309 | static unsigned int | |
310 | target(struct sk_buff **pskb, | |
311 | const struct net_device *in, | |
312 | const struct net_device *out, | |
313 | unsigned int hooknum, | |
c4986734 | 314 | const struct xt_target *target, |
1da177e4 LT |
315 | const void *targinfo, |
316 | void *userinfo) | |
317 | { | |
318 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | |
319 | enum ip_conntrack_info ctinfo; | |
9fb9cbb1 | 320 | u_int32_t *mark, hash; |
1da177e4 LT |
321 | |
322 | /* don't need to clusterip_config_get() here, since refcount | |
323 | * is only decremented by destroy() - and ip_tables guarantees | |
324 | * that the ->target() function isn't called after ->destroy() */ | |
325 | ||
9fb9cbb1 YK |
326 | mark = nf_ct_get_mark((*pskb), &ctinfo); |
327 | if (mark == NULL) { | |
1da177e4 LT |
328 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); |
329 | /* FIXME: need to drop invalid ones, since replies | |
330 | * to outgoing connections of other nodes will be | |
331 | * marked as INVALID */ | |
332 | return NF_DROP; | |
333 | } | |
334 | ||
335 | /* special case: ICMP error handling. conntrack distinguishes between | |
336 | * error messages (RELATED) and information requests (see below) */ | |
337 | if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP | |
338 | && (ctinfo == IP_CT_RELATED | |
5d927eb0 | 339 | || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) |
1da177e4 LT |
340 | return IPT_CONTINUE; |
341 | ||
342 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, | |
343 | * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here | |
344 | * on, which all have an ID field [relevant for hashing]. */ | |
345 | ||
346 | hash = clusterip_hashfn(*pskb, cipinfo->config); | |
347 | ||
348 | switch (ctinfo) { | |
349 | case IP_CT_NEW: | |
9fb9cbb1 | 350 | *mark = hash; |
1da177e4 LT |
351 | break; |
352 | case IP_CT_RELATED: | |
353 | case IP_CT_RELATED+IP_CT_IS_REPLY: | |
354 | /* FIXME: we don't handle expectations at the | |
355 | * moment. they can arrive on a different node than | |
356 | * the master connection (e.g. FTP passive mode) */ | |
357 | case IP_CT_ESTABLISHED: | |
358 | case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: | |
359 | break; | |
360 | default: | |
361 | break; | |
362 | } | |
363 | ||
364 | #ifdef DEBUG_CLUSTERP | |
365 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); | |
366 | #endif | |
9fb9cbb1 | 367 | DEBUGP("hash=%u ct_hash=%u ", hash, *mark); |
1da177e4 LT |
368 | if (!clusterip_responsible(cipinfo->config, hash)) { |
369 | DEBUGP("not responsible\n"); | |
370 | return NF_DROP; | |
371 | } | |
372 | DEBUGP("responsible\n"); | |
373 | ||
374 | /* despite being received via linklayer multicast, this is | |
375 | * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ | |
376 | (*pskb)->pkt_type = PACKET_HOST; | |
377 | ||
378 | return IPT_CONTINUE; | |
379 | } | |
380 | ||
381 | static int | |
382 | checkentry(const char *tablename, | |
2e4e6a17 | 383 | const void *e_void, |
c4986734 | 384 | const struct xt_target *target, |
1da177e4 LT |
385 | void *targinfo, |
386 | unsigned int targinfosize, | |
387 | unsigned int hook_mask) | |
388 | { | |
389 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | |
2e4e6a17 | 390 | const struct ipt_entry *e = e_void; |
1da177e4 LT |
391 | |
392 | struct clusterip_config *config; | |
393 | ||
1da177e4 LT |
394 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && |
395 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | |
396 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | |
397 | printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", | |
398 | cipinfo->hash_mode); | |
399 | return 0; | |
400 | ||
401 | } | |
402 | if (e->ip.dmsk.s_addr != 0xffffffff | |
403 | || e->ip.dst.s_addr == 0) { | |
404 | printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); | |
405 | return 0; | |
406 | } | |
407 | ||
408 | /* FIXME: further sanity checks */ | |
409 | ||
44513624 KK |
410 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
411 | if (config) { | |
412 | if (cipinfo->config != NULL) { | |
413 | /* Case A: This is an entry that gets reloaded, since | |
414 | * it still has a cipinfo->config pointer. Simply | |
415 | * increase the entry refcount and return */ | |
416 | if (cipinfo->config != config) { | |
417 | printk(KERN_ERR "CLUSTERIP: Reloaded entry " | |
418 | "has invalid config pointer!\n"); | |
419 | return 0; | |
420 | } | |
421 | clusterip_config_entry_get(cipinfo->config); | |
422 | } else { | |
423 | /* Case B: This is a new rule referring to an existing | |
424 | * clusterip config. */ | |
425 | cipinfo->config = config; | |
426 | clusterip_config_entry_get(cipinfo->config); | |
427 | } | |
428 | } else { | |
429 | /* Case C: This is a completely new clusterip config */ | |
1da177e4 LT |
430 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
431 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); | |
432 | return 0; | |
433 | } else { | |
434 | struct net_device *dev; | |
435 | ||
436 | if (e->ip.iniface[0] == '\0') { | |
437 | printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); | |
438 | return 0; | |
439 | } | |
440 | ||
441 | dev = dev_get_by_name(e->ip.iniface); | |
442 | if (!dev) { | |
443 | printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); | |
444 | return 0; | |
445 | } | |
446 | ||
447 | config = clusterip_config_init(cipinfo, | |
448 | e->ip.dst.s_addr, dev); | |
449 | if (!config) { | |
450 | printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); | |
451 | dev_put(dev); | |
452 | return 0; | |
453 | } | |
454 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | |
455 | } | |
44513624 | 456 | cipinfo->config = config; |
1da177e4 LT |
457 | } |
458 | ||
1da177e4 LT |
459 | return 1; |
460 | } | |
461 | ||
462 | /* drop reference count of cluster config when rule is deleted */ | |
c4986734 PM |
463 | static void destroy(const struct xt_target *target, void *targinfo, |
464 | unsigned int targinfosize) | |
1da177e4 | 465 | { |
c4986734 | 466 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
1da177e4 | 467 | |
44513624 KK |
468 | /* if no more entries are referencing the config, remove it |
469 | * from the list and destroy the proc entry */ | |
470 | clusterip_config_entry_put(cipinfo->config); | |
471 | ||
1da177e4 LT |
472 | clusterip_config_put(cipinfo->config); |
473 | } | |
474 | ||
1d5cd909 PM |
475 | static struct ipt_target clusterip_tgt = { |
476 | .name = "CLUSTERIP", | |
477 | .target = target, | |
478 | .targetsize = sizeof(struct ipt_clusterip_tgt_info), | |
479 | .checkentry = checkentry, | |
480 | .destroy = destroy, | |
481 | .me = THIS_MODULE | |
1da177e4 LT |
482 | }; |
483 | ||
484 | ||
485 | /*********************************************************************** | |
486 | * ARP MANGLING CODE | |
487 | ***********************************************************************/ | |
488 | ||
489 | /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ | |
490 | struct arp_payload { | |
491 | u_int8_t src_hw[ETH_ALEN]; | |
492 | u_int32_t src_ip; | |
493 | u_int8_t dst_hw[ETH_ALEN]; | |
494 | u_int32_t dst_ip; | |
495 | } __attribute__ ((packed)); | |
496 | ||
497 | #ifdef CLUSTERIP_DEBUG | |
498 | static void arp_print(struct arp_payload *payload) | |
499 | { | |
500 | #define HBUFFERLEN 30 | |
501 | char hbuffer[HBUFFERLEN]; | |
502 | int j,k; | |
503 | const char hexbuf[]= "0123456789abcdef"; | |
504 | ||
505 | for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { | |
506 | hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; | |
507 | hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; | |
508 | hbuffer[k++]=':'; | |
509 | } | |
510 | hbuffer[--k]='\0'; | |
511 | ||
512 | printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", | |
513 | NIPQUAD(payload->src_ip), hbuffer, | |
514 | NIPQUAD(payload->dst_ip)); | |
515 | } | |
516 | #endif | |
517 | ||
518 | static unsigned int | |
519 | arp_mangle(unsigned int hook, | |
520 | struct sk_buff **pskb, | |
521 | const struct net_device *in, | |
522 | const struct net_device *out, | |
523 | int (*okfn)(struct sk_buff *)) | |
524 | { | |
525 | struct arphdr *arp = (*pskb)->nh.arph; | |
526 | struct arp_payload *payload; | |
527 | struct clusterip_config *c; | |
528 | ||
529 | /* we don't care about non-ethernet and non-ipv4 ARP */ | |
530 | if (arp->ar_hrd != htons(ARPHRD_ETHER) | |
531 | || arp->ar_pro != htons(ETH_P_IP) | |
532 | || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | |
533 | return NF_ACCEPT; | |
534 | ||
4095ebf1 HW |
535 | /* we only want to mangle arp requests and replies */ |
536 | if (arp->ar_op != htons(ARPOP_REPLY) | |
537 | && arp->ar_op != htons(ARPOP_REQUEST)) | |
1da177e4 LT |
538 | return NF_ACCEPT; |
539 | ||
540 | payload = (void *)(arp+1); | |
541 | ||
542 | /* if there is no clusterip configuration for the arp reply's | |
543 | * source ip, we don't want to mangle it */ | |
44513624 | 544 | c = clusterip_config_find_get(payload->src_ip, 0); |
1da177e4 LT |
545 | if (!c) |
546 | return NF_ACCEPT; | |
547 | ||
548 | /* normally the linux kernel always replies to arp queries of | |
549 | * addresses on different interfacs. However, in the CLUSTERIP case | |
550 | * this wouldn't work, since we didn't subscribe the mcast group on | |
551 | * other interfaces */ | |
552 | if (c->dev != out) { | |
553 | DEBUGP("CLUSTERIP: not mangling arp reply on different " | |
554 | "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); | |
555 | clusterip_config_put(c); | |
556 | return NF_ACCEPT; | |
557 | } | |
558 | ||
559 | /* mangle reply hardware address */ | |
560 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | |
561 | ||
562 | #ifdef CLUSTERIP_DEBUG | |
563 | DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: "); | |
564 | arp_print(payload); | |
565 | #endif | |
566 | ||
567 | clusterip_config_put(c); | |
568 | ||
569 | return NF_ACCEPT; | |
570 | } | |
571 | ||
572 | static struct nf_hook_ops cip_arp_ops = { | |
573 | .hook = arp_mangle, | |
574 | .pf = NF_ARP, | |
575 | .hooknum = NF_ARP_OUT, | |
576 | .priority = -1 | |
577 | }; | |
578 | ||
579 | /*********************************************************************** | |
580 | * PROC DIR HANDLING | |
581 | ***********************************************************************/ | |
582 | ||
583 | #ifdef CONFIG_PROC_FS | |
584 | ||
136e92bb KK |
/* Iterator state handed between the seq_file callbacks below. */
struct clusterip_seq_position {
	unsigned int pos;	/* index of the current record */
	unsigned int weight;	/* number of set bits, i.e. record count */
	unsigned int bit;	/* 1-based bit number of the current record */
	unsigned long val;	/* snapshot with the reported bits cleared */
};
591 | ||
1da177e4 LT |
592 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
593 | { | |
594 | struct proc_dir_entry *pde = s->private; | |
595 | struct clusterip_config *c = pde->data; | |
136e92bb KK |
596 | unsigned int weight; |
597 | u_int32_t local_nodes; | |
598 | struct clusterip_seq_position *idx; | |
599 | ||
600 | /* FIXME: possible race */ | |
601 | local_nodes = c->local_nodes; | |
602 | weight = hweight32(local_nodes); | |
603 | if (*pos >= weight) | |
1da177e4 LT |
604 | return NULL; |
605 | ||
136e92bb KK |
606 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
607 | if (!idx) | |
1da177e4 LT |
608 | return ERR_PTR(-ENOMEM); |
609 | ||
136e92bb KK |
610 | idx->pos = *pos; |
611 | idx->weight = weight; | |
612 | idx->bit = ffs(local_nodes); | |
613 | idx->val = local_nodes; | |
614 | clear_bit(idx->bit - 1, &idx->val); | |
615 | ||
616 | return idx; | |
1da177e4 LT |
617 | } |
618 | ||
619 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |
620 | { | |
136e92bb | 621 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
1da177e4 | 622 | |
136e92bb KK |
623 | *pos = ++idx->pos; |
624 | if (*pos >= idx->weight) { | |
1da177e4 LT |
625 | kfree(v); |
626 | return NULL; | |
627 | } | |
136e92bb KK |
628 | idx->bit = ffs(idx->val); |
629 | clear_bit(idx->bit - 1, &idx->val); | |
630 | return idx; | |
1da177e4 LT |
631 | } |
632 | ||
/*
 * seq_file ->stop: release the iterator.  'v' may also be NULL (the
 * iterator was already freed by ->next) or the ERR_PTR() value returned
 * by ->start on allocation failure; the latter must not reach kfree().
 */
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
	if (!IS_ERR(v))
		kfree(v);
}
637 | ||
638 | static int clusterip_seq_show(struct seq_file *s, void *v) | |
639 | { | |
136e92bb | 640 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
1da177e4 | 641 | |
136e92bb | 642 | if (idx->pos != 0) |
1da177e4 | 643 | seq_putc(s, ','); |
1da177e4 | 644 | |
136e92bb KK |
645 | seq_printf(s, "%u", idx->bit); |
646 | ||
647 | if (idx->pos == idx->weight - 1) | |
1da177e4 LT |
648 | seq_putc(s, '\n'); |
649 | ||
650 | return 0; | |
651 | } | |
652 | ||
653 | static struct seq_operations clusterip_seq_ops = { | |
654 | .start = clusterip_seq_start, | |
655 | .next = clusterip_seq_next, | |
656 | .stop = clusterip_seq_stop, | |
657 | .show = clusterip_seq_show, | |
658 | }; | |
659 | ||
660 | static int clusterip_proc_open(struct inode *inode, struct file *file) | |
661 | { | |
662 | int ret = seq_open(file, &clusterip_seq_ops); | |
663 | ||
664 | if (!ret) { | |
665 | struct seq_file *sf = file->private_data; | |
666 | struct proc_dir_entry *pde = PDE(inode); | |
667 | struct clusterip_config *c = pde->data; | |
668 | ||
669 | sf->private = pde; | |
670 | ||
671 | clusterip_config_get(c); | |
672 | } | |
673 | ||
674 | return ret; | |
675 | } | |
676 | ||
677 | static int clusterip_proc_release(struct inode *inode, struct file *file) | |
678 | { | |
679 | struct proc_dir_entry *pde = PDE(inode); | |
680 | struct clusterip_config *c = pde->data; | |
681 | int ret; | |
682 | ||
683 | ret = seq_release(inode, file); | |
684 | ||
685 | if (!ret) | |
686 | clusterip_config_put(c); | |
687 | ||
688 | return ret; | |
689 | } | |
690 | ||
691 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |
692 | size_t size, loff_t *ofs) | |
693 | { | |
694 | #define PROC_WRITELEN 10 | |
695 | char buffer[PROC_WRITELEN+1]; | |
696 | struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); | |
697 | struct clusterip_config *c = pde->data; | |
698 | unsigned long nodenum; | |
699 | ||
700 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | |
701 | return -EFAULT; | |
702 | ||
703 | if (*buffer == '+') { | |
704 | nodenum = simple_strtoul(buffer+1, NULL, 10); | |
705 | if (clusterip_add_node(c, nodenum)) | |
706 | return -ENOMEM; | |
707 | } else if (*buffer == '-') { | |
708 | nodenum = simple_strtoul(buffer+1, NULL,10); | |
709 | if (clusterip_del_node(c, nodenum)) | |
710 | return -ENOENT; | |
711 | } else | |
712 | return -EIO; | |
713 | ||
714 | return size; | |
715 | } | |
716 | ||
717 | static struct file_operations clusterip_proc_fops = { | |
718 | .owner = THIS_MODULE, | |
719 | .open = clusterip_proc_open, | |
720 | .read = seq_read, | |
721 | .write = clusterip_proc_write, | |
722 | .llseek = seq_lseek, | |
723 | .release = clusterip_proc_release, | |
724 | }; | |
725 | ||
726 | #endif /* CONFIG_PROC_FS */ | |
727 | ||
728 | static int init_or_cleanup(int fini) | |
729 | { | |
730 | int ret; | |
731 | ||
732 | if (fini) | |
733 | goto cleanup; | |
734 | ||
735 | if (ipt_register_target(&clusterip_tgt)) { | |
736 | ret = -EINVAL; | |
737 | goto cleanup_none; | |
738 | } | |
739 | ||
740 | if (nf_register_hook(&cip_arp_ops) < 0) { | |
741 | ret = -EINVAL; | |
742 | goto cleanup_target; | |
743 | } | |
744 | ||
745 | #ifdef CONFIG_PROC_FS | |
746 | clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); | |
747 | if (!clusterip_procdir) { | |
748 | printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); | |
749 | ret = -ENOMEM; | |
750 | goto cleanup_hook; | |
751 | } | |
752 | #endif /* CONFIG_PROC_FS */ | |
753 | ||
754 | printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", | |
755 | CLUSTERIP_VERSION); | |
756 | ||
757 | return 0; | |
758 | ||
759 | cleanup: | |
760 | printk(KERN_NOTICE "ClusterIP Version %s unloading\n", | |
761 | CLUSTERIP_VERSION); | |
762 | #ifdef CONFIG_PROC_FS | |
763 | remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); | |
764 | #endif | |
765 | cleanup_hook: | |
766 | nf_unregister_hook(&cip_arp_ops); | |
767 | cleanup_target: | |
768 | ipt_unregister_target(&clusterip_tgt); | |
769 | cleanup_none: | |
770 | return -EINVAL; | |
771 | } | |
772 | ||
773 | static int __init init(void) | |
774 | { | |
775 | return init_or_cleanup(0); | |
776 | } | |
777 | ||
778 | static void __exit fini(void) | |
779 | { | |
780 | init_or_cleanup(1); | |
781 | } | |
782 | ||
783 | module_init(init); | |
784 | module_exit(fini); |