]>
Commit | Line | Data |
---|---|---|
e905a9ed | 1 | /* Cluster IP hashmark target |
1da177e4 LT |
2 | * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> |
3 | * based on ideas of Fabio Olive Leite <olive@unixforge.org> | |
4 | * | |
5 | * Development of this code funded by SuSE Linux AG, http://www.suse.com/ | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | */ | |
12 | #include <linux/module.h> | |
1da177e4 LT |
13 | #include <linux/proc_fs.h> |
14 | #include <linux/jhash.h> | |
136e92bb | 15 | #include <linux/bitops.h> |
1da177e4 LT |
16 | #include <linux/skbuff.h> |
17 | #include <linux/ip.h> | |
18 | #include <linux/tcp.h> | |
19 | #include <linux/udp.h> | |
20 | #include <linux/icmp.h> | |
21 | #include <linux/if_arp.h> | |
1da177e4 | 22 | #include <linux/seq_file.h> |
1da177e4 | 23 | #include <linux/netfilter_arp.h> |
6709dbbb | 24 | #include <linux/netfilter/x_tables.h> |
1da177e4 LT |
25 | #include <linux/netfilter_ipv4/ip_tables.h> |
26 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | |
587aa641 | 27 | #include <net/netfilter/nf_conntrack.h> |
457c4cbc | 28 | #include <net/net_namespace.h> |
587aa641 | 29 | #include <net/checksum.h> |
1da177e4 | 30 | |
136e92bb | 31 | #define CLUSTERIP_VERSION "0.8" |
1da177e4 | 32 | |
1da177e4 LT |
33 | MODULE_LICENSE("GPL"); |
34 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
35 | MODULE_DESCRIPTION("iptables target for CLUSTERIP"); | |
36 | ||
/*
 * One cluster IP configuration, shared by every rule that targets the
 * same destination address.  Linked on clusterip_configs.
 */
struct clusterip_config {
	struct list_head list;			/* link in clusterip_configs */
	atomic_t refcount;			/* memory reference count; the
						 * struct is kfree()d when
						 * clusterip_config_put() drops
						 * it to zero */
	atomic_t entries;			/* number of entries/rules
						 * referencing us; reaching zero
						 * unlinks the config */

	__be32 clusterip;			/* the IP address (list lookup
						 * key) */
	u_int8_t clustermac[ETH_ALEN];		/* the MAC address */
	struct net_device *dev;			/* device */
	u_int16_t num_total_nodes;		/* total number of nodes */
	unsigned long local_nodes;		/* bitmask of local nodes:
						 * bit (n - 1) set means node
						 * number n is local */

#ifdef CONFIG_PROC_FS
	struct proc_dir_entry *pde;		/* proc dir entry */
#endif
	enum clusterip_hashmode hash_mode;	/* which hashing mode */
	u_int32_t hash_initval;			/* initval passed to jhash */
};
55 | ||
/* all currently known cluster IP configurations */
static LIST_HEAD(clusterip_configs);

/* clusterip_lock protects the clusterip_configs list */
static DEFINE_RWLOCK(clusterip_lock);

#ifdef CONFIG_PROC_FS
/* forward declaration; defined with the proc handlers further down */
static const struct file_operations clusterip_proc_fops;
/* proc directory that holds one file per cluster IP */
static struct proc_dir_entry *clusterip_procdir;
#endif
65 | ||
66 | static inline void | |
44513624 KK |
67 | clusterip_config_get(struct clusterip_config *c) |
68 | { | |
1da177e4 LT |
69 | atomic_inc(&c->refcount); |
70 | } | |
71 | ||
72 | static inline void | |
44513624 KK |
73 | clusterip_config_put(struct clusterip_config *c) |
74 | { | |
75 | if (atomic_dec_and_test(&c->refcount)) | |
76 | kfree(c); | |
77 | } | |
78 | ||
79 | /* increase the count of entries(rules) using/referencing this config */ | |
80 | static inline void | |
81 | clusterip_config_entry_get(struct clusterip_config *c) | |
82 | { | |
83 | atomic_inc(&c->entries); | |
84 | } | |
85 | ||
86 | /* decrease the count of entries using/referencing this config. If last | |
87 | * entry(rule) is removed, remove the config from lists, but don't free it | |
88 | * yet, since proc-files could still be holding references */ | |
89 | static inline void | |
90 | clusterip_config_entry_put(struct clusterip_config *c) | |
91 | { | |
92 | if (atomic_dec_and_test(&c->entries)) { | |
e45b1be8 | 93 | write_lock_bh(&clusterip_lock); |
1da177e4 | 94 | list_del(&c->list); |
e45b1be8 | 95 | write_unlock_bh(&clusterip_lock); |
44513624 | 96 | |
1da177e4 LT |
97 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); |
98 | dev_put(c->dev); | |
44513624 KK |
99 | |
100 | /* In case anyone still accesses the file, the open/close | |
101 | * functions are also incrementing the refcount on their own, | |
102 | * so it's safe to remove the entry even if it's in use. */ | |
103 | #ifdef CONFIG_PROC_FS | |
104 | remove_proc_entry(c->pde->name, c->pde->parent); | |
105 | #endif | |
1da177e4 LT |
106 | } |
107 | } | |
108 | ||
1da177e4 | 109 | static struct clusterip_config * |
6a19d614 | 110 | __clusterip_config_find(__be32 clusterip) |
1da177e4 | 111 | { |
4c610979 | 112 | struct clusterip_config *c; |
1da177e4 | 113 | |
4c610979 | 114 | list_for_each_entry(c, &clusterip_configs, list) { |
7c4e36bc | 115 | if (c->clusterip == clusterip) |
1da177e4 | 116 | return c; |
1da177e4 LT |
117 | } |
118 | ||
119 | return NULL; | |
120 | } | |
121 | ||
122 | static inline struct clusterip_config * | |
6a19d614 | 123 | clusterip_config_find_get(__be32 clusterip, int entry) |
1da177e4 LT |
124 | { |
125 | struct clusterip_config *c; | |
126 | ||
e45b1be8 | 127 | read_lock_bh(&clusterip_lock); |
1da177e4 LT |
128 | c = __clusterip_config_find(clusterip); |
129 | if (!c) { | |
e45b1be8 | 130 | read_unlock_bh(&clusterip_lock); |
1da177e4 LT |
131 | return NULL; |
132 | } | |
133 | atomic_inc(&c->refcount); | |
44513624 KK |
134 | if (entry) |
135 | atomic_inc(&c->entries); | |
e45b1be8 | 136 | read_unlock_bh(&clusterip_lock); |
1da177e4 LT |
137 | |
138 | return c; | |
139 | } | |
140 | ||
136e92bb KK |
141 | static void |
142 | clusterip_config_init_nodelist(struct clusterip_config *c, | |
143 | const struct ipt_clusterip_tgt_info *i) | |
144 | { | |
145 | int n; | |
146 | ||
7c4e36bc | 147 | for (n = 0; n < i->num_local_nodes; n++) |
136e92bb | 148 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); |
136e92bb KK |
149 | } |
150 | ||
1da177e4 | 151 | static struct clusterip_config * |
6a19d614 | 152 | clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, |
1da177e4 LT |
153 | struct net_device *dev) |
154 | { | |
155 | struct clusterip_config *c; | |
1da177e4 | 156 | |
0da974f4 | 157 | c = kzalloc(sizeof(*c), GFP_ATOMIC); |
1da177e4 LT |
158 | if (!c) |
159 | return NULL; | |
160 | ||
1da177e4 LT |
161 | c->dev = dev; |
162 | c->clusterip = ip; | |
163 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | |
164 | c->num_total_nodes = i->num_total_nodes; | |
136e92bb | 165 | clusterip_config_init_nodelist(c, i); |
1da177e4 LT |
166 | c->hash_mode = i->hash_mode; |
167 | c->hash_initval = i->hash_initval; | |
168 | atomic_set(&c->refcount, 1); | |
44513624 | 169 | atomic_set(&c->entries, 1); |
1da177e4 LT |
170 | |
171 | #ifdef CONFIG_PROC_FS | |
76592584 PM |
172 | { |
173 | char buffer[16]; | |
174 | ||
175 | /* create proc dir entry */ | |
176 | sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); | |
177 | c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, | |
178 | clusterip_procdir); | |
179 | if (!c->pde) { | |
180 | kfree(c); | |
181 | return NULL; | |
182 | } | |
1da177e4 LT |
183 | } |
184 | c->pde->proc_fops = &clusterip_proc_fops; | |
185 | c->pde->data = c; | |
186 | #endif | |
187 | ||
e45b1be8 | 188 | write_lock_bh(&clusterip_lock); |
1da177e4 | 189 | list_add(&c->list, &clusterip_configs); |
e45b1be8 | 190 | write_unlock_bh(&clusterip_lock); |
1da177e4 LT |
191 | |
192 | return c; | |
193 | } | |
194 | ||
76592584 | 195 | #ifdef CONFIG_PROC_FS |
1da177e4 LT |
196 | static int |
197 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | |
198 | { | |
1da177e4 | 199 | |
136e92bb KK |
200 | if (nodenum == 0 || |
201 | nodenum > c->num_total_nodes) | |
1da177e4 | 202 | return 1; |
1da177e4 | 203 | |
136e92bb KK |
204 | /* check if we already have this number in our bitfield */ |
205 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | |
206 | return 1; | |
1da177e4 | 207 | |
1da177e4 LT |
208 | return 0; |
209 | } | |
210 | ||
e1931b78 | 211 | static bool |
1da177e4 LT |
212 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
213 | { | |
136e92bb KK |
214 | if (nodenum == 0 || |
215 | nodenum > c->num_total_nodes) | |
e1931b78 | 216 | return true; |
e905a9ed | 217 | |
136e92bb | 218 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
e1931b78 | 219 | return false; |
1da177e4 | 220 | |
e1931b78 | 221 | return true; |
1da177e4 | 222 | } |
76592584 | 223 | #endif |
1da177e4 LT |
224 | |
225 | static inline u_int32_t | |
a47362a2 JE |
226 | clusterip_hashfn(const struct sk_buff *skb, |
227 | const struct clusterip_config *config) | |
1da177e4 | 228 | { |
a47362a2 | 229 | const struct iphdr *iph = ip_hdr(skb); |
1da177e4 LT |
230 | unsigned long hashval; |
231 | u_int16_t sport, dport; | |
a47362a2 | 232 | const u_int16_t *ports; |
1da177e4 LT |
233 | |
234 | switch (iph->protocol) { | |
235 | case IPPROTO_TCP: | |
1da177e4 | 236 | case IPPROTO_UDP: |
a8d0f952 | 237 | case IPPROTO_UDPLITE: |
957dc80a PM |
238 | case IPPROTO_SCTP: |
239 | case IPPROTO_DCCP: | |
1da177e4 | 240 | case IPPROTO_ICMP: |
a47362a2 | 241 | ports = (const void *)iph+iph->ihl*4; |
957dc80a PM |
242 | sport = ports[0]; |
243 | dport = ports[1]; | |
1da177e4 LT |
244 | break; |
245 | default: | |
7c4e36bc | 246 | if (net_ratelimit()) |
1da177e4 LT |
247 | printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", |
248 | iph->protocol); | |
1da177e4 LT |
249 | sport = dport = 0; |
250 | } | |
251 | ||
252 | switch (config->hash_mode) { | |
253 | case CLUSTERIP_HASHMODE_SIP: | |
254 | hashval = jhash_1word(ntohl(iph->saddr), | |
255 | config->hash_initval); | |
256 | break; | |
257 | case CLUSTERIP_HASHMODE_SIP_SPT: | |
e905a9ed | 258 | hashval = jhash_2words(ntohl(iph->saddr), sport, |
1da177e4 LT |
259 | config->hash_initval); |
260 | break; | |
261 | case CLUSTERIP_HASHMODE_SIP_SPT_DPT: | |
262 | hashval = jhash_3words(ntohl(iph->saddr), sport, dport, | |
263 | config->hash_initval); | |
264 | break; | |
265 | default: | |
266 | /* to make gcc happy */ | |
267 | hashval = 0; | |
268 | /* This cannot happen, unless the check function wasn't called | |
269 | * at rule load time */ | |
270 | printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); | |
271 | BUG(); | |
272 | break; | |
273 | } | |
274 | ||
275 | /* node numbers are 1..n, not 0..n */ | |
34498825 | 276 | return (((u64)hashval * config->num_total_nodes) >> 32) + 1; |
1da177e4 LT |
277 | } |
278 | ||
279 | static inline int | |
a47362a2 | 280 | clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) |
1da177e4 | 281 | { |
136e92bb | 282 | return test_bit(hash - 1, &config->local_nodes); |
1da177e4 LT |
283 | } |
284 | ||
e905a9ed YH |
285 | /*********************************************************************** |
286 | * IPTABLES TARGET | |
1da177e4 LT |
287 | ***********************************************************************/ |
288 | ||
289 | static unsigned int | |
d3c5ee6d JE |
290 | clusterip_tg(struct sk_buff *skb, const struct net_device *in, |
291 | const struct net_device *out, unsigned int hooknum, | |
292 | const struct xt_target *target, const void *targinfo) | |
1da177e4 LT |
293 | { |
294 | const struct ipt_clusterip_tgt_info *cipinfo = targinfo; | |
587aa641 | 295 | struct nf_conn *ct; |
1da177e4 | 296 | enum ip_conntrack_info ctinfo; |
587aa641 | 297 | u_int32_t hash; |
1da177e4 LT |
298 | |
299 | /* don't need to clusterip_config_get() here, since refcount | |
300 | * is only decremented by destroy() - and ip_tables guarantees | |
301 | * that the ->target() function isn't called after ->destroy() */ | |
302 | ||
3db05fea | 303 | ct = nf_ct_get(skb, &ctinfo); |
587aa641 | 304 | if (ct == NULL) { |
1da177e4 LT |
305 | printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); |
306 | /* FIXME: need to drop invalid ones, since replies | |
e905a9ed | 307 | * to outgoing connections of other nodes will be |
1da177e4 LT |
308 | * marked as INVALID */ |
309 | return NF_DROP; | |
310 | } | |
311 | ||
312 | /* special case: ICMP error handling. conntrack distinguishes between | |
313 | * error messages (RELATED) and information requests (see below) */ | |
3db05fea | 314 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP |
e905a9ed | 315 | && (ctinfo == IP_CT_RELATED |
5d927eb0 | 316 | || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) |
6709dbbb | 317 | return XT_CONTINUE; |
1da177e4 | 318 | |
e905a9ed | 319 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, |
1da177e4 LT |
320 | * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here |
321 | * on, which all have an ID field [relevant for hashing]. */ | |
322 | ||
3db05fea | 323 | hash = clusterip_hashfn(skb, cipinfo->config); |
1da177e4 LT |
324 | |
325 | switch (ctinfo) { | |
326 | case IP_CT_NEW: | |
587aa641 | 327 | ct->mark = hash; |
1da177e4 LT |
328 | break; |
329 | case IP_CT_RELATED: | |
330 | case IP_CT_RELATED+IP_CT_IS_REPLY: | |
331 | /* FIXME: we don't handle expectations at the | |
332 | * moment. they can arrive on a different node than | |
333 | * the master connection (e.g. FTP passive mode) */ | |
334 | case IP_CT_ESTABLISHED: | |
335 | case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: | |
336 | break; | |
337 | default: | |
338 | break; | |
339 | } | |
340 | ||
0d53778e | 341 | #ifdef DEBUG |
1da177e4 LT |
342 | DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
343 | #endif | |
0d53778e | 344 | pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); |
1da177e4 | 345 | if (!clusterip_responsible(cipinfo->config, hash)) { |
0d53778e | 346 | pr_debug("not responsible\n"); |
1da177e4 LT |
347 | return NF_DROP; |
348 | } | |
0d53778e | 349 | pr_debug("responsible\n"); |
1da177e4 LT |
350 | |
351 | /* despite being received via linklayer multicast, this is | |
352 | * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ | |
3db05fea | 353 | skb->pkt_type = PACKET_HOST; |
1da177e4 | 354 | |
6709dbbb | 355 | return XT_CONTINUE; |
1da177e4 LT |
356 | } |
357 | ||
e1931b78 | 358 | static bool |
d3c5ee6d JE |
359 | clusterip_tg_check(const char *tablename, const void *e_void, |
360 | const struct xt_target *target, void *targinfo, | |
361 | unsigned int hook_mask) | |
1da177e4 LT |
362 | { |
363 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; | |
2e4e6a17 | 364 | const struct ipt_entry *e = e_void; |
1da177e4 LT |
365 | |
366 | struct clusterip_config *config; | |
367 | ||
1da177e4 LT |
368 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && |
369 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | |
370 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | |
371 | printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", | |
372 | cipinfo->hash_mode); | |
e1931b78 | 373 | return false; |
1da177e4 LT |
374 | |
375 | } | |
6a19d614 | 376 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) |
1da177e4 LT |
377 | || e->ip.dst.s_addr == 0) { |
378 | printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); | |
e1931b78 | 379 | return false; |
1da177e4 LT |
380 | } |
381 | ||
382 | /* FIXME: further sanity checks */ | |
383 | ||
44513624 | 384 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
d3c3f424 | 385 | if (!config) { |
1da177e4 LT |
386 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
387 | printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); | |
e1931b78 | 388 | return false; |
1da177e4 LT |
389 | } else { |
390 | struct net_device *dev; | |
391 | ||
392 | if (e->ip.iniface[0] == '\0') { | |
393 | printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); | |
e1931b78 | 394 | return false; |
1da177e4 LT |
395 | } |
396 | ||
881d966b | 397 | dev = dev_get_by_name(&init_net, e->ip.iniface); |
1da177e4 LT |
398 | if (!dev) { |
399 | printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); | |
e1931b78 | 400 | return false; |
1da177e4 LT |
401 | } |
402 | ||
e905a9ed | 403 | config = clusterip_config_init(cipinfo, |
1da177e4 LT |
404 | e->ip.dst.s_addr, dev); |
405 | if (!config) { | |
406 | printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); | |
407 | dev_put(dev); | |
e1931b78 | 408 | return false; |
1da177e4 LT |
409 | } |
410 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | |
411 | } | |
412 | } | |
d3c3f424 | 413 | cipinfo->config = config; |
1da177e4 | 414 | |
11078c37 YK |
415 | if (nf_ct_l3proto_try_module_get(target->family) < 0) { |
416 | printk(KERN_WARNING "can't load conntrack support for " | |
df54aae0 | 417 | "proto=%u\n", target->family); |
e1931b78 | 418 | return false; |
11078c37 YK |
419 | } |
420 | ||
e1931b78 | 421 | return true; |
1da177e4 LT |
422 | } |
423 | ||
424 | /* drop reference count of cluster config when rule is deleted */ | |
d3c5ee6d | 425 | static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) |
1da177e4 | 426 | { |
c4986734 | 427 | struct ipt_clusterip_tgt_info *cipinfo = targinfo; |
1da177e4 | 428 | |
44513624 KK |
429 | /* if no more entries are referencing the config, remove it |
430 | * from the list and destroy the proc entry */ | |
431 | clusterip_config_entry_put(cipinfo->config); | |
432 | ||
1da177e4 | 433 | clusterip_config_put(cipinfo->config); |
11078c37 YK |
434 | |
435 | nf_ct_l3proto_module_put(target->family); | |
1da177e4 LT |
436 | } |
437 | ||
#ifdef CONFIG_COMPAT
/*
 * Compat layout of the target info; the trailing config pointer is a
 * compat_uptr_t here.  Only its size is used in this file (as the
 * .compatsize of the target registration).
 */
struct compat_ipt_clusterip_tgt_info
{
	u_int32_t	flags;
	u_int8_t	clustermac[6];
	u_int16_t	num_total_nodes;
	u_int16_t	num_local_nodes;
	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
	u_int32_t	hash_mode;
	u_int32_t	hash_initval;
	compat_uptr_t	config;		/* compat-sized pointer slot */
};
#endif /* CONFIG_COMPAT */
451 | ||
/* x_tables registration of the IPv4 "CLUSTERIP" target */
static struct xt_target clusterip_tg_reg __read_mostly = {
	.name		= "CLUSTERIP",
	.family		= AF_INET,
	.target		= clusterip_tg,		/* per-packet handler */
	.checkentry	= clusterip_tg_check,	/* rule load-time check */
	.destroy	= clusterip_tg_destroy,	/* rule deletion */
	.targetsize	= sizeof(struct ipt_clusterip_tgt_info),
#ifdef CONFIG_COMPAT
	.compatsize	= sizeof(struct compat_ipt_clusterip_tgt_info),
#endif /* CONFIG_COMPAT */
	.me		= THIS_MODULE
};
464 | ||
465 | ||
e905a9ed YH |
466 | /*********************************************************************** |
467 | * ARP MANGLING CODE | |
1da177e4 LT |
468 | ***********************************************************************/ |
469 | ||
/*
 * ARP payload that follows struct arphdr.
 * Hardcoded for 48bit ethernet and 32bit ipv4 addresses; packed so the
 * fields sit back to back with no padding.
 */
struct arp_payload {
	u_int8_t src_hw[ETH_ALEN];	/* sender hardware address */
	__be32 src_ip;			/* sender IP address */
	u_int8_t dst_hw[ETH_ALEN];	/* target hardware address */
	__be32 dst_ip;			/* target IP address */
} __attribute__ ((packed));
477 | ||
#ifdef DEBUG
/*
 * Debug helper: print the sender IP, the sender hardware address as
 * colon separated hex, and the target IP of an ARP payload.
 */
static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
	const char hexbuf[] = "0123456789abcdef";
	char hbuffer[HBUFFERLEN];
	int octet = 0;
	int used = 0;

	while (used < HBUFFERLEN-3 && octet < ETH_ALEN) {
		const u_int8_t byte = payload->src_hw[octet];

		hbuffer[used++] = hexbuf[(byte >> 4) & 15];
		hbuffer[used++] = hexbuf[byte & 15];
		hbuffer[used++] = ':';
		octet++;
	}
	/* overwrite the trailing ':' with the terminator */
	used--;
	hbuffer[used] = '\0';

	printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n",
	       NIPQUAD(payload->src_ip), hbuffer,
	       NIPQUAD(payload->dst_ip));
}
#endif
498 | ||
499 | static unsigned int | |
500 | arp_mangle(unsigned int hook, | |
3db05fea | 501 | struct sk_buff *skb, |
1da177e4 LT |
502 | const struct net_device *in, |
503 | const struct net_device *out, | |
504 | int (*okfn)(struct sk_buff *)) | |
505 | { | |
3db05fea | 506 | struct arphdr *arp = arp_hdr(skb); |
1da177e4 LT |
507 | struct arp_payload *payload; |
508 | struct clusterip_config *c; | |
509 | ||
510 | /* we don't care about non-ethernet and non-ipv4 ARP */ | |
511 | if (arp->ar_hrd != htons(ARPHRD_ETHER) | |
512 | || arp->ar_pro != htons(ETH_P_IP) | |
513 | || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | |
514 | return NF_ACCEPT; | |
515 | ||
4095ebf1 HW |
516 | /* we only want to mangle arp requests and replies */ |
517 | if (arp->ar_op != htons(ARPOP_REPLY) | |
518 | && arp->ar_op != htons(ARPOP_REQUEST)) | |
1da177e4 LT |
519 | return NF_ACCEPT; |
520 | ||
521 | payload = (void *)(arp+1); | |
522 | ||
e905a9ed | 523 | /* if there is no clusterip configuration for the arp reply's |
1da177e4 | 524 | * source ip, we don't want to mangle it */ |
44513624 | 525 | c = clusterip_config_find_get(payload->src_ip, 0); |
1da177e4 LT |
526 | if (!c) |
527 | return NF_ACCEPT; | |
528 | ||
e905a9ed | 529 | /* normally the linux kernel always replies to arp queries of |
1da177e4 LT |
530 | * addresses on different interfacs. However, in the CLUSTERIP case |
531 | * this wouldn't work, since we didn't subscribe the mcast group on | |
532 | * other interfaces */ | |
533 | if (c->dev != out) { | |
0d53778e PM |
534 | pr_debug("CLUSTERIP: not mangling arp reply on different " |
535 | "interface: cip'%s'-skb'%s'\n", | |
536 | c->dev->name, out->name); | |
1da177e4 LT |
537 | clusterip_config_put(c); |
538 | return NF_ACCEPT; | |
539 | } | |
540 | ||
541 | /* mangle reply hardware address */ | |
542 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | |
543 | ||
0d53778e PM |
544 | #ifdef DEBUG |
545 | pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); | |
1da177e4 LT |
546 | arp_print(payload); |
547 | #endif | |
548 | ||
549 | clusterip_config_put(c); | |
550 | ||
551 | return NF_ACCEPT; | |
552 | } | |
553 | ||
/* netfilter hook registration: run arp_mangle() on outgoing ARP packets */
static struct nf_hook_ops cip_arp_ops __read_mostly = {
	.hook = arp_mangle,
	.pf = NF_ARP,
	.hooknum = NF_ARP_OUT,
	.priority = -1
};
560 | ||
e905a9ed YH |
561 | /*********************************************************************** |
562 | * PROC DIR HANDLING | |
1da177e4 LT |
563 | ***********************************************************************/ |
564 | ||
565 | #ifdef CONFIG_PROC_FS | |
566 | ||
/*
 * Iterator state for the proc file, allocated by the seq start handler
 * and freed by the next/stop handlers.
 */
struct clusterip_seq_position {
	unsigned int pos;	/* position */
	unsigned int weight;	/* number of bits set == size */
	unsigned int bit;	/* current bit (1-based, as returned by ffs) */
	unsigned long val;	/* current value: snapshot of the node bitmask
				 * with the bits visited so far cleared */
};
573 | ||
1da177e4 LT |
574 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
575 | { | |
576 | struct proc_dir_entry *pde = s->private; | |
577 | struct clusterip_config *c = pde->data; | |
136e92bb KK |
578 | unsigned int weight; |
579 | u_int32_t local_nodes; | |
580 | struct clusterip_seq_position *idx; | |
581 | ||
582 | /* FIXME: possible race */ | |
583 | local_nodes = c->local_nodes; | |
584 | weight = hweight32(local_nodes); | |
585 | if (*pos >= weight) | |
1da177e4 LT |
586 | return NULL; |
587 | ||
136e92bb KK |
588 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
589 | if (!idx) | |
1da177e4 LT |
590 | return ERR_PTR(-ENOMEM); |
591 | ||
136e92bb KK |
592 | idx->pos = *pos; |
593 | idx->weight = weight; | |
594 | idx->bit = ffs(local_nodes); | |
595 | idx->val = local_nodes; | |
596 | clear_bit(idx->bit - 1, &idx->val); | |
597 | ||
598 | return idx; | |
1da177e4 LT |
599 | } |
600 | ||
601 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |
602 | { | |
136e92bb | 603 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
1da177e4 | 604 | |
136e92bb KK |
605 | *pos = ++idx->pos; |
606 | if (*pos >= idx->weight) { | |
1da177e4 LT |
607 | kfree(v); |
608 | return NULL; | |
609 | } | |
136e92bb KK |
610 | idx->bit = ffs(idx->val); |
611 | clear_bit(idx->bit - 1, &idx->val); | |
612 | return idx; | |
1da177e4 LT |
613 | } |
614 | ||
/*
 * seq_file stop: free the iterator.
 *
 * @v is whatever the last start/next call returned.  That may be NULL
 * (kfree() tolerates it) but also the ERR_PTR(-ENOMEM) produced by
 * clusterip_seq_start() on allocation failure, which must not be passed
 * to kfree().
 */
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
	if (!IS_ERR(v))
		kfree(v);
}
619 | ||
620 | static int clusterip_seq_show(struct seq_file *s, void *v) | |
621 | { | |
136e92bb | 622 | struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; |
1da177e4 | 623 | |
e905a9ed | 624 | if (idx->pos != 0) |
1da177e4 | 625 | seq_putc(s, ','); |
1da177e4 | 626 | |
136e92bb KK |
627 | seq_printf(s, "%u", idx->bit); |
628 | ||
629 | if (idx->pos == idx->weight - 1) | |
1da177e4 LT |
630 | seq_putc(s, '\n'); |
631 | ||
632 | return 0; | |
633 | } | |
634 | ||
/* seq_file iterator callbacks for the per-cluster-IP proc file */
static const struct seq_operations clusterip_seq_ops = {
	.start	= clusterip_seq_start,
	.next	= clusterip_seq_next,
	.stop	= clusterip_seq_stop,
	.show	= clusterip_seq_show,
};
641 | ||
642 | static int clusterip_proc_open(struct inode *inode, struct file *file) | |
643 | { | |
644 | int ret = seq_open(file, &clusterip_seq_ops); | |
645 | ||
646 | if (!ret) { | |
647 | struct seq_file *sf = file->private_data; | |
648 | struct proc_dir_entry *pde = PDE(inode); | |
649 | struct clusterip_config *c = pde->data; | |
650 | ||
651 | sf->private = pde; | |
652 | ||
653 | clusterip_config_get(c); | |
654 | } | |
655 | ||
656 | return ret; | |
657 | } | |
658 | ||
659 | static int clusterip_proc_release(struct inode *inode, struct file *file) | |
660 | { | |
661 | struct proc_dir_entry *pde = PDE(inode); | |
662 | struct clusterip_config *c = pde->data; | |
663 | int ret; | |
664 | ||
665 | ret = seq_release(inode, file); | |
666 | ||
667 | if (!ret) | |
668 | clusterip_config_put(c); | |
669 | ||
670 | return ret; | |
671 | } | |
672 | ||
673 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |
674 | size_t size, loff_t *ofs) | |
675 | { | |
676 | #define PROC_WRITELEN 10 | |
677 | char buffer[PROC_WRITELEN+1]; | |
6df81ab2 | 678 | struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); |
1da177e4 LT |
679 | struct clusterip_config *c = pde->data; |
680 | unsigned long nodenum; | |
681 | ||
682 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | |
683 | return -EFAULT; | |
684 | ||
685 | if (*buffer == '+') { | |
686 | nodenum = simple_strtoul(buffer+1, NULL, 10); | |
687 | if (clusterip_add_node(c, nodenum)) | |
688 | return -ENOMEM; | |
689 | } else if (*buffer == '-') { | |
690 | nodenum = simple_strtoul(buffer+1, NULL,10); | |
691 | if (clusterip_del_node(c, nodenum)) | |
692 | return -ENOENT; | |
693 | } else | |
694 | return -EIO; | |
695 | ||
696 | return size; | |
697 | } | |
698 | ||
/*
 * File operations of the per-cluster-IP proc file: reads go through
 * seq_file, writes add/remove local nodes.
 */
static const struct file_operations clusterip_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = clusterip_proc_open,
	.read	 = seq_read,
	.write	 = clusterip_proc_write,
	.llseek	 = seq_lseek,
	.release = clusterip_proc_release,
};
707 | ||
708 | #endif /* CONFIG_PROC_FS */ | |
709 | ||
d3c5ee6d | 710 | static int __init clusterip_tg_init(void) |
1da177e4 LT |
711 | { |
712 | int ret; | |
713 | ||
d3c5ee6d | 714 | ret = xt_register_target(&clusterip_tg_reg); |
32292a7f PM |
715 | if (ret < 0) |
716 | return ret; | |
1da177e4 | 717 | |
32292a7f PM |
718 | ret = nf_register_hook(&cip_arp_ops); |
719 | if (ret < 0) | |
1da177e4 | 720 | goto cleanup_target; |
1da177e4 LT |
721 | |
722 | #ifdef CONFIG_PROC_FS | |
457c4cbc | 723 | clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); |
1da177e4 LT |
724 | if (!clusterip_procdir) { |
725 | printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); | |
726 | ret = -ENOMEM; | |
727 | goto cleanup_hook; | |
728 | } | |
729 | #endif /* CONFIG_PROC_FS */ | |
730 | ||
731 | printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", | |
732 | CLUSTERIP_VERSION); | |
1da177e4 LT |
733 | return 0; |
734 | ||
76592584 | 735 | #ifdef CONFIG_PROC_FS |
1da177e4 LT |
736 | cleanup_hook: |
737 | nf_unregister_hook(&cip_arp_ops); | |
76592584 | 738 | #endif /* CONFIG_PROC_FS */ |
1da177e4 | 739 | cleanup_target: |
d3c5ee6d | 740 | xt_unregister_target(&clusterip_tg_reg); |
32292a7f | 741 | return ret; |
1da177e4 LT |
742 | } |
743 | ||
d3c5ee6d | 744 | static void __exit clusterip_tg_exit(void) |
1da177e4 | 745 | { |
32292a7f PM |
746 | printk(KERN_NOTICE "ClusterIP Version %s unloading\n", |
747 | CLUSTERIP_VERSION); | |
748 | #ifdef CONFIG_PROC_FS | |
749 | remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); | |
750 | #endif | |
751 | nf_unregister_hook(&cip_arp_ops); | |
d3c5ee6d | 752 | xt_unregister_target(&clusterip_tg_reg); |
1da177e4 LT |
753 | } |
754 | ||
d3c5ee6d JE |
755 | module_init(clusterip_tg_init); |
756 | module_exit(clusterip_tg_exit); |