]>
Commit | Line | Data |
---|---|---|
e905a9ed | 1 | /* Cluster IP hashmark target |
1da177e4 LT |
2 | * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> |
3 | * based on ideas of Fabio Olive Leite <olive@unixforge.org> | |
4 | * | |
5 | * Development of this code funded by SuSE Linux AG, http://www.suse.com/ | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | */ | |
ff67e4e4 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
1da177e4 | 13 | #include <linux/module.h> |
1da177e4 LT |
14 | #include <linux/proc_fs.h> |
15 | #include <linux/jhash.h> | |
136e92bb | 16 | #include <linux/bitops.h> |
1da177e4 LT |
17 | #include <linux/skbuff.h> |
18 | #include <linux/ip.h> | |
19 | #include <linux/tcp.h> | |
20 | #include <linux/udp.h> | |
21 | #include <linux/icmp.h> | |
22 | #include <linux/if_arp.h> | |
1da177e4 | 23 | #include <linux/seq_file.h> |
1da177e4 | 24 | #include <linux/netfilter_arp.h> |
6709dbbb | 25 | #include <linux/netfilter/x_tables.h> |
1da177e4 LT |
26 | #include <linux/netfilter_ipv4/ip_tables.h> |
27 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | |
587aa641 | 28 | #include <net/netfilter/nf_conntrack.h> |
457c4cbc | 29 | #include <net/net_namespace.h> |
587aa641 | 30 | #include <net/checksum.h> |
1da177e4 | 31 | |
136e92bb | 32 | #define CLUSTERIP_VERSION "0.8" |
1da177e4 | 33 | |
1da177e4 LT |
34 | MODULE_LICENSE("GPL"); |
35 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
2ae15b64 | 36 | MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); |
1da177e4 LT |
37 | |
38 | struct clusterip_config { | |
39 | struct list_head list; /* list of all configs */ | |
40 | atomic_t refcount; /* reference count */ | |
44513624 KK |
41 | atomic_t entries; /* number of entries/rules |
42 | * referencing us */ | |
1da177e4 | 43 | |
6a19d614 | 44 | __be32 clusterip; /* the IP address */ |
1da177e4 LT |
45 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ |
46 | struct net_device *dev; /* device */ | |
47 | u_int16_t num_total_nodes; /* total number of nodes */ | |
136e92bb | 48 | unsigned long local_nodes; /* node number array */ |
1da177e4 LT |
49 | |
50 | #ifdef CONFIG_PROC_FS | |
51 | struct proc_dir_entry *pde; /* proc dir entry */ | |
52 | #endif | |
53 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | |
54 | u_int32_t hash_initval; /* hash initialization */ | |
55 | }; | |
56 | ||
/* List of all configs. */
static LIST_HEAD(clusterip_configs);

/* clusterip_lock protects the clusterip_configs list */
static DEFINE_RWLOCK(clusterip_lock);

#ifdef CONFIG_PROC_FS
/* Forward declaration; needed by clusterip_config_init(). */
static const struct file_operations clusterip_proc_fops;
/* Directory that holds the per-config proc entries. */
static struct proc_dir_entry *clusterip_procdir;
#endif
66 | ||
67 | static inline void | |
44513624 KK |
68 | clusterip_config_get(struct clusterip_config *c) |
69 | { | |
1da177e4 LT |
70 | atomic_inc(&c->refcount); |
71 | } | |
72 | ||
73 | static inline void | |
44513624 KK |
74 | clusterip_config_put(struct clusterip_config *c) |
75 | { | |
76 | if (atomic_dec_and_test(&c->refcount)) | |
77 | kfree(c); | |
78 | } | |
79 | ||
44513624 KK |
80 | /* decrease the count of entries using/referencing this config. If last |
81 | * entry(rule) is removed, remove the config from lists, but don't free it | |
82 | * yet, since proc-files could still be holding references */ | |
83 | static inline void | |
84 | clusterip_config_entry_put(struct clusterip_config *c) | |
85 | { | |
4dee9597 | 86 | write_lock_bh(&clusterip_lock); |
44513624 | 87 | if (atomic_dec_and_test(&c->entries)) { |
1da177e4 | 88 | list_del(&c->list); |
e45b1be8 | 89 | write_unlock_bh(&clusterip_lock); |
44513624 | 90 | |
1da177e4 LT |
91 | dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); |
92 | dev_put(c->dev); | |
44513624 KK |
93 | |
94 | /* In case anyone still accesses the file, the open/close | |
95 | * functions are also incrementing the refcount on their own, | |
96 | * so it's safe to remove the entry even if it's in use. */ | |
97 | #ifdef CONFIG_PROC_FS | |
98 | remove_proc_entry(c->pde->name, c->pde->parent); | |
99 | #endif | |
4dee9597 | 100 | return; |
1da177e4 | 101 | } |
4dee9597 | 102 | write_unlock_bh(&clusterip_lock); |
1da177e4 LT |
103 | } |
104 | ||
1da177e4 | 105 | static struct clusterip_config * |
6a19d614 | 106 | __clusterip_config_find(__be32 clusterip) |
1da177e4 | 107 | { |
4c610979 | 108 | struct clusterip_config *c; |
1da177e4 | 109 | |
4c610979 | 110 | list_for_each_entry(c, &clusterip_configs, list) { |
7c4e36bc | 111 | if (c->clusterip == clusterip) |
1da177e4 | 112 | return c; |
1da177e4 LT |
113 | } |
114 | ||
115 | return NULL; | |
116 | } | |
117 | ||
118 | static inline struct clusterip_config * | |
6a19d614 | 119 | clusterip_config_find_get(__be32 clusterip, int entry) |
1da177e4 LT |
120 | { |
121 | struct clusterip_config *c; | |
122 | ||
e45b1be8 | 123 | read_lock_bh(&clusterip_lock); |
1da177e4 LT |
124 | c = __clusterip_config_find(clusterip); |
125 | if (!c) { | |
e45b1be8 | 126 | read_unlock_bh(&clusterip_lock); |
1da177e4 LT |
127 | return NULL; |
128 | } | |
129 | atomic_inc(&c->refcount); | |
44513624 KK |
130 | if (entry) |
131 | atomic_inc(&c->entries); | |
e45b1be8 | 132 | read_unlock_bh(&clusterip_lock); |
1da177e4 LT |
133 | |
134 | return c; | |
135 | } | |
136 | ||
136e92bb KK |
137 | static void |
138 | clusterip_config_init_nodelist(struct clusterip_config *c, | |
139 | const struct ipt_clusterip_tgt_info *i) | |
140 | { | |
141 | int n; | |
142 | ||
7c4e36bc | 143 | for (n = 0; n < i->num_local_nodes; n++) |
136e92bb | 144 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); |
136e92bb KK |
145 | } |
146 | ||
1da177e4 | 147 | static struct clusterip_config * |
3cf93c96 | 148 | clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, |
1da177e4 LT |
149 | struct net_device *dev) |
150 | { | |
151 | struct clusterip_config *c; | |
1da177e4 | 152 | |
0da974f4 | 153 | c = kzalloc(sizeof(*c), GFP_ATOMIC); |
1da177e4 LT |
154 | if (!c) |
155 | return NULL; | |
156 | ||
1da177e4 LT |
157 | c->dev = dev; |
158 | c->clusterip = ip; | |
159 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | |
160 | c->num_total_nodes = i->num_total_nodes; | |
136e92bb | 161 | clusterip_config_init_nodelist(c, i); |
1da177e4 LT |
162 | c->hash_mode = i->hash_mode; |
163 | c->hash_initval = i->hash_initval; | |
164 | atomic_set(&c->refcount, 1); | |
44513624 | 165 | atomic_set(&c->entries, 1); |
1da177e4 LT |
166 | |
167 | #ifdef CONFIG_PROC_FS | |
76592584 PM |
168 | { |
169 | char buffer[16]; | |
170 | ||
171 | /* create proc dir entry */ | |
cffee385 | 172 | sprintf(buffer, "%pI4", &ip); |
6e79d85d DL |
173 | c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, |
174 | clusterip_procdir, | |
175 | &clusterip_proc_fops, c); | |
76592584 PM |
176 | if (!c->pde) { |
177 | kfree(c); | |
178 | return NULL; | |
179 | } | |
1da177e4 | 180 | } |
1da177e4 LT |
181 | #endif |
182 | ||
e45b1be8 | 183 | write_lock_bh(&clusterip_lock); |
1da177e4 | 184 | list_add(&c->list, &clusterip_configs); |
e45b1be8 | 185 | write_unlock_bh(&clusterip_lock); |
1da177e4 LT |
186 | |
187 | return c; | |
188 | } | |
189 | ||
76592584 | 190 | #ifdef CONFIG_PROC_FS |
1da177e4 LT |
191 | static int |
192 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | |
193 | { | |
1da177e4 | 194 | |
136e92bb KK |
195 | if (nodenum == 0 || |
196 | nodenum > c->num_total_nodes) | |
1da177e4 | 197 | return 1; |
1da177e4 | 198 | |
136e92bb KK |
199 | /* check if we already have this number in our bitfield */ |
200 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | |
201 | return 1; | |
1da177e4 | 202 | |
1da177e4 LT |
203 | return 0; |
204 | } | |
205 | ||
e1931b78 | 206 | static bool |
1da177e4 LT |
207 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
208 | { | |
136e92bb KK |
209 | if (nodenum == 0 || |
210 | nodenum > c->num_total_nodes) | |
e1931b78 | 211 | return true; |
e905a9ed | 212 | |
136e92bb | 213 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
e1931b78 | 214 | return false; |
1da177e4 | 215 | |
e1931b78 | 216 | return true; |
1da177e4 | 217 | } |
76592584 | 218 | #endif |
1da177e4 LT |
219 | |
220 | static inline u_int32_t | |
a47362a2 JE |
221 | clusterip_hashfn(const struct sk_buff *skb, |
222 | const struct clusterip_config *config) | |
1da177e4 | 223 | { |
a47362a2 | 224 | const struct iphdr *iph = ip_hdr(skb); |
1da177e4 LT |
225 | unsigned long hashval; |
226 | u_int16_t sport, dport; | |
a47362a2 | 227 | const u_int16_t *ports; |
1da177e4 LT |
228 | |
229 | switch (iph->protocol) { | |
230 | case IPPROTO_TCP: | |
1da177e4 | 231 | case IPPROTO_UDP: |
a8d0f952 | 232 | case IPPROTO_UDPLITE: |
957dc80a PM |
233 | case IPPROTO_SCTP: |
234 | case IPPROTO_DCCP: | |
1da177e4 | 235 | case IPPROTO_ICMP: |
a47362a2 | 236 | ports = (const void *)iph+iph->ihl*4; |
957dc80a PM |
237 | sport = ports[0]; |
238 | dport = ports[1]; | |
1da177e4 LT |
239 | break; |
240 | default: | |
7c4e36bc | 241 | if (net_ratelimit()) |
ff67e4e4 | 242 | pr_info("unknown protocol %u\n", iph->protocol); |
1da177e4 LT |
243 | sport = dport = 0; |
244 | } | |
245 | ||
246 | switch (config->hash_mode) { | |
247 | case CLUSTERIP_HASHMODE_SIP: | |
248 | hashval = jhash_1word(ntohl(iph->saddr), | |
249 | config->hash_initval); | |
250 | break; | |
251 | case CLUSTERIP_HASHMODE_SIP_SPT: | |
e905a9ed | 252 | hashval = jhash_2words(ntohl(iph->saddr), sport, |
1da177e4 LT |
253 | config->hash_initval); |
254 | break; | |
255 | case CLUSTERIP_HASHMODE_SIP_SPT_DPT: | |
256 | hashval = jhash_3words(ntohl(iph->saddr), sport, dport, | |
257 | config->hash_initval); | |
258 | break; | |
259 | default: | |
260 | /* to make gcc happy */ | |
261 | hashval = 0; | |
262 | /* This cannot happen, unless the check function wasn't called | |
263 | * at rule load time */ | |
ff67e4e4 | 264 | pr_info("unknown mode %u\n", config->hash_mode); |
1da177e4 LT |
265 | BUG(); |
266 | break; | |
267 | } | |
268 | ||
269 | /* node numbers are 1..n, not 0..n */ | |
34498825 | 270 | return (((u64)hashval * config->num_total_nodes) >> 32) + 1; |
1da177e4 LT |
271 | } |
272 | ||
273 | static inline int | |
a47362a2 | 274 | clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) |
1da177e4 | 275 | { |
136e92bb | 276 | return test_bit(hash - 1, &config->local_nodes); |
1da177e4 LT |
277 | } |
278 | ||
e905a9ed YH |
279 | /*********************************************************************** |
280 | * IPTABLES TARGET | |
1da177e4 LT |
281 | ***********************************************************************/ |
282 | ||
283 | static unsigned int | |
7eb35586 | 284 | clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) |
1da177e4 | 285 | { |
7eb35586 | 286 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
587aa641 | 287 | struct nf_conn *ct; |
1da177e4 | 288 | enum ip_conntrack_info ctinfo; |
587aa641 | 289 | u_int32_t hash; |
1da177e4 LT |
290 | |
291 | /* don't need to clusterip_config_get() here, since refcount | |
292 | * is only decremented by destroy() - and ip_tables guarantees | |
293 | * that the ->target() function isn't called after ->destroy() */ | |
294 | ||
3db05fea | 295 | ct = nf_ct_get(skb, &ctinfo); |
587aa641 | 296 | if (ct == NULL) { |
ff67e4e4 | 297 | pr_info("no conntrack!\n"); |
1da177e4 | 298 | /* FIXME: need to drop invalid ones, since replies |
e905a9ed | 299 | * to outgoing connections of other nodes will be |
1da177e4 LT |
300 | * marked as INVALID */ |
301 | return NF_DROP; | |
302 | } | |
303 | ||
304 | /* special case: ICMP error handling. conntrack distinguishes between | |
305 | * error messages (RELATED) and information requests (see below) */ | |
3666ed1c JP |
306 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP && |
307 | (ctinfo == IP_CT_RELATED || | |
308 | ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)) | |
6709dbbb | 309 | return XT_CONTINUE; |
1da177e4 | 310 | |
e905a9ed | 311 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, |
1da177e4 LT |
312 | * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here |
313 | * on, which all have an ID field [relevant for hashing]. */ | |
314 | ||
3db05fea | 315 | hash = clusterip_hashfn(skb, cipinfo->config); |
1da177e4 LT |
316 | |
317 | switch (ctinfo) { | |
318 | case IP_CT_NEW: | |
587aa641 | 319 | ct->mark = hash; |
1da177e4 LT |
320 | break; |
321 | case IP_CT_RELATED: | |
322 | case IP_CT_RELATED+IP_CT_IS_REPLY: | |
323 | /* FIXME: we don't handle expectations at the | |
324 | * moment. they can arrive on a different node than | |
325 | * the master connection (e.g. FTP passive mode) */ | |
326 | case IP_CT_ESTABLISHED: | |
327 | case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: | |
328 | break; | |
329 | default: | |
330 | break; | |
331 | } | |
332 | ||
0d53778e | 333 | #ifdef DEBUG |
3c9fba65 | 334 | nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
1da177e4 | 335 | #endif |
0d53778e | 336 | pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); |
1da177e4 | 337 | if (!clusterip_responsible(cipinfo->config, hash)) { |
0d53778e | 338 | pr_debug("not responsible\n"); |
1da177e4 LT |
339 | return NF_DROP; |
340 | } | |
0d53778e | 341 | pr_debug("responsible\n"); |
1da177e4 LT |
342 | |
343 | /* despite being received via linklayer multicast, this is | |
344 | * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ | |
3db05fea | 345 | skb->pkt_type = PACKET_HOST; |
1da177e4 | 346 | |
6709dbbb | 347 | return XT_CONTINUE; |
1da177e4 LT |
348 | } |
349 | ||
135367b8 | 350 | static int clusterip_tg_check(const struct xt_tgchk_param *par) |
1da177e4 | 351 | { |
af5d6dc2 JE |
352 | struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
353 | const struct ipt_entry *e = par->entryinfo; | |
1da177e4 | 354 | struct clusterip_config *config; |
4a5a5c73 | 355 | int ret; |
1da177e4 | 356 | |
1da177e4 LT |
357 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && |
358 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | |
359 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | |
ff67e4e4 | 360 | pr_info("unknown mode %u\n", cipinfo->hash_mode); |
d6b00a53 | 361 | return -EINVAL; |
1da177e4 LT |
362 | |
363 | } | |
3666ed1c JP |
364 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) || |
365 | e->ip.dst.s_addr == 0) { | |
ff67e4e4 | 366 | pr_info("Please specify destination IP\n"); |
d6b00a53 | 367 | return -EINVAL; |
1da177e4 LT |
368 | } |
369 | ||
370 | /* FIXME: further sanity checks */ | |
371 | ||
44513624 | 372 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
d3c3f424 | 373 | if (!config) { |
1da177e4 | 374 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
ff67e4e4 JE |
375 | pr_info("no config found for %pI4, need 'new'\n", |
376 | &e->ip.dst.s_addr); | |
d6b00a53 | 377 | return -EINVAL; |
1da177e4 LT |
378 | } else { |
379 | struct net_device *dev; | |
380 | ||
381 | if (e->ip.iniface[0] == '\0') { | |
ff67e4e4 | 382 | pr_info("Please specify an interface name\n"); |
d6b00a53 | 383 | return -EINVAL; |
1da177e4 LT |
384 | } |
385 | ||
881d966b | 386 | dev = dev_get_by_name(&init_net, e->ip.iniface); |
1da177e4 | 387 | if (!dev) { |
ff67e4e4 JE |
388 | pr_info("no such interface %s\n", |
389 | e->ip.iniface); | |
4a5a5c73 | 390 | return -ENOENT; |
1da177e4 LT |
391 | } |
392 | ||
e905a9ed | 393 | config = clusterip_config_init(cipinfo, |
1da177e4 LT |
394 | e->ip.dst.s_addr, dev); |
395 | if (!config) { | |
ff67e4e4 | 396 | pr_info("cannot allocate config\n"); |
1da177e4 | 397 | dev_put(dev); |
4a5a5c73 | 398 | return -ENOMEM; |
1da177e4 LT |
399 | } |
400 | dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); | |
401 | } | |
402 | } | |
d3c3f424 | 403 | cipinfo->config = config; |
1da177e4 | 404 | |
4a5a5c73 | 405 | ret = nf_ct_l3proto_try_module_get(par->family); |
f95c74e3 | 406 | if (ret < 0) |
ff67e4e4 JE |
407 | pr_info("cannot load conntrack support for proto=%u\n", |
408 | par->family); | |
f95c74e3 | 409 | return ret; |
1da177e4 LT |
410 | } |
411 | ||
412 | /* drop reference count of cluster config when rule is deleted */ | |
a2df1648 | 413 | static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) |
1da177e4 | 414 | { |
a2df1648 | 415 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
1da177e4 | 416 | |
44513624 KK |
417 | /* if no more entries are referencing the config, remove it |
418 | * from the list and destroy the proc entry */ | |
419 | clusterip_config_entry_put(cipinfo->config); | |
420 | ||
1da177e4 | 421 | clusterip_config_put(cipinfo->config); |
11078c37 | 422 | |
0d345455 | 423 | nf_ct_l3proto_module_put(par->family); |
1da177e4 LT |
424 | } |
425 | ||
d3c3f424 PM |
#ifdef CONFIG_COMPAT
/*
 * Target info layout for compat callers: the config member is a
 * compat_uptr_t.  Only used for .compatsize below.
 */
struct compat_ipt_clusterip_tgt_info {
	u_int32_t	flags;
	u_int8_t	clustermac[6];
	u_int16_t	num_total_nodes;
	u_int16_t	num_local_nodes;
	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
	u_int32_t	hash_mode;
	u_int32_t	hash_initval;
	compat_uptr_t	config;
};
#endif /* CONFIG_COMPAT */
439 | ||
d3c5ee6d | 440 | static struct xt_target clusterip_tg_reg __read_mostly = { |
1d5cd909 | 441 | .name = "CLUSTERIP", |
ee999d8b | 442 | .family = NFPROTO_IPV4, |
d3c5ee6d JE |
443 | .target = clusterip_tg, |
444 | .checkentry = clusterip_tg_check, | |
445 | .destroy = clusterip_tg_destroy, | |
d3c3f424 PM |
446 | .targetsize = sizeof(struct ipt_clusterip_tgt_info), |
447 | #ifdef CONFIG_COMPAT | |
448 | .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), | |
449 | #endif /* CONFIG_COMPAT */ | |
1d5cd909 | 450 | .me = THIS_MODULE |
1da177e4 LT |
451 | }; |
452 | ||
453 | ||
e905a9ed YH |
454 | /*********************************************************************** |
455 | * ARP MANGLING CODE | |
1da177e4 LT |
456 | ***********************************************************************/ |
457 | ||
458 | /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ | |
459 | struct arp_payload { | |
460 | u_int8_t src_hw[ETH_ALEN]; | |
6a19d614 | 461 | __be32 src_ip; |
1da177e4 | 462 | u_int8_t dst_hw[ETH_ALEN]; |
6a19d614 | 463 | __be32 dst_ip; |
1da177e4 LT |
464 | } __attribute__ ((packed)); |
465 | ||
#ifdef DEBUG
/*
 * Debug helper: log source IP, source MAC (colon-separated hex) and
 * destination IP of @payload.
 */
static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
	char hbuffer[HBUFFERLEN];
	int j, k;

	for (j = 0, k = 0; j < ETH_ALEN && k < HBUFFERLEN - 3; j++) {
		const u_int8_t octet = payload->src_hw[j];

		hbuffer[k++] = hex_asc_hi(octet);
		hbuffer[k++] = hex_asc_lo(octet);
		hbuffer[k++] = ':';
	}
	/* overwrite the trailing ':' with the terminator */
	k--;
	hbuffer[k] = '\0';

	pr_debug("src %pI4@%s, dst %pI4\n",
		 &payload->src_ip, hbuffer, &payload->dst_ip);
}
#endif
484 | ||
485 | static unsigned int | |
486 | arp_mangle(unsigned int hook, | |
3db05fea | 487 | struct sk_buff *skb, |
1da177e4 LT |
488 | const struct net_device *in, |
489 | const struct net_device *out, | |
490 | int (*okfn)(struct sk_buff *)) | |
491 | { | |
3db05fea | 492 | struct arphdr *arp = arp_hdr(skb); |
1da177e4 LT |
493 | struct arp_payload *payload; |
494 | struct clusterip_config *c; | |
495 | ||
496 | /* we don't care about non-ethernet and non-ipv4 ARP */ | |
3666ed1c JP |
497 | if (arp->ar_hrd != htons(ARPHRD_ETHER) || |
498 | arp->ar_pro != htons(ETH_P_IP) || | |
499 | arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | |
1da177e4 LT |
500 | return NF_ACCEPT; |
501 | ||
4095ebf1 | 502 | /* we only want to mangle arp requests and replies */ |
3666ed1c JP |
503 | if (arp->ar_op != htons(ARPOP_REPLY) && |
504 | arp->ar_op != htons(ARPOP_REQUEST)) | |
1da177e4 LT |
505 | return NF_ACCEPT; |
506 | ||
507 | payload = (void *)(arp+1); | |
508 | ||
e905a9ed | 509 | /* if there is no clusterip configuration for the arp reply's |
1da177e4 | 510 | * source ip, we don't want to mangle it */ |
44513624 | 511 | c = clusterip_config_find_get(payload->src_ip, 0); |
1da177e4 LT |
512 | if (!c) |
513 | return NF_ACCEPT; | |
514 | ||
e905a9ed | 515 | /* normally the linux kernel always replies to arp queries of |
1da177e4 LT |
516 | * addresses on different interfacs. However, in the CLUSTERIP case |
517 | * this wouldn't work, since we didn't subscribe the mcast group on | |
518 | * other interfaces */ | |
519 | if (c->dev != out) { | |
ff67e4e4 | 520 | pr_debug("not mangling arp reply on different " |
0d53778e PM |
521 | "interface: cip'%s'-skb'%s'\n", |
522 | c->dev->name, out->name); | |
1da177e4 LT |
523 | clusterip_config_put(c); |
524 | return NF_ACCEPT; | |
525 | } | |
526 | ||
527 | /* mangle reply hardware address */ | |
528 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | |
529 | ||
0d53778e | 530 | #ifdef DEBUG |
ff67e4e4 | 531 | pr_debug("mangled arp reply: "); |
1da177e4 LT |
532 | arp_print(payload); |
533 | #endif | |
534 | ||
535 | clusterip_config_put(c); | |
536 | ||
537 | return NF_ACCEPT; | |
538 | } | |
539 | ||
1999414a | 540 | static struct nf_hook_ops cip_arp_ops __read_mostly = { |
1da177e4 | 541 | .hook = arp_mangle, |
ee999d8b | 542 | .pf = NFPROTO_ARP, |
1da177e4 LT |
543 | .hooknum = NF_ARP_OUT, |
544 | .priority = -1 | |
545 | }; | |
546 | ||
e905a9ed YH |
547 | /*********************************************************************** |
548 | * PROC DIR HANDLING | |
1da177e4 LT |
549 | ***********************************************************************/ |
550 | ||
551 | #ifdef CONFIG_PROC_FS | |
552 | ||
136e92bb KK |
/* Iterator state for the seq_file listing of local nodes. */
struct clusterip_seq_position {
	/* index of the element currently being shown */
	unsigned int pos;
	/* number of bits set == size of the listing */
	unsigned int weight;
	/* current bit (1-based, as returned by ffs()) */
	unsigned int bit;
	/* snapshot of the bitmap with the bits already visited cleared */
	unsigned long val;
};
559 | ||
1da177e4 LT |
560 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
561 | { | |
47778147 | 562 | struct clusterip_config *c = s->private; |
136e92bb KK |
563 | unsigned int weight; |
564 | u_int32_t local_nodes; | |
565 | struct clusterip_seq_position *idx; | |
566 | ||
567 | /* FIXME: possible race */ | |
568 | local_nodes = c->local_nodes; | |
569 | weight = hweight32(local_nodes); | |
570 | if (*pos >= weight) | |
1da177e4 LT |
571 | return NULL; |
572 | ||
136e92bb KK |
573 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
574 | if (!idx) | |
1da177e4 LT |
575 | return ERR_PTR(-ENOMEM); |
576 | ||
136e92bb KK |
577 | idx->pos = *pos; |
578 | idx->weight = weight; | |
579 | idx->bit = ffs(local_nodes); | |
580 | idx->val = local_nodes; | |
581 | clear_bit(idx->bit - 1, &idx->val); | |
582 | ||
583 | return idx; | |
1da177e4 LT |
584 | } |
585 | ||
586 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |
587 | { | |
3cf93c96 | 588 | struct clusterip_seq_position *idx = v; |
1da177e4 | 589 | |
136e92bb KK |
590 | *pos = ++idx->pos; |
591 | if (*pos >= idx->weight) { | |
1da177e4 LT |
592 | kfree(v); |
593 | return NULL; | |
594 | } | |
136e92bb KK |
595 | idx->bit = ffs(idx->val); |
596 | clear_bit(idx->bit - 1, &idx->val); | |
597 | return idx; | |
1da177e4 LT |
598 | } |
599 | ||
/*
 * seq_file ->stop: release the iterator state.
 *
 * @v is whatever ->start or ->next returned last: a valid iterator, NULL,
 * or the ERR_PTR(-ENOMEM) that clusterip_seq_start() returns when its
 * allocation fails.  An error pointer must never reach kfree().
 */
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
	if (!IS_ERR(v))
		kfree(v);
}
604 | ||
605 | static int clusterip_seq_show(struct seq_file *s, void *v) | |
606 | { | |
3cf93c96 | 607 | struct clusterip_seq_position *idx = v; |
1da177e4 | 608 | |
e905a9ed | 609 | if (idx->pos != 0) |
1da177e4 | 610 | seq_putc(s, ','); |
1da177e4 | 611 | |
136e92bb KK |
612 | seq_printf(s, "%u", idx->bit); |
613 | ||
614 | if (idx->pos == idx->weight - 1) | |
1da177e4 LT |
615 | seq_putc(s, '\n'); |
616 | ||
617 | return 0; | |
618 | } | |
619 | ||
56b3d975 | 620 | static const struct seq_operations clusterip_seq_ops = { |
1da177e4 LT |
621 | .start = clusterip_seq_start, |
622 | .next = clusterip_seq_next, | |
623 | .stop = clusterip_seq_stop, | |
624 | .show = clusterip_seq_show, | |
625 | }; | |
626 | ||
627 | static int clusterip_proc_open(struct inode *inode, struct file *file) | |
628 | { | |
629 | int ret = seq_open(file, &clusterip_seq_ops); | |
630 | ||
631 | if (!ret) { | |
632 | struct seq_file *sf = file->private_data; | |
47778147 | 633 | struct clusterip_config *c = PDE(inode)->data; |
1da177e4 | 634 | |
47778147 | 635 | sf->private = c; |
1da177e4 LT |
636 | |
637 | clusterip_config_get(c); | |
638 | } | |
639 | ||
640 | return ret; | |
641 | } | |
642 | ||
643 | static int clusterip_proc_release(struct inode *inode, struct file *file) | |
644 | { | |
47778147 | 645 | struct clusterip_config *c = PDE(inode)->data; |
1da177e4 LT |
646 | int ret; |
647 | ||
648 | ret = seq_release(inode, file); | |
649 | ||
650 | if (!ret) | |
651 | clusterip_config_put(c); | |
652 | ||
653 | return ret; | |
654 | } | |
655 | ||
656 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |
657 | size_t size, loff_t *ofs) | |
658 | { | |
47778147 | 659 | struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data; |
1da177e4 LT |
660 | #define PROC_WRITELEN 10 |
661 | char buffer[PROC_WRITELEN+1]; | |
1da177e4 LT |
662 | unsigned long nodenum; |
663 | ||
664 | if (copy_from_user(buffer, input, PROC_WRITELEN)) | |
665 | return -EFAULT; | |
666 | ||
667 | if (*buffer == '+') { | |
668 | nodenum = simple_strtoul(buffer+1, NULL, 10); | |
669 | if (clusterip_add_node(c, nodenum)) | |
670 | return -ENOMEM; | |
671 | } else if (*buffer == '-') { | |
672 | nodenum = simple_strtoul(buffer+1, NULL,10); | |
673 | if (clusterip_del_node(c, nodenum)) | |
674 | return -ENOENT; | |
675 | } else | |
676 | return -EIO; | |
677 | ||
678 | return size; | |
679 | } | |
680 | ||
9a32144e | 681 | static const struct file_operations clusterip_proc_fops = { |
1da177e4 LT |
682 | .owner = THIS_MODULE, |
683 | .open = clusterip_proc_open, | |
684 | .read = seq_read, | |
685 | .write = clusterip_proc_write, | |
686 | .llseek = seq_lseek, | |
687 | .release = clusterip_proc_release, | |
688 | }; | |
689 | ||
690 | #endif /* CONFIG_PROC_FS */ | |
691 | ||
d3c5ee6d | 692 | static int __init clusterip_tg_init(void) |
1da177e4 LT |
693 | { |
694 | int ret; | |
695 | ||
d3c5ee6d | 696 | ret = xt_register_target(&clusterip_tg_reg); |
32292a7f PM |
697 | if (ret < 0) |
698 | return ret; | |
1da177e4 | 699 | |
32292a7f PM |
700 | ret = nf_register_hook(&cip_arp_ops); |
701 | if (ret < 0) | |
1da177e4 | 702 | goto cleanup_target; |
1da177e4 LT |
703 | |
704 | #ifdef CONFIG_PROC_FS | |
457c4cbc | 705 | clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); |
1da177e4 | 706 | if (!clusterip_procdir) { |
ff67e4e4 | 707 | pr_err("Unable to proc dir entry\n"); |
1da177e4 LT |
708 | ret = -ENOMEM; |
709 | goto cleanup_hook; | |
710 | } | |
711 | #endif /* CONFIG_PROC_FS */ | |
712 | ||
ff67e4e4 | 713 | pr_info("ClusterIP Version %s loaded successfully\n", |
1da177e4 | 714 | CLUSTERIP_VERSION); |
1da177e4 LT |
715 | return 0; |
716 | ||
76592584 | 717 | #ifdef CONFIG_PROC_FS |
1da177e4 LT |
718 | cleanup_hook: |
719 | nf_unregister_hook(&cip_arp_ops); | |
76592584 | 720 | #endif /* CONFIG_PROC_FS */ |
1da177e4 | 721 | cleanup_target: |
d3c5ee6d | 722 | xt_unregister_target(&clusterip_tg_reg); |
32292a7f | 723 | return ret; |
1da177e4 LT |
724 | } |
725 | ||
d3c5ee6d | 726 | static void __exit clusterip_tg_exit(void) |
1da177e4 | 727 | { |
ff67e4e4 | 728 | pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); |
32292a7f PM |
729 | #ifdef CONFIG_PROC_FS |
730 | remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); | |
731 | #endif | |
732 | nf_unregister_hook(&cip_arp_ops); | |
d3c5ee6d | 733 | xt_unregister_target(&clusterip_tg_reg); |
1da177e4 LT |
734 | } |
735 | ||
d3c5ee6d JE |
/* Module entry and exit points. */
module_init(clusterip_tg_init);
module_exit(clusterip_tg_exit);