]>
Commit | Line | Data |
---|---|---|
e905a9ed | 1 | /* Cluster IP hashmark target |
1da177e4 LT |
2 | * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> |
3 | * based on ideas of Fabio Olive Leite <olive@unixforge.org> | |
4 | * | |
5 | * Development of this code funded by SuSE Linux AG, http://www.suse.com/ | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | */ | |
ff67e4e4 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
1da177e4 | 13 | #include <linux/module.h> |
1da177e4 LT |
14 | #include <linux/proc_fs.h> |
15 | #include <linux/jhash.h> | |
136e92bb | 16 | #include <linux/bitops.h> |
1da177e4 | 17 | #include <linux/skbuff.h> |
5a0e3ad6 | 18 | #include <linux/slab.h> |
1da177e4 LT |
19 | #include <linux/ip.h> |
20 | #include <linux/tcp.h> | |
21 | #include <linux/udp.h> | |
22 | #include <linux/icmp.h> | |
23 | #include <linux/if_arp.h> | |
1da177e4 | 24 | #include <linux/seq_file.h> |
1da177e4 | 25 | #include <linux/netfilter_arp.h> |
6709dbbb | 26 | #include <linux/netfilter/x_tables.h> |
1da177e4 LT |
27 | #include <linux/netfilter_ipv4/ip_tables.h> |
28 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | |
587aa641 | 29 | #include <net/netfilter/nf_conntrack.h> |
457c4cbc | 30 | #include <net/net_namespace.h> |
ce4ff76c | 31 | #include <net/netns/generic.h> |
587aa641 | 32 | #include <net/checksum.h> |
3d04ebb6 | 33 | #include <net/ip.h> |
1da177e4 | 34 | |
136e92bb | 35 | #define CLUSTERIP_VERSION "0.8" |
1da177e4 | 36 | |
1da177e4 LT |
37 | MODULE_LICENSE("GPL"); |
38 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
2ae15b64 | 39 | MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); |
1da177e4 LT |
40 | |
41 | struct clusterip_config { | |
42 | struct list_head list; /* list of all configs */ | |
43 | atomic_t refcount; /* reference count */ | |
44513624 KK |
44 | atomic_t entries; /* number of entries/rules |
45 | * referencing us */ | |
1da177e4 | 46 | |
6a19d614 | 47 | __be32 clusterip; /* the IP address */ |
1da177e4 LT |
48 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ |
49 | struct net_device *dev; /* device */ | |
50 | u_int16_t num_total_nodes; /* total number of nodes */ | |
136e92bb | 51 | unsigned long local_nodes; /* node number array */ |
1da177e4 LT |
52 | |
53 | #ifdef CONFIG_PROC_FS | |
54 | struct proc_dir_entry *pde; /* proc dir entry */ | |
55 | #endif | |
56 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | |
57 | u_int32_t hash_initval; /* hash initialization */ | |
d73f33b1 | 58 | struct rcu_head rcu; |
1da177e4 LT |
59 | }; |
60 | ||
61 | static LIST_HEAD(clusterip_configs); | |
62 | ||
136e92bb | 63 | /* clusterip_lock protects the clusterip_configs list */ |
d73f33b1 | 64 | static DEFINE_SPINLOCK(clusterip_lock); |
1da177e4 LT |
65 | |
66 | #ifdef CONFIG_PROC_FS | |
9a32144e | 67 | static const struct file_operations clusterip_proc_fops; |
1da177e4 LT |
68 | #endif |
69 | ||
ce4ff76c G |
70 | static int clusterip_net_id __read_mostly; |
71 | ||
/* Per-network-namespace data, looked up through clusterip_net_id. */
struct clusterip_net {
#ifdef CONFIG_PROC_FS
	/* directory that holds the per-config proc files */
	struct proc_dir_entry *procdir;
#endif
};
77 | ||
1da177e4 | 78 | static inline void |
44513624 KK |
79 | clusterip_config_get(struct clusterip_config *c) |
80 | { | |
1da177e4 LT |
81 | atomic_inc(&c->refcount); |
82 | } | |
83 | ||
d73f33b1 ED |
84 | |
85 | static void clusterip_config_rcu_free(struct rcu_head *head) | |
86 | { | |
87 | kfree(container_of(head, struct clusterip_config, rcu)); | |
88 | } | |
89 | ||
1da177e4 | 90 | static inline void |
44513624 KK |
91 | clusterip_config_put(struct clusterip_config *c) |
92 | { | |
93 | if (atomic_dec_and_test(&c->refcount)) | |
d73f33b1 | 94 | call_rcu_bh(&c->rcu, clusterip_config_rcu_free); |
44513624 KK |
95 | } |
96 | ||
44513624 KK |
97 | /* decrease the count of entries using/referencing this config. If last |
98 | * entry(rule) is removed, remove the config from lists, but don't free it | |
99 | * yet, since proc-files could still be holding references */ | |
100 | static inline void | |
101 | clusterip_config_entry_put(struct clusterip_config *c) | |
102 | { | |
d73f33b1 ED |
103 | local_bh_disable(); |
104 | if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) { | |
105 | list_del_rcu(&c->list); | |
106 | spin_unlock(&clusterip_lock); | |
107 | local_bh_enable(); | |
44513624 | 108 | |
22bedad3 | 109 | dev_mc_del(c->dev, c->clustermac); |
1da177e4 | 110 | dev_put(c->dev); |
44513624 KK |
111 | |
112 | /* In case anyone still accesses the file, the open/close | |
113 | * functions are also incrementing the refcount on their own, | |
114 | * so it's safe to remove the entry even if it's in use. */ | |
115 | #ifdef CONFIG_PROC_FS | |
a8ca16ea | 116 | proc_remove(c->pde); |
44513624 | 117 | #endif |
4dee9597 | 118 | return; |
1da177e4 | 119 | } |
d73f33b1 | 120 | local_bh_enable(); |
1da177e4 LT |
121 | } |
122 | ||
1da177e4 | 123 | static struct clusterip_config * |
6a19d614 | 124 | __clusterip_config_find(__be32 clusterip) |
1da177e4 | 125 | { |
4c610979 | 126 | struct clusterip_config *c; |
1da177e4 | 127 | |
d73f33b1 | 128 | list_for_each_entry_rcu(c, &clusterip_configs, list) { |
7c4e36bc | 129 | if (c->clusterip == clusterip) |
1da177e4 | 130 | return c; |
1da177e4 LT |
131 | } |
132 | ||
133 | return NULL; | |
134 | } | |
135 | ||
136 | static inline struct clusterip_config * | |
6a19d614 | 137 | clusterip_config_find_get(__be32 clusterip, int entry) |
1da177e4 LT |
138 | { |
139 | struct clusterip_config *c; | |
140 | ||
d73f33b1 | 141 | rcu_read_lock_bh(); |
1da177e4 | 142 | c = __clusterip_config_find(clusterip); |
d73f33b1 ED |
143 | if (c) { |
144 | if (unlikely(!atomic_inc_not_zero(&c->refcount))) | |
145 | c = NULL; | |
146 | else if (entry) | |
147 | atomic_inc(&c->entries); | |
1da177e4 | 148 | } |
d73f33b1 | 149 | rcu_read_unlock_bh(); |
1da177e4 LT |
150 | |
151 | return c; | |
152 | } | |
153 | ||
136e92bb KK |
154 | static void |
155 | clusterip_config_init_nodelist(struct clusterip_config *c, | |
156 | const struct ipt_clusterip_tgt_info *i) | |
157 | { | |
158 | int n; | |
159 | ||
7c4e36bc | 160 | for (n = 0; n < i->num_local_nodes; n++) |
136e92bb | 161 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); |
136e92bb KK |
162 | } |
163 | ||
1da177e4 | 164 | static struct clusterip_config * |
3cf93c96 | 165 | clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, |
1da177e4 LT |
166 | struct net_device *dev) |
167 | { | |
168 | struct clusterip_config *c; | |
ce4ff76c | 169 | struct clusterip_net *cn = net_generic(&init_net, clusterip_net_id); |
1da177e4 | 170 | |
0da974f4 | 171 | c = kzalloc(sizeof(*c), GFP_ATOMIC); |
1da177e4 LT |
172 | if (!c) |
173 | return NULL; | |
174 | ||
1da177e4 LT |
175 | c->dev = dev; |
176 | c->clusterip = ip; | |
177 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | |
178 | c->num_total_nodes = i->num_total_nodes; | |
136e92bb | 179 | clusterip_config_init_nodelist(c, i); |
1da177e4 LT |
180 | c->hash_mode = i->hash_mode; |
181 | c->hash_initval = i->hash_initval; | |
182 | atomic_set(&c->refcount, 1); | |
44513624 | 183 | atomic_set(&c->entries, 1); |
1da177e4 LT |
184 | |
185 | #ifdef CONFIG_PROC_FS | |
76592584 PM |
186 | { |
187 | char buffer[16]; | |
188 | ||
189 | /* create proc dir entry */ | |
cffee385 | 190 | sprintf(buffer, "%pI4", &ip); |
6e79d85d | 191 | c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, |
ce4ff76c | 192 | cn->procdir, |
6e79d85d | 193 | &clusterip_proc_fops, c); |
76592584 PM |
194 | if (!c->pde) { |
195 | kfree(c); | |
196 | return NULL; | |
197 | } | |
1da177e4 | 198 | } |
1da177e4 LT |
199 | #endif |
200 | ||
d73f33b1 ED |
201 | spin_lock_bh(&clusterip_lock); |
202 | list_add_rcu(&c->list, &clusterip_configs); | |
203 | spin_unlock_bh(&clusterip_lock); | |
1da177e4 LT |
204 | |
205 | return c; | |
206 | } | |
207 | ||
76592584 | 208 | #ifdef CONFIG_PROC_FS |
1da177e4 LT |
209 | static int |
210 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | |
211 | { | |
1da177e4 | 212 | |
136e92bb KK |
213 | if (nodenum == 0 || |
214 | nodenum > c->num_total_nodes) | |
1da177e4 | 215 | return 1; |
1da177e4 | 216 | |
136e92bb KK |
217 | /* check if we already have this number in our bitfield */ |
218 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | |
219 | return 1; | |
1da177e4 | 220 | |
1da177e4 LT |
221 | return 0; |
222 | } | |
223 | ||
e1931b78 | 224 | static bool |
1da177e4 LT |
225 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
226 | { | |
136e92bb KK |
227 | if (nodenum == 0 || |
228 | nodenum > c->num_total_nodes) | |
e1931b78 | 229 | return true; |
e905a9ed | 230 | |
136e92bb | 231 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
e1931b78 | 232 | return false; |
1da177e4 | 233 | |
e1931b78 | 234 | return true; |
1da177e4 | 235 | } |
76592584 | 236 | #endif |
1da177e4 LT |
237 | |
238 | static inline u_int32_t | |
a47362a2 JE |
239 | clusterip_hashfn(const struct sk_buff *skb, |
240 | const struct clusterip_config *config) | |
1da177e4 | 241 | { |
a47362a2 | 242 | const struct iphdr *iph = ip_hdr(skb); |
1da177e4 | 243 | unsigned long hashval; |
3d04ebb6 CG |
244 | u_int16_t sport = 0, dport = 0; |
245 | int poff; | |
246 | ||
247 | poff = proto_ports_offset(iph->protocol); | |
248 | if (poff >= 0) { | |
249 | const u_int16_t *ports; | |
250 | u16 _ports[2]; | |
251 | ||
252 | ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); | |
253 | if (ports) { | |
254 | sport = ports[0]; | |
255 | dport = ports[1]; | |
256 | } | |
257 | } else { | |
e87cc472 | 258 | net_info_ratelimited("unknown protocol %u\n", iph->protocol); |
1da177e4 LT |
259 | } |
260 | ||
261 | switch (config->hash_mode) { | |
262 | case CLUSTERIP_HASHMODE_SIP: | |
263 | hashval = jhash_1word(ntohl(iph->saddr), | |
264 | config->hash_initval); | |
265 | break; | |
266 | case CLUSTERIP_HASHMODE_SIP_SPT: | |
e905a9ed | 267 | hashval = jhash_2words(ntohl(iph->saddr), sport, |
1da177e4 LT |
268 | config->hash_initval); |
269 | break; | |
270 | case CLUSTERIP_HASHMODE_SIP_SPT_DPT: | |
271 | hashval = jhash_3words(ntohl(iph->saddr), sport, dport, | |
272 | config->hash_initval); | |
273 | break; | |
274 | default: | |
275 | /* to make gcc happy */ | |
276 | hashval = 0; | |
277 | /* This cannot happen, unless the check function wasn't called | |
278 | * at rule load time */ | |
ff67e4e4 | 279 | pr_info("unknown mode %u\n", config->hash_mode); |
1da177e4 LT |
280 | BUG(); |
281 | break; | |
282 | } | |
283 | ||
284 | /* node numbers are 1..n, not 0..n */ | |
34498825 | 285 | return (((u64)hashval * config->num_total_nodes) >> 32) + 1; |
1da177e4 LT |
286 | } |
287 | ||
288 | static inline int | |
a47362a2 | 289 | clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) |
1da177e4 | 290 | { |
136e92bb | 291 | return test_bit(hash - 1, &config->local_nodes); |
1da177e4 LT |
292 | } |
293 | ||
e905a9ed YH |
294 | /*********************************************************************** |
295 | * IPTABLES TARGET | |
1da177e4 LT |
296 | ***********************************************************************/ |
297 | ||
298 | static unsigned int | |
4b560b44 | 299 | clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) |
1da177e4 | 300 | { |
7eb35586 | 301 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
587aa641 | 302 | struct nf_conn *ct; |
1da177e4 | 303 | enum ip_conntrack_info ctinfo; |
587aa641 | 304 | u_int32_t hash; |
1da177e4 LT |
305 | |
306 | /* don't need to clusterip_config_get() here, since refcount | |
307 | * is only decremented by destroy() - and ip_tables guarantees | |
308 | * that the ->target() function isn't called after ->destroy() */ | |
309 | ||
3db05fea | 310 | ct = nf_ct_get(skb, &ctinfo); |
94d117a1 | 311 | if (ct == NULL) |
1da177e4 | 312 | return NF_DROP; |
1da177e4 LT |
313 | |
314 | /* special case: ICMP error handling. conntrack distinguishes between | |
315 | * error messages (RELATED) and information requests (see below) */ | |
3666ed1c JP |
316 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP && |
317 | (ctinfo == IP_CT_RELATED || | |
fb048833 | 318 | ctinfo == IP_CT_RELATED_REPLY)) |
6709dbbb | 319 | return XT_CONTINUE; |
1da177e4 | 320 | |
e905a9ed | 321 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, |
1da177e4 LT |
322 | * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here |
323 | * on, which all have an ID field [relevant for hashing]. */ | |
324 | ||
3db05fea | 325 | hash = clusterip_hashfn(skb, cipinfo->config); |
1da177e4 LT |
326 | |
327 | switch (ctinfo) { | |
181b1e9c JP |
328 | case IP_CT_NEW: |
329 | ct->mark = hash; | |
330 | break; | |
331 | case IP_CT_RELATED: | |
332 | case IP_CT_RELATED_REPLY: | |
333 | /* FIXME: we don't handle expectations at the moment. | |
334 | * They can arrive on a different node than | |
335 | * the master connection (e.g. FTP passive mode) */ | |
336 | case IP_CT_ESTABLISHED: | |
337 | case IP_CT_ESTABLISHED_REPLY: | |
338 | break; | |
339 | default: /* Prevent gcc warnings */ | |
340 | break; | |
1da177e4 LT |
341 | } |
342 | ||
0d53778e | 343 | #ifdef DEBUG |
3c9fba65 | 344 | nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
1da177e4 | 345 | #endif |
0d53778e | 346 | pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); |
1da177e4 | 347 | if (!clusterip_responsible(cipinfo->config, hash)) { |
0d53778e | 348 | pr_debug("not responsible\n"); |
1da177e4 LT |
349 | return NF_DROP; |
350 | } | |
0d53778e | 351 | pr_debug("responsible\n"); |
1da177e4 LT |
352 | |
353 | /* despite being received via linklayer multicast, this is | |
354 | * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ | |
3db05fea | 355 | skb->pkt_type = PACKET_HOST; |
1da177e4 | 356 | |
6709dbbb | 357 | return XT_CONTINUE; |
1da177e4 LT |
358 | } |
359 | ||
135367b8 | 360 | static int clusterip_tg_check(const struct xt_tgchk_param *par) |
1da177e4 | 361 | { |
af5d6dc2 JE |
362 | struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
363 | const struct ipt_entry *e = par->entryinfo; | |
1da177e4 | 364 | struct clusterip_config *config; |
4a5a5c73 | 365 | int ret; |
1da177e4 | 366 | |
1da177e4 LT |
367 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && |
368 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | |
369 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | |
ff67e4e4 | 370 | pr_info("unknown mode %u\n", cipinfo->hash_mode); |
d6b00a53 | 371 | return -EINVAL; |
1da177e4 LT |
372 | |
373 | } | |
3666ed1c JP |
374 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) || |
375 | e->ip.dst.s_addr == 0) { | |
ff67e4e4 | 376 | pr_info("Please specify destination IP\n"); |
d6b00a53 | 377 | return -EINVAL; |
1da177e4 LT |
378 | } |
379 | ||
380 | /* FIXME: further sanity checks */ | |
381 | ||
44513624 | 382 | config = clusterip_config_find_get(e->ip.dst.s_addr, 1); |
d3c3f424 | 383 | if (!config) { |
1da177e4 | 384 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
ff67e4e4 JE |
385 | pr_info("no config found for %pI4, need 'new'\n", |
386 | &e->ip.dst.s_addr); | |
d6b00a53 | 387 | return -EINVAL; |
1da177e4 LT |
388 | } else { |
389 | struct net_device *dev; | |
390 | ||
391 | if (e->ip.iniface[0] == '\0') { | |
ff67e4e4 | 392 | pr_info("Please specify an interface name\n"); |
d6b00a53 | 393 | return -EINVAL; |
1da177e4 LT |
394 | } |
395 | ||
881d966b | 396 | dev = dev_get_by_name(&init_net, e->ip.iniface); |
1da177e4 | 397 | if (!dev) { |
ff67e4e4 JE |
398 | pr_info("no such interface %s\n", |
399 | e->ip.iniface); | |
4a5a5c73 | 400 | return -ENOENT; |
1da177e4 LT |
401 | } |
402 | ||
e905a9ed | 403 | config = clusterip_config_init(cipinfo, |
1da177e4 LT |
404 | e->ip.dst.s_addr, dev); |
405 | if (!config) { | |
1da177e4 | 406 | dev_put(dev); |
4a5a5c73 | 407 | return -ENOMEM; |
1da177e4 | 408 | } |
22bedad3 | 409 | dev_mc_add(config->dev, config->clustermac); |
1da177e4 LT |
410 | } |
411 | } | |
d3c3f424 | 412 | cipinfo->config = config; |
1da177e4 | 413 | |
4a5a5c73 | 414 | ret = nf_ct_l3proto_try_module_get(par->family); |
f95c74e3 | 415 | if (ret < 0) |
ff67e4e4 JE |
416 | pr_info("cannot load conntrack support for proto=%u\n", |
417 | par->family); | |
f95c74e3 | 418 | return ret; |
1da177e4 LT |
419 | } |
420 | ||
421 | /* drop reference count of cluster config when rule is deleted */ | |
a2df1648 | 422 | static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) |
1da177e4 | 423 | { |
a2df1648 | 424 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
1da177e4 | 425 | |
44513624 KK |
426 | /* if no more entries are referencing the config, remove it |
427 | * from the list and destroy the proc entry */ | |
428 | clusterip_config_entry_put(cipinfo->config); | |
429 | ||
1da177e4 | 430 | clusterip_config_put(cipinfo->config); |
11078c37 | 431 | |
0d345455 | 432 | nf_ct_l3proto_module_put(par->family); |
1da177e4 LT |
433 | } |
434 | ||
d3c3f424 PM |
#ifdef CONFIG_COMPAT
/*
 * Layout of the target info for compat callers; the config pointer is a
 * compat_uptr_t here.  Its size is reported through .compatsize.
 */
struct compat_ipt_clusterip_tgt_info {
	u_int32_t	flags;
	u_int8_t	clustermac[6];
	u_int16_t	num_total_nodes;
	u_int16_t	num_local_nodes;
	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
	u_int32_t	hash_mode;
	u_int32_t	hash_initval;
	compat_uptr_t	config;
};
#endif /* CONFIG_COMPAT */
448 | ||
d3c5ee6d | 449 | static struct xt_target clusterip_tg_reg __read_mostly = { |
1d5cd909 | 450 | .name = "CLUSTERIP", |
ee999d8b | 451 | .family = NFPROTO_IPV4, |
d3c5ee6d JE |
452 | .target = clusterip_tg, |
453 | .checkentry = clusterip_tg_check, | |
454 | .destroy = clusterip_tg_destroy, | |
d3c3f424 PM |
455 | .targetsize = sizeof(struct ipt_clusterip_tgt_info), |
456 | #ifdef CONFIG_COMPAT | |
457 | .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), | |
458 | #endif /* CONFIG_COMPAT */ | |
1d5cd909 | 459 | .me = THIS_MODULE |
1da177e4 LT |
460 | }; |
461 | ||
462 | ||
e905a9ed YH |
463 | /*********************************************************************** |
464 | * ARP MANGLING CODE | |
1da177e4 LT |
465 | ***********************************************************************/ |
466 | ||
467 | /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ | |
468 | struct arp_payload { | |
469 | u_int8_t src_hw[ETH_ALEN]; | |
6a19d614 | 470 | __be32 src_ip; |
1da177e4 | 471 | u_int8_t dst_hw[ETH_ALEN]; |
6a19d614 | 472 | __be32 dst_ip; |
3f30fc15 | 473 | } __packed; |
1da177e4 | 474 | |
#ifdef DEBUG
/* Debug helper: log sender IP, sender MAC and target IP of @payload. */
static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
	char hbuffer[HBUFFERLEN];
	int octet;
	int out = 0;

	/* three characters per octet: two hex digits and a ':' */
	for (octet = 0; out < HBUFFERLEN - 3 && octet < ETH_ALEN; octet++) {
		hbuffer[out++] = hex_asc_hi(payload->src_hw[octet]);
		hbuffer[out++] = hex_asc_lo(payload->src_hw[octet]);
		hbuffer[out++] = ':';
	}
	/* the last ':' becomes the string terminator */
	out--;
	hbuffer[out] = '\0';

	pr_debug("src %pI4@%s, dst %pI4\n",
		 &payload->src_ip, hbuffer, &payload->dst_ip);
}
#endif
493 | ||
494 | static unsigned int | |
495 | arp_mangle(unsigned int hook, | |
3db05fea | 496 | struct sk_buff *skb, |
1da177e4 LT |
497 | const struct net_device *in, |
498 | const struct net_device *out, | |
499 | int (*okfn)(struct sk_buff *)) | |
500 | { | |
3db05fea | 501 | struct arphdr *arp = arp_hdr(skb); |
1da177e4 LT |
502 | struct arp_payload *payload; |
503 | struct clusterip_config *c; | |
504 | ||
505 | /* we don't care about non-ethernet and non-ipv4 ARP */ | |
3666ed1c JP |
506 | if (arp->ar_hrd != htons(ARPHRD_ETHER) || |
507 | arp->ar_pro != htons(ETH_P_IP) || | |
508 | arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | |
1da177e4 LT |
509 | return NF_ACCEPT; |
510 | ||
4095ebf1 | 511 | /* we only want to mangle arp requests and replies */ |
3666ed1c JP |
512 | if (arp->ar_op != htons(ARPOP_REPLY) && |
513 | arp->ar_op != htons(ARPOP_REQUEST)) | |
1da177e4 LT |
514 | return NF_ACCEPT; |
515 | ||
516 | payload = (void *)(arp+1); | |
517 | ||
e905a9ed | 518 | /* if there is no clusterip configuration for the arp reply's |
1da177e4 | 519 | * source ip, we don't want to mangle it */ |
44513624 | 520 | c = clusterip_config_find_get(payload->src_ip, 0); |
1da177e4 LT |
521 | if (!c) |
522 | return NF_ACCEPT; | |
523 | ||
e905a9ed | 524 | /* normally the linux kernel always replies to arp queries of |
1da177e4 LT |
525 | * addresses on different interfacs. However, in the CLUSTERIP case |
526 | * this wouldn't work, since we didn't subscribe the mcast group on | |
527 | * other interfaces */ | |
528 | if (c->dev != out) { | |
ff67e4e4 | 529 | pr_debug("not mangling arp reply on different " |
0d53778e PM |
530 | "interface: cip'%s'-skb'%s'\n", |
531 | c->dev->name, out->name); | |
1da177e4 LT |
532 | clusterip_config_put(c); |
533 | return NF_ACCEPT; | |
534 | } | |
535 | ||
536 | /* mangle reply hardware address */ | |
537 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | |
538 | ||
0d53778e | 539 | #ifdef DEBUG |
ff67e4e4 | 540 | pr_debug("mangled arp reply: "); |
1da177e4 LT |
541 | arp_print(payload); |
542 | #endif | |
543 | ||
544 | clusterip_config_put(c); | |
545 | ||
546 | return NF_ACCEPT; | |
547 | } | |
548 | ||
1999414a | 549 | static struct nf_hook_ops cip_arp_ops __read_mostly = { |
1da177e4 | 550 | .hook = arp_mangle, |
ee999d8b | 551 | .pf = NFPROTO_ARP, |
1da177e4 LT |
552 | .hooknum = NF_ARP_OUT, |
553 | .priority = -1 | |
554 | }; | |
555 | ||
e905a9ed YH |
556 | /*********************************************************************** |
557 | * PROC DIR HANDLING | |
1da177e4 LT |
558 | ***********************************************************************/ |
559 | ||
560 | #ifdef CONFIG_PROC_FS | |
561 | ||
136e92bb KK |
/* Iterator state for the seq_file listing of local node numbers. */
struct clusterip_seq_position {
	unsigned int pos;	/* index of the current record */
	unsigned int weight;	/* number of set bits, i.e. record count */
	unsigned int bit;	/* node number of the current record */
	unsigned long val;	/* bits that have not been emitted yet */
};
568 | ||
1da177e4 LT |
569 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
570 | { | |
47778147 | 571 | struct clusterip_config *c = s->private; |
136e92bb KK |
572 | unsigned int weight; |
573 | u_int32_t local_nodes; | |
574 | struct clusterip_seq_position *idx; | |
575 | ||
576 | /* FIXME: possible race */ | |
577 | local_nodes = c->local_nodes; | |
578 | weight = hweight32(local_nodes); | |
579 | if (*pos >= weight) | |
1da177e4 LT |
580 | return NULL; |
581 | ||
136e92bb KK |
582 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
583 | if (!idx) | |
1da177e4 LT |
584 | return ERR_PTR(-ENOMEM); |
585 | ||
136e92bb KK |
586 | idx->pos = *pos; |
587 | idx->weight = weight; | |
588 | idx->bit = ffs(local_nodes); | |
589 | idx->val = local_nodes; | |
590 | clear_bit(idx->bit - 1, &idx->val); | |
591 | ||
592 | return idx; | |
1da177e4 LT |
593 | } |
594 | ||
595 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |
596 | { | |
3cf93c96 | 597 | struct clusterip_seq_position *idx = v; |
1da177e4 | 598 | |
136e92bb KK |
599 | *pos = ++idx->pos; |
600 | if (*pos >= idx->weight) { | |
1da177e4 LT |
601 | kfree(v); |
602 | return NULL; | |
603 | } | |
136e92bb KK |
604 | idx->bit = ffs(idx->val); |
605 | clear_bit(idx->bit - 1, &idx->val); | |
606 | return idx; | |
1da177e4 LT |
607 | } |
608 | ||
/* seq_file ->stop: free the iterator unless @v is an error pointer. */
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
	if (IS_ERR(v))
		return;

	kfree(v);
}
614 | ||
615 | static int clusterip_seq_show(struct seq_file *s, void *v) | |
616 | { | |
3cf93c96 | 617 | struct clusterip_seq_position *idx = v; |
1da177e4 | 618 | |
e905a9ed | 619 | if (idx->pos != 0) |
1da177e4 | 620 | seq_putc(s, ','); |
1da177e4 | 621 | |
136e92bb KK |
622 | seq_printf(s, "%u", idx->bit); |
623 | ||
624 | if (idx->pos == idx->weight - 1) | |
1da177e4 LT |
625 | seq_putc(s, '\n'); |
626 | ||
627 | return 0; | |
628 | } | |
629 | ||
56b3d975 | 630 | static const struct seq_operations clusterip_seq_ops = { |
1da177e4 LT |
631 | .start = clusterip_seq_start, |
632 | .next = clusterip_seq_next, | |
633 | .stop = clusterip_seq_stop, | |
634 | .show = clusterip_seq_show, | |
635 | }; | |
636 | ||
637 | static int clusterip_proc_open(struct inode *inode, struct file *file) | |
638 | { | |
639 | int ret = seq_open(file, &clusterip_seq_ops); | |
640 | ||
641 | if (!ret) { | |
642 | struct seq_file *sf = file->private_data; | |
d9dda78b | 643 | struct clusterip_config *c = PDE_DATA(inode); |
1da177e4 | 644 | |
47778147 | 645 | sf->private = c; |
1da177e4 LT |
646 | |
647 | clusterip_config_get(c); | |
648 | } | |
649 | ||
650 | return ret; | |
651 | } | |
652 | ||
/*
 * proc ->release: tear down the seq_file and, if that succeeded, drop the
 * reference taken in clusterip_proc_open().
 */
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
	struct clusterip_config *cfg = PDE_DATA(inode);
	int ret = seq_release(inode, file);

	if (ret == 0)
		clusterip_config_put(cfg);

	return ret;
}
665 | ||
666 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |
667 | size_t size, loff_t *ofs) | |
668 | { | |
d9dda78b | 669 | struct clusterip_config *c = PDE_DATA(file_inode(file)); |
1da177e4 LT |
670 | #define PROC_WRITELEN 10 |
671 | char buffer[PROC_WRITELEN+1]; | |
1da177e4 | 672 | unsigned long nodenum; |
4b5511eb | 673 | int rc; |
1da177e4 | 674 | |
961ed183 VK |
675 | if (size > PROC_WRITELEN) |
676 | return -EIO; | |
677 | if (copy_from_user(buffer, input, size)) | |
1da177e4 | 678 | return -EFAULT; |
961ed183 | 679 | buffer[size] = 0; |
1da177e4 LT |
680 | |
681 | if (*buffer == '+') { | |
4b5511eb AP |
682 | rc = kstrtoul(buffer+1, 10, &nodenum); |
683 | if (rc) | |
684 | return rc; | |
1da177e4 LT |
685 | if (clusterip_add_node(c, nodenum)) |
686 | return -ENOMEM; | |
687 | } else if (*buffer == '-') { | |
4b5511eb AP |
688 | rc = kstrtoul(buffer+1, 10, &nodenum); |
689 | if (rc) | |
690 | return rc; | |
1da177e4 LT |
691 | if (clusterip_del_node(c, nodenum)) |
692 | return -ENOENT; | |
693 | } else | |
694 | return -EIO; | |
695 | ||
696 | return size; | |
697 | } | |
698 | ||
9a32144e | 699 | static const struct file_operations clusterip_proc_fops = { |
1da177e4 LT |
700 | .owner = THIS_MODULE, |
701 | .open = clusterip_proc_open, | |
702 | .read = seq_read, | |
703 | .write = clusterip_proc_write, | |
704 | .llseek = seq_lseek, | |
705 | .release = clusterip_proc_release, | |
706 | }; | |
707 | ||
708 | #endif /* CONFIG_PROC_FS */ | |
709 | ||
ce4ff76c G |
/*
 * Per-namespace init: create the "ipt_CLUSTERIP" directory under the
 * namespace's proc_net.
 *
 * Returns 0 on success, -ENOMEM if the directory cannot be created.
 */
static int clusterip_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	struct clusterip_net *cn = net_generic(net, clusterip_net_id);

	cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
	if (!cn->procdir) {
		pr_err("Unable to create proc dir entry\n");
		return -ENOMEM;
	}
#endif /* CONFIG_PROC_FS */

	return 0;
}
724 | ||
/* Per-namespace exit: remove the namespace's proc directory. */
static void clusterip_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	struct clusterip_net *cn;

	cn = net_generic(net, clusterip_net_id);
	proc_remove(cn->procdir);
#endif
}
732 | ||
733 | static struct pernet_operations clusterip_net_ops = { | |
734 | .init = clusterip_net_init, | |
735 | .exit = clusterip_net_exit, | |
736 | .id = &clusterip_net_id, | |
737 | .size = sizeof(struct clusterip_net), | |
738 | }; | |
739 | ||
d3c5ee6d | 740 | static int __init clusterip_tg_init(void) |
1da177e4 LT |
741 | { |
742 | int ret; | |
743 | ||
ce4ff76c | 744 | ret = register_pernet_subsys(&clusterip_net_ops); |
32292a7f PM |
745 | if (ret < 0) |
746 | return ret; | |
1da177e4 | 747 | |
ce4ff76c G |
748 | ret = xt_register_target(&clusterip_tg_reg); |
749 | if (ret < 0) | |
750 | goto cleanup_subsys; | |
751 | ||
32292a7f PM |
752 | ret = nf_register_hook(&cip_arp_ops); |
753 | if (ret < 0) | |
1da177e4 | 754 | goto cleanup_target; |
1da177e4 | 755 | |
ff67e4e4 | 756 | pr_info("ClusterIP Version %s loaded successfully\n", |
1da177e4 | 757 | CLUSTERIP_VERSION); |
ce4ff76c | 758 | |
1da177e4 LT |
759 | return 0; |
760 | ||
1da177e4 | 761 | cleanup_target: |
d3c5ee6d | 762 | xt_unregister_target(&clusterip_tg_reg); |
ce4ff76c G |
763 | cleanup_subsys: |
764 | unregister_pernet_subsys(&clusterip_net_ops); | |
32292a7f | 765 | return ret; |
1da177e4 LT |
766 | } |
767 | ||
d3c5ee6d | 768 | static void __exit clusterip_tg_exit(void) |
1da177e4 | 769 | { |
ff67e4e4 | 770 | pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); |
ce4ff76c | 771 | |
32292a7f | 772 | nf_unregister_hook(&cip_arp_ops); |
d3c5ee6d | 773 | xt_unregister_target(&clusterip_tg_reg); |
ce4ff76c | 774 | unregister_pernet_subsys(&clusterip_net_ops); |
d73f33b1 ED |
775 | |
776 | /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ | |
777 | rcu_barrier_bh(); | |
1da177e4 LT |
778 | } |
779 | ||
d3c5ee6d JE |
780 | module_init(clusterip_tg_init); |
781 | module_exit(clusterip_tg_exit); |