]>
Commit | Line | Data |
---|---|---|
e905a9ed | 1 | /* Cluster IP hashmark target |
1da177e4 LT |
2 | * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> |
3 | * based on ideas of Fabio Olive Leite <olive@unixforge.org> | |
4 | * | |
5 | * Development of this code funded by SuSE Linux AG, http://www.suse.com/ | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | */ | |
ff67e4e4 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
1da177e4 | 13 | #include <linux/module.h> |
1da177e4 LT |
14 | #include <linux/proc_fs.h> |
15 | #include <linux/jhash.h> | |
136e92bb | 16 | #include <linux/bitops.h> |
1da177e4 | 17 | #include <linux/skbuff.h> |
5a0e3ad6 | 18 | #include <linux/slab.h> |
1da177e4 LT |
19 | #include <linux/ip.h> |
20 | #include <linux/tcp.h> | |
21 | #include <linux/udp.h> | |
22 | #include <linux/icmp.h> | |
23 | #include <linux/if_arp.h> | |
1da177e4 | 24 | #include <linux/seq_file.h> |
b54ab92b | 25 | #include <linux/refcount.h> |
1da177e4 | 26 | #include <linux/netfilter_arp.h> |
6709dbbb | 27 | #include <linux/netfilter/x_tables.h> |
1da177e4 LT |
28 | #include <linux/netfilter_ipv4/ip_tables.h> |
29 | #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> | |
587aa641 | 30 | #include <net/netfilter/nf_conntrack.h> |
457c4cbc | 31 | #include <net/net_namespace.h> |
ce4ff76c | 32 | #include <net/netns/generic.h> |
587aa641 | 33 | #include <net/checksum.h> |
3d04ebb6 | 34 | #include <net/ip.h> |
1da177e4 | 35 | |
136e92bb | 36 | #define CLUSTERIP_VERSION "0.8" |
1da177e4 | 37 | |
1da177e4 LT |
38 | MODULE_LICENSE("GPL"); |
39 | MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); | |
2ae15b64 | 40 | MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); |
1da177e4 LT |
41 | |
42 | struct clusterip_config { | |
43 | struct list_head list; /* list of all configs */ | |
b54ab92b RE |
44 | refcount_t refcount; /* reference count */ |
45 | refcount_t entries; /* number of entries/rules | |
44513624 | 46 | * referencing us */ |
1da177e4 | 47 | |
6a19d614 | 48 | __be32 clusterip; /* the IP address */ |
1da177e4 | 49 | u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ |
202f59af | 50 | int ifindex; /* device ifindex */ |
1da177e4 | 51 | u_int16_t num_total_nodes; /* total number of nodes */ |
136e92bb | 52 | unsigned long local_nodes; /* node number array */ |
1da177e4 LT |
53 | |
54 | #ifdef CONFIG_PROC_FS | |
55 | struct proc_dir_entry *pde; /* proc dir entry */ | |
56 | #endif | |
57 | enum clusterip_hashmode hash_mode; /* which hashing mode */ | |
58 | u_int32_t hash_initval; /* hash initialization */ | |
d73f33b1 | 59 | struct rcu_head rcu; |
202f59af XL |
60 | |
61 | char ifname[IFNAMSIZ]; /* device ifname */ | |
62 | struct notifier_block notifier; /* refresh c->ifindex in it */ | |
1da177e4 LT |
63 | }; |
64 | ||
1da177e4 | 65 | #ifdef CONFIG_PROC_FS |
9a32144e | 66 | static const struct file_operations clusterip_proc_fops; |
1da177e4 | 67 | #endif |
1da177e4 | 68 | |
c7d03a00 | 69 | static unsigned int clusterip_net_id __read_mostly; |
ce4ff76c G |
70 | |
71 | struct clusterip_net { | |
26a89e43 | 72 | struct list_head configs; |
f1e8077f G |
73 | /* lock protects the configs list */ |
74 | spinlock_t lock; | |
1da177e4 LT |
75 | |
76 | #ifdef CONFIG_PROC_FS | |
ce4ff76c | 77 | struct proc_dir_entry *procdir; |
1da177e4 | 78 | #endif |
ce4ff76c | 79 | }; |
1da177e4 LT |
80 | |
81 | static inline void | |
44513624 KK |
82 | clusterip_config_get(struct clusterip_config *c) |
83 | { | |
b54ab92b | 84 | refcount_inc(&c->refcount); |
1da177e4 LT |
85 | } |
86 | ||
d73f33b1 ED |
87 | |
88 | static void clusterip_config_rcu_free(struct rcu_head *head) | |
89 | { | |
90 | kfree(container_of(head, struct clusterip_config, rcu)); | |
91 | } | |
92 | ||
1da177e4 | 93 | static inline void |
44513624 KK |
94 | clusterip_config_put(struct clusterip_config *c) |
95 | { | |
b54ab92b | 96 | if (refcount_dec_and_test(&c->refcount)) |
d73f33b1 | 97 | call_rcu_bh(&c->rcu, clusterip_config_rcu_free); |
44513624 KK |
98 | } |
99 | ||
44513624 KK |
100 | /* decrease the count of entries using/referencing this config. If last |
101 | * entry(rule) is removed, remove the config from lists, but don't free it | |
102 | * yet, since proc-files could still be holding references */ | |
103 | static inline void | |
202f59af | 104 | clusterip_config_entry_put(struct net *net, struct clusterip_config *c) |
44513624 | 105 | { |
d86946d2 | 106 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
f1e8077f | 107 | |
d73f33b1 | 108 | local_bh_disable(); |
b54ab92b | 109 | if (refcount_dec_and_lock(&c->entries, &cn->lock)) { |
44513624 KK |
110 | /* In case anyone still accesses the file, the open/close |
111 | * functions are also incrementing the refcount on their own, | |
112 | * so it's safe to remove the entry even if it's in use. */ | |
113 | #ifdef CONFIG_PROC_FS | |
3840538a SD |
114 | if (cn->procdir) |
115 | proc_remove(c->pde); | |
44513624 | 116 | #endif |
3b87bfac CW |
117 | list_del_rcu(&c->list); |
118 | spin_unlock(&cn->lock); | |
119 | local_bh_enable(); | |
120 | ||
121 | unregister_netdevice_notifier(&c->notifier); | |
122 | ||
4dee9597 | 123 | return; |
1da177e4 | 124 | } |
d73f33b1 | 125 | local_bh_enable(); |
1da177e4 LT |
126 | } |
127 | ||
1da177e4 | 128 | static struct clusterip_config * |
b5ef0f85 | 129 | __clusterip_config_find(struct net *net, __be32 clusterip) |
1da177e4 | 130 | { |
4c610979 | 131 | struct clusterip_config *c; |
b5ef0f85 | 132 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
1da177e4 | 133 | |
26a89e43 | 134 | list_for_each_entry_rcu(c, &cn->configs, list) { |
7c4e36bc | 135 | if (c->clusterip == clusterip) |
1da177e4 | 136 | return c; |
1da177e4 LT |
137 | } |
138 | ||
139 | return NULL; | |
140 | } | |
141 | ||
142 | static inline struct clusterip_config * | |
b5ef0f85 | 143 | clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) |
1da177e4 LT |
144 | { |
145 | struct clusterip_config *c; | |
146 | ||
d73f33b1 | 147 | rcu_read_lock_bh(); |
b5ef0f85 | 148 | c = __clusterip_config_find(net, clusterip); |
d73f33b1 | 149 | if (c) { |
3fd0b634 AB |
150 | #ifdef CONFIG_PROC_FS |
151 | if (!c->pde) | |
152 | c = NULL; | |
153 | else | |
154 | #endif | |
b54ab92b | 155 | if (unlikely(!refcount_inc_not_zero(&c->refcount))) |
d73f33b1 ED |
156 | c = NULL; |
157 | else if (entry) | |
b54ab92b | 158 | refcount_inc(&c->entries); |
1da177e4 | 159 | } |
d73f33b1 | 160 | rcu_read_unlock_bh(); |
1da177e4 LT |
161 | |
162 | return c; | |
163 | } | |
164 | ||
136e92bb KK |
165 | static void |
166 | clusterip_config_init_nodelist(struct clusterip_config *c, | |
167 | const struct ipt_clusterip_tgt_info *i) | |
168 | { | |
169 | int n; | |
170 | ||
7c4e36bc | 171 | for (n = 0; n < i->num_local_nodes; n++) |
136e92bb | 172 | set_bit(i->local_nodes[n] - 1, &c->local_nodes); |
136e92bb KK |
173 | } |
174 | ||
202f59af XL |
175 | static int |
176 | clusterip_netdev_event(struct notifier_block *this, unsigned long event, | |
177 | void *ptr) | |
1da177e4 | 178 | { |
202f59af | 179 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
1da177e4 | 180 | struct clusterip_config *c; |
202f59af XL |
181 | |
182 | c = container_of(this, struct clusterip_config, notifier); | |
183 | switch (event) { | |
184 | case NETDEV_REGISTER: | |
185 | if (!strcmp(dev->name, c->ifname)) { | |
186 | c->ifindex = dev->ifindex; | |
187 | dev_mc_add(dev, c->clustermac); | |
188 | } | |
189 | break; | |
190 | case NETDEV_UNREGISTER: | |
191 | if (dev->ifindex == c->ifindex) { | |
192 | dev_mc_del(dev, c->clustermac); | |
193 | c->ifindex = -1; | |
194 | } | |
195 | break; | |
196 | case NETDEV_CHANGENAME: | |
197 | if (!strcmp(dev->name, c->ifname)) { | |
198 | c->ifindex = dev->ifindex; | |
199 | dev_mc_add(dev, c->clustermac); | |
200 | } else if (dev->ifindex == c->ifindex) { | |
201 | dev_mc_del(dev, c->clustermac); | |
202 | c->ifindex = -1; | |
203 | } | |
204 | break; | |
205 | } | |
206 | ||
207 | return NOTIFY_DONE; | |
208 | } | |
209 | ||
210 | static struct clusterip_config * | |
211 | clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, | |
212 | __be32 ip, const char *iniface) | |
213 | { | |
6c5d5cfb | 214 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
202f59af XL |
215 | struct clusterip_config *c; |
216 | int err; | |
1da177e4 | 217 | |
0da974f4 | 218 | c = kzalloc(sizeof(*c), GFP_ATOMIC); |
1da177e4 | 219 | if (!c) |
6c5d5cfb | 220 | return ERR_PTR(-ENOMEM); |
1da177e4 | 221 | |
202f59af XL |
222 | strcpy(c->ifname, iniface); |
223 | c->ifindex = -1; | |
1da177e4 LT |
224 | c->clusterip = ip; |
225 | memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); | |
226 | c->num_total_nodes = i->num_total_nodes; | |
136e92bb | 227 | clusterip_config_init_nodelist(c, i); |
1da177e4 LT |
228 | c->hash_mode = i->hash_mode; |
229 | c->hash_initval = i->hash_initval; | |
b54ab92b RE |
230 | refcount_set(&c->refcount, 1); |
231 | refcount_set(&c->entries, 1); | |
1da177e4 | 232 | |
6c5d5cfb XL |
233 | spin_lock_bh(&cn->lock); |
234 | if (__clusterip_config_find(net, ip)) { | |
235 | spin_unlock_bh(&cn->lock); | |
236 | kfree(c); | |
237 | ||
238 | return ERR_PTR(-EBUSY); | |
239 | } | |
240 | ||
241 | list_add_rcu(&c->list, &cn->configs); | |
242 | spin_unlock_bh(&cn->lock); | |
243 | ||
1da177e4 | 244 | #ifdef CONFIG_PROC_FS |
76592584 PM |
245 | { |
246 | char buffer[16]; | |
247 | ||
248 | /* create proc dir entry */ | |
cffee385 | 249 | sprintf(buffer, "%pI4", &ip); |
6e79d85d | 250 | c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, |
ce4ff76c | 251 | cn->procdir, |
6e79d85d | 252 | &clusterip_proc_fops, c); |
76592584 | 253 | if (!c->pde) { |
202f59af XL |
254 | err = -ENOMEM; |
255 | goto err; | |
76592584 | 256 | } |
1da177e4 | 257 | } |
1da177e4 LT |
258 | #endif |
259 | ||
202f59af XL |
260 | c->notifier.notifier_call = clusterip_netdev_event; |
261 | err = register_netdevice_notifier(&c->notifier); | |
262 | if (!err) | |
263 | return c; | |
264 | ||
265 | #ifdef CONFIG_PROC_FS | |
266 | proc_remove(c->pde); | |
267 | err: | |
268 | #endif | |
269 | spin_lock_bh(&cn->lock); | |
270 | list_del_rcu(&c->list); | |
271 | spin_unlock_bh(&cn->lock); | |
272 | kfree(c); | |
273 | ||
274 | return ERR_PTR(err); | |
1da177e4 LT |
275 | } |
276 | ||
76592584 | 277 | #ifdef CONFIG_PROC_FS |
1da177e4 LT |
278 | static int |
279 | clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) | |
280 | { | |
1da177e4 | 281 | |
136e92bb KK |
282 | if (nodenum == 0 || |
283 | nodenum > c->num_total_nodes) | |
1da177e4 | 284 | return 1; |
1da177e4 | 285 | |
136e92bb KK |
286 | /* check if we already have this number in our bitfield */ |
287 | if (test_and_set_bit(nodenum - 1, &c->local_nodes)) | |
288 | return 1; | |
1da177e4 | 289 | |
1da177e4 LT |
290 | return 0; |
291 | } | |
292 | ||
e1931b78 | 293 | static bool |
1da177e4 LT |
294 | clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) |
295 | { | |
136e92bb KK |
296 | if (nodenum == 0 || |
297 | nodenum > c->num_total_nodes) | |
e1931b78 | 298 | return true; |
e905a9ed | 299 | |
136e92bb | 300 | if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) |
e1931b78 | 301 | return false; |
1da177e4 | 302 | |
e1931b78 | 303 | return true; |
1da177e4 | 304 | } |
76592584 | 305 | #endif |
1da177e4 LT |
306 | |
307 | static inline u_int32_t | |
a47362a2 JE |
308 | clusterip_hashfn(const struct sk_buff *skb, |
309 | const struct clusterip_config *config) | |
1da177e4 | 310 | { |
a47362a2 | 311 | const struct iphdr *iph = ip_hdr(skb); |
1da177e4 | 312 | unsigned long hashval; |
3d04ebb6 CG |
313 | u_int16_t sport = 0, dport = 0; |
314 | int poff; | |
315 | ||
316 | poff = proto_ports_offset(iph->protocol); | |
317 | if (poff >= 0) { | |
318 | const u_int16_t *ports; | |
319 | u16 _ports[2]; | |
320 | ||
321 | ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); | |
322 | if (ports) { | |
323 | sport = ports[0]; | |
324 | dport = ports[1]; | |
325 | } | |
326 | } else { | |
e87cc472 | 327 | net_info_ratelimited("unknown protocol %u\n", iph->protocol); |
1da177e4 LT |
328 | } |
329 | ||
330 | switch (config->hash_mode) { | |
331 | case CLUSTERIP_HASHMODE_SIP: | |
332 | hashval = jhash_1word(ntohl(iph->saddr), | |
333 | config->hash_initval); | |
334 | break; | |
335 | case CLUSTERIP_HASHMODE_SIP_SPT: | |
e905a9ed | 336 | hashval = jhash_2words(ntohl(iph->saddr), sport, |
1da177e4 LT |
337 | config->hash_initval); |
338 | break; | |
339 | case CLUSTERIP_HASHMODE_SIP_SPT_DPT: | |
340 | hashval = jhash_3words(ntohl(iph->saddr), sport, dport, | |
341 | config->hash_initval); | |
342 | break; | |
343 | default: | |
344 | /* to make gcc happy */ | |
345 | hashval = 0; | |
346 | /* This cannot happen, unless the check function wasn't called | |
347 | * at rule load time */ | |
ff67e4e4 | 348 | pr_info("unknown mode %u\n", config->hash_mode); |
1da177e4 LT |
349 | BUG(); |
350 | break; | |
351 | } | |
352 | ||
353 | /* node numbers are 1..n, not 0..n */ | |
8fc54f68 | 354 | return reciprocal_scale(hashval, config->num_total_nodes) + 1; |
1da177e4 LT |
355 | } |
356 | ||
357 | static inline int | |
a47362a2 | 358 | clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) |
1da177e4 | 359 | { |
136e92bb | 360 | return test_bit(hash - 1, &config->local_nodes); |
1da177e4 LT |
361 | } |
362 | ||
e905a9ed YH |
363 | /*********************************************************************** |
364 | * IPTABLES TARGET | |
1da177e4 LT |
365 | ***********************************************************************/ |
366 | ||
367 | static unsigned int | |
4b560b44 | 368 | clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) |
1da177e4 | 369 | { |
7eb35586 | 370 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
587aa641 | 371 | struct nf_conn *ct; |
1da177e4 | 372 | enum ip_conntrack_info ctinfo; |
587aa641 | 373 | u_int32_t hash; |
1da177e4 LT |
374 | |
375 | /* don't need to clusterip_config_get() here, since refcount | |
376 | * is only decremented by destroy() - and ip_tables guarantees | |
377 | * that the ->target() function isn't called after ->destroy() */ | |
378 | ||
3db05fea | 379 | ct = nf_ct_get(skb, &ctinfo); |
94d117a1 | 380 | if (ct == NULL) |
1da177e4 | 381 | return NF_DROP; |
1da177e4 LT |
382 | |
383 | /* special case: ICMP error handling. conntrack distinguishes between | |
384 | * error messages (RELATED) and information requests (see below) */ | |
3666ed1c JP |
385 | if (ip_hdr(skb)->protocol == IPPROTO_ICMP && |
386 | (ctinfo == IP_CT_RELATED || | |
fb048833 | 387 | ctinfo == IP_CT_RELATED_REPLY)) |
6709dbbb | 388 | return XT_CONTINUE; |
1da177e4 | 389 | |
e905a9ed | 390 | /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, |
1da177e4 LT |
391 | * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here |
392 | * on, which all have an ID field [relevant for hashing]. */ | |
393 | ||
3db05fea | 394 | hash = clusterip_hashfn(skb, cipinfo->config); |
1da177e4 LT |
395 | |
396 | switch (ctinfo) { | |
181b1e9c JP |
397 | case IP_CT_NEW: |
398 | ct->mark = hash; | |
399 | break; | |
400 | case IP_CT_RELATED: | |
401 | case IP_CT_RELATED_REPLY: | |
402 | /* FIXME: we don't handle expectations at the moment. | |
403 | * They can arrive on a different node than | |
404 | * the master connection (e.g. FTP passive mode) */ | |
405 | case IP_CT_ESTABLISHED: | |
406 | case IP_CT_ESTABLISHED_REPLY: | |
407 | break; | |
408 | default: /* Prevent gcc warnings */ | |
409 | break; | |
1da177e4 LT |
410 | } |
411 | ||
0d53778e | 412 | #ifdef DEBUG |
3c9fba65 | 413 | nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); |
1da177e4 | 414 | #endif |
0d53778e | 415 | pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); |
1da177e4 | 416 | if (!clusterip_responsible(cipinfo->config, hash)) { |
0d53778e | 417 | pr_debug("not responsible\n"); |
1da177e4 LT |
418 | return NF_DROP; |
419 | } | |
0d53778e | 420 | pr_debug("responsible\n"); |
1da177e4 LT |
421 | |
422 | /* despite being received via linklayer multicast, this is | |
423 | * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ | |
3db05fea | 424 | skb->pkt_type = PACKET_HOST; |
1da177e4 | 425 | |
6709dbbb | 426 | return XT_CONTINUE; |
1da177e4 LT |
427 | } |
428 | ||
135367b8 | 429 | static int clusterip_tg_check(const struct xt_tgchk_param *par) |
1da177e4 | 430 | { |
af5d6dc2 JE |
431 | struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
432 | const struct ipt_entry *e = par->entryinfo; | |
1da177e4 | 433 | struct clusterip_config *config; |
52753c43 | 434 | int ret, i; |
1da177e4 | 435 | |
55917a21 PNA |
436 | if (par->nft_compat) { |
437 | pr_err("cannot use CLUSTERIP target from nftables compat\n"); | |
438 | return -EOPNOTSUPP; | |
439 | } | |
440 | ||
1da177e4 LT |
441 | if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && |
442 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && | |
443 | cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { | |
ff67e4e4 | 444 | pr_info("unknown mode %u\n", cipinfo->hash_mode); |
d6b00a53 | 445 | return -EINVAL; |
1da177e4 LT |
446 | |
447 | } | |
3666ed1c JP |
448 | if (e->ip.dmsk.s_addr != htonl(0xffffffff) || |
449 | e->ip.dst.s_addr == 0) { | |
ff67e4e4 | 450 | pr_info("Please specify destination IP\n"); |
d6b00a53 | 451 | return -EINVAL; |
1da177e4 | 452 | } |
52753c43 DV |
453 | if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) { |
454 | pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes); | |
455 | return -EINVAL; | |
456 | } | |
457 | for (i = 0; i < cipinfo->num_local_nodes; i++) { | |
458 | if (cipinfo->local_nodes[i] - 1 >= | |
459 | sizeof(config->local_nodes) * 8) { | |
460 | pr_info("bad local_nodes[%d] %u\n", | |
461 | i, cipinfo->local_nodes[i]); | |
462 | return -EINVAL; | |
463 | } | |
464 | } | |
1da177e4 | 465 | |
d86946d2 | 466 | config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); |
d3c3f424 | 467 | if (!config) { |
1da177e4 | 468 | if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { |
ff67e4e4 JE |
469 | pr_info("no config found for %pI4, need 'new'\n", |
470 | &e->ip.dst.s_addr); | |
d6b00a53 | 471 | return -EINVAL; |
1da177e4 LT |
472 | } else { |
473 | struct net_device *dev; | |
474 | ||
475 | if (e->ip.iniface[0] == '\0') { | |
ff67e4e4 | 476 | pr_info("Please specify an interface name\n"); |
d6b00a53 | 477 | return -EINVAL; |
1da177e4 LT |
478 | } |
479 | ||
d86946d2 | 480 | dev = dev_get_by_name(par->net, e->ip.iniface); |
1da177e4 | 481 | if (!dev) { |
ff67e4e4 JE |
482 | pr_info("no such interface %s\n", |
483 | e->ip.iniface); | |
4a5a5c73 | 484 | return -ENOENT; |
1da177e4 | 485 | } |
202f59af | 486 | dev_put(dev); |
1da177e4 | 487 | |
202f59af XL |
488 | config = clusterip_config_init(par->net, cipinfo, |
489 | e->ip.dst.s_addr, | |
490 | e->ip.iniface); | |
491 | if (IS_ERR(config)) | |
6c5d5cfb | 492 | return PTR_ERR(config); |
1da177e4 LT |
493 | } |
494 | } | |
d3c3f424 | 495 | cipinfo->config = config; |
1da177e4 | 496 | |
ecb2421b | 497 | ret = nf_ct_netns_get(par->net, par->family); |
f95c74e3 | 498 | if (ret < 0) |
ff67e4e4 JE |
499 | pr_info("cannot load conntrack support for proto=%u\n", |
500 | par->family); | |
43270b1b PNA |
501 | |
502 | if (!par->net->xt.clusterip_deprecated_warning) { | |
503 | pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " | |
504 | "use xt_cluster instead\n"); | |
505 | par->net->xt.clusterip_deprecated_warning = true; | |
506 | } | |
507 | ||
f95c74e3 | 508 | return ret; |
1da177e4 LT |
509 | } |
510 | ||
511 | /* drop reference count of cluster config when rule is deleted */ | |
a2df1648 | 512 | static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) |
1da177e4 | 513 | { |
a2df1648 | 514 | const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; |
1da177e4 | 515 | |
44513624 KK |
516 | /* if no more entries are referencing the config, remove it |
517 | * from the list and destroy the proc entry */ | |
202f59af | 518 | clusterip_config_entry_put(par->net, cipinfo->config); |
44513624 | 519 | |
1da177e4 | 520 | clusterip_config_put(cipinfo->config); |
11078c37 | 521 | |
fe50543c | 522 | nf_ct_netns_put(par->net, par->family); |
1da177e4 LT |
523 | } |
524 | ||
d3c3f424 PM |
#ifdef CONFIG_COMPAT
/* Compat variant of the target info; here the config member is a
 * compat_uptr_t. */
struct compat_ipt_clusterip_tgt_info
{
	u_int32_t	flags;
	u_int8_t	clustermac[6];
	u_int16_t	num_total_nodes;
	u_int16_t	num_local_nodes;
	u_int16_t	local_nodes[CLUSTERIP_MAX_NODES];
	u_int32_t	hash_mode;
	u_int32_t	hash_initval;
	compat_uptr_t	config;
};
#endif /* CONFIG_COMPAT */
538 | ||
d3c5ee6d | 539 | static struct xt_target clusterip_tg_reg __read_mostly = { |
1d5cd909 | 540 | .name = "CLUSTERIP", |
ee999d8b | 541 | .family = NFPROTO_IPV4, |
d3c5ee6d JE |
542 | .target = clusterip_tg, |
543 | .checkentry = clusterip_tg_check, | |
544 | .destroy = clusterip_tg_destroy, | |
d3c3f424 | 545 | .targetsize = sizeof(struct ipt_clusterip_tgt_info), |
ec231890 | 546 | .usersize = offsetof(struct ipt_clusterip_tgt_info, config), |
d3c3f424 PM |
547 | #ifdef CONFIG_COMPAT |
548 | .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), | |
549 | #endif /* CONFIG_COMPAT */ | |
1d5cd909 | 550 | .me = THIS_MODULE |
1da177e4 LT |
551 | }; |
552 | ||
553 | ||
e905a9ed YH |
554 | /*********************************************************************** |
555 | * ARP MANGLING CODE | |
1da177e4 LT |
556 | ***********************************************************************/ |
557 | ||
558 | /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ | |
559 | struct arp_payload { | |
560 | u_int8_t src_hw[ETH_ALEN]; | |
6a19d614 | 561 | __be32 src_ip; |
1da177e4 | 562 | u_int8_t dst_hw[ETH_ALEN]; |
6a19d614 | 563 | __be32 dst_ip; |
3f30fc15 | 564 | } __packed; |
1da177e4 | 565 | |
#ifdef DEBUG
/* Debug helper: print the source IP and colon-separated source MAC plus the
 * destination IP of @payload. */
static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
	char hbuffer[HBUFFERLEN];
	int octet = 0, out = 0;

	while (out < HBUFFERLEN - 3 && octet < ETH_ALEN) {
		hbuffer[out++] = hex_asc_hi(payload->src_hw[octet]);
		hbuffer[out++] = hex_asc_lo(payload->src_hw[octet]);
		hbuffer[out++] = ':';
		octet++;
	}
	/* the trailing ':' is overwritten by the terminator */
	hbuffer[--out] = '\0';

	pr_debug("src %pI4@%s, dst %pI4\n",
		 &payload->src_ip, hbuffer, &payload->dst_ip);
}
#endif
584 | ||
585 | static unsigned int | |
06198b34 | 586 | arp_mangle(void *priv, |
3db05fea | 587 | struct sk_buff *skb, |
238e54c9 | 588 | const struct nf_hook_state *state) |
1da177e4 | 589 | { |
3db05fea | 590 | struct arphdr *arp = arp_hdr(skb); |
1da177e4 LT |
591 | struct arp_payload *payload; |
592 | struct clusterip_config *c; | |
9dff2c96 | 593 | struct net *net = state->net; |
1da177e4 LT |
594 | |
595 | /* we don't care about non-ethernet and non-ipv4 ARP */ | |
3666ed1c JP |
596 | if (arp->ar_hrd != htons(ARPHRD_ETHER) || |
597 | arp->ar_pro != htons(ETH_P_IP) || | |
598 | arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) | |
1da177e4 LT |
599 | return NF_ACCEPT; |
600 | ||
4095ebf1 | 601 | /* we only want to mangle arp requests and replies */ |
3666ed1c JP |
602 | if (arp->ar_op != htons(ARPOP_REPLY) && |
603 | arp->ar_op != htons(ARPOP_REQUEST)) | |
1da177e4 LT |
604 | return NF_ACCEPT; |
605 | ||
606 | payload = (void *)(arp+1); | |
607 | ||
e905a9ed | 608 | /* if there is no clusterip configuration for the arp reply's |
1da177e4 | 609 | * source ip, we don't want to mangle it */ |
d86946d2 | 610 | c = clusterip_config_find_get(net, payload->src_ip, 0); |
1da177e4 LT |
611 | if (!c) |
612 | return NF_ACCEPT; | |
613 | ||
e905a9ed | 614 | /* normally the linux kernel always replies to arp queries of |
1da177e4 LT |
615 | * addresses on different interfacs. However, in the CLUSTERIP case |
616 | * this wouldn't work, since we didn't subscribe the mcast group on | |
617 | * other interfaces */ | |
202f59af XL |
618 | if (c->ifindex != state->out->ifindex) { |
619 | pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n", | |
620 | c->ifindex, state->out->ifindex); | |
1da177e4 LT |
621 | clusterip_config_put(c); |
622 | return NF_ACCEPT; | |
623 | } | |
624 | ||
625 | /* mangle reply hardware address */ | |
626 | memcpy(payload->src_hw, c->clustermac, arp->ar_hln); | |
627 | ||
0d53778e | 628 | #ifdef DEBUG |
ff67e4e4 | 629 | pr_debug("mangled arp reply: "); |
1da177e4 LT |
630 | arp_print(payload); |
631 | #endif | |
632 | ||
633 | clusterip_config_put(c); | |
634 | ||
635 | return NF_ACCEPT; | |
636 | } | |
637 | ||
591bb278 | 638 | static const struct nf_hook_ops cip_arp_ops = { |
1da177e4 | 639 | .hook = arp_mangle, |
ee999d8b | 640 | .pf = NFPROTO_ARP, |
1da177e4 LT |
641 | .hooknum = NF_ARP_OUT, |
642 | .priority = -1 | |
643 | }; | |
644 | ||
e905a9ed YH |
645 | /*********************************************************************** |
646 | * PROC DIR HANDLING | |
1da177e4 LT |
647 | ***********************************************************************/ |
648 | ||
649 | #ifdef CONFIG_PROC_FS | |
650 | ||
136e92bb KK |
/* Iterator state of the seq_file that lists the local nodes. */
struct clusterip_seq_position {
	unsigned int pos;	/* index of the current element */
	unsigned int weight;	/* number of set bits, i.e. element count */
	unsigned int bit;	/* 1-based number of the current bit */
	unsigned long val;	/* bits that have not been visited yet */
};
657 | ||
1da177e4 LT |
658 | static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) |
659 | { | |
47778147 | 660 | struct clusterip_config *c = s->private; |
136e92bb KK |
661 | unsigned int weight; |
662 | u_int32_t local_nodes; | |
663 | struct clusterip_seq_position *idx; | |
664 | ||
665 | /* FIXME: possible race */ | |
666 | local_nodes = c->local_nodes; | |
667 | weight = hweight32(local_nodes); | |
668 | if (*pos >= weight) | |
1da177e4 LT |
669 | return NULL; |
670 | ||
136e92bb KK |
671 | idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); |
672 | if (!idx) | |
1da177e4 LT |
673 | return ERR_PTR(-ENOMEM); |
674 | ||
136e92bb KK |
675 | idx->pos = *pos; |
676 | idx->weight = weight; | |
677 | idx->bit = ffs(local_nodes); | |
678 | idx->val = local_nodes; | |
679 | clear_bit(idx->bit - 1, &idx->val); | |
680 | ||
681 | return idx; | |
1da177e4 LT |
682 | } |
683 | ||
684 | static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) | |
685 | { | |
3cf93c96 | 686 | struct clusterip_seq_position *idx = v; |
1da177e4 | 687 | |
136e92bb KK |
688 | *pos = ++idx->pos; |
689 | if (*pos >= idx->weight) { | |
1da177e4 LT |
690 | kfree(v); |
691 | return NULL; | |
692 | } | |
136e92bb KK |
693 | idx->bit = ffs(idx->val); |
694 | clear_bit(idx->bit - 1, &idx->val); | |
695 | return idx; | |
1da177e4 LT |
696 | } |
697 | ||
/* seq_file ->stop: free the iterator unless @v is an error pointer. */
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
	if (IS_ERR(v))
		return;

	kfree(v);
}
703 | ||
704 | static int clusterip_seq_show(struct seq_file *s, void *v) | |
705 | { | |
3cf93c96 | 706 | struct clusterip_seq_position *idx = v; |
1da177e4 | 707 | |
e905a9ed | 708 | if (idx->pos != 0) |
1da177e4 | 709 | seq_putc(s, ','); |
1da177e4 | 710 | |
136e92bb KK |
711 | seq_printf(s, "%u", idx->bit); |
712 | ||
713 | if (idx->pos == idx->weight - 1) | |
1da177e4 LT |
714 | seq_putc(s, '\n'); |
715 | ||
716 | return 0; | |
717 | } | |
718 | ||
56b3d975 | 719 | static const struct seq_operations clusterip_seq_ops = { |
1da177e4 LT |
720 | .start = clusterip_seq_start, |
721 | .next = clusterip_seq_next, | |
722 | .stop = clusterip_seq_stop, | |
723 | .show = clusterip_seq_show, | |
724 | }; | |
725 | ||
726 | static int clusterip_proc_open(struct inode *inode, struct file *file) | |
727 | { | |
728 | int ret = seq_open(file, &clusterip_seq_ops); | |
729 | ||
730 | if (!ret) { | |
731 | struct seq_file *sf = file->private_data; | |
d9dda78b | 732 | struct clusterip_config *c = PDE_DATA(inode); |
1da177e4 | 733 | |
47778147 | 734 | sf->private = c; |
1da177e4 LT |
735 | |
736 | clusterip_config_get(c); | |
737 | } | |
738 | ||
739 | return ret; | |
740 | } | |
741 | ||
/* proc ->release: release the seq_file and, when that succeeded, drop the
 * config reference taken at open time. */
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
	struct clusterip_config *c = PDE_DATA(inode);
	int ret = seq_release(inode, file);

	if (ret)
		return ret;

	clusterip_config_put(c);
	return 0;
}
754 | ||
755 | static ssize_t clusterip_proc_write(struct file *file, const char __user *input, | |
756 | size_t size, loff_t *ofs) | |
757 | { | |
d9dda78b | 758 | struct clusterip_config *c = PDE_DATA(file_inode(file)); |
1da177e4 LT |
759 | #define PROC_WRITELEN 10 |
760 | char buffer[PROC_WRITELEN+1]; | |
1da177e4 | 761 | unsigned long nodenum; |
4b5511eb | 762 | int rc; |
1da177e4 | 763 | |
961ed183 VK |
764 | if (size > PROC_WRITELEN) |
765 | return -EIO; | |
766 | if (copy_from_user(buffer, input, size)) | |
1da177e4 | 767 | return -EFAULT; |
961ed183 | 768 | buffer[size] = 0; |
1da177e4 LT |
769 | |
770 | if (*buffer == '+') { | |
4b5511eb AP |
771 | rc = kstrtoul(buffer+1, 10, &nodenum); |
772 | if (rc) | |
773 | return rc; | |
1da177e4 LT |
774 | if (clusterip_add_node(c, nodenum)) |
775 | return -ENOMEM; | |
776 | } else if (*buffer == '-') { | |
4b5511eb AP |
777 | rc = kstrtoul(buffer+1, 10, &nodenum); |
778 | if (rc) | |
779 | return rc; | |
1da177e4 LT |
780 | if (clusterip_del_node(c, nodenum)) |
781 | return -ENOENT; | |
782 | } else | |
783 | return -EIO; | |
784 | ||
785 | return size; | |
786 | } | |
787 | ||
9a32144e | 788 | static const struct file_operations clusterip_proc_fops = { |
1da177e4 LT |
789 | .owner = THIS_MODULE, |
790 | .open = clusterip_proc_open, | |
791 | .read = seq_read, | |
792 | .write = clusterip_proc_write, | |
793 | .llseek = seq_lseek, | |
794 | .release = clusterip_proc_release, | |
795 | }; | |
796 | ||
797 | #endif /* CONFIG_PROC_FS */ | |
798 | ||
ce4ff76c G |
799 | static int clusterip_net_init(struct net *net) |
800 | { | |
ce4ff76c | 801 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
03eb7d49 | 802 | int ret; |
ce4ff76c | 803 | |
26a89e43 G |
804 | INIT_LIST_HEAD(&cn->configs); |
805 | ||
f1e8077f G |
806 | spin_lock_init(&cn->lock); |
807 | ||
03eb7d49 FW |
808 | ret = nf_register_net_hook(net, &cip_arp_ops); |
809 | if (ret < 0) | |
810 | return ret; | |
811 | ||
26a89e43 | 812 | #ifdef CONFIG_PROC_FS |
ce4ff76c G |
813 | cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); |
814 | if (!cn->procdir) { | |
03eb7d49 | 815 | nf_unregister_net_hook(net, &cip_arp_ops); |
ce4ff76c G |
816 | pr_err("Unable to proc dir entry\n"); |
817 | return -ENOMEM; | |
818 | } | |
819 | #endif /* CONFIG_PROC_FS */ | |
820 | ||
821 | return 0; | |
822 | } | |
823 | ||
824 | static void clusterip_net_exit(struct net *net) | |
825 | { | |
ce4ff76c | 826 | struct clusterip_net *cn = net_generic(net, clusterip_net_id); |
96307a0a | 827 | #ifdef CONFIG_PROC_FS |
ce4ff76c | 828 | proc_remove(cn->procdir); |
3840538a | 829 | cn->procdir = NULL; |
ce4ff76c | 830 | #endif |
03eb7d49 | 831 | nf_unregister_net_hook(net, &cip_arp_ops); |
613d0776 | 832 | WARN_ON_ONCE(!list_empty(&cn->configs)); |
ce4ff76c G |
833 | } |
834 | ||
835 | static struct pernet_operations clusterip_net_ops = { | |
836 | .init = clusterip_net_init, | |
837 | .exit = clusterip_net_exit, | |
838 | .id = &clusterip_net_id, | |
839 | .size = sizeof(struct clusterip_net), | |
840 | }; | |
841 | ||
d3c5ee6d | 842 | static int __init clusterip_tg_init(void) |
1da177e4 LT |
843 | { |
844 | int ret; | |
845 | ||
ce4ff76c | 846 | ret = register_pernet_subsys(&clusterip_net_ops); |
32292a7f PM |
847 | if (ret < 0) |
848 | return ret; | |
1da177e4 | 849 | |
ce4ff76c G |
850 | ret = xt_register_target(&clusterip_tg_reg); |
851 | if (ret < 0) | |
852 | goto cleanup_subsys; | |
853 | ||
ff67e4e4 | 854 | pr_info("ClusterIP Version %s loaded successfully\n", |
1da177e4 | 855 | CLUSTERIP_VERSION); |
ce4ff76c | 856 | |
1da177e4 LT |
857 | return 0; |
858 | ||
ce4ff76c G |
859 | cleanup_subsys: |
860 | unregister_pernet_subsys(&clusterip_net_ops); | |
32292a7f | 861 | return ret; |
1da177e4 LT |
862 | } |
863 | ||
d3c5ee6d | 864 | static void __exit clusterip_tg_exit(void) |
1da177e4 | 865 | { |
ff67e4e4 | 866 | pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); |
ce4ff76c | 867 | |
d3c5ee6d | 868 | xt_unregister_target(&clusterip_tg_reg); |
ce4ff76c | 869 | unregister_pernet_subsys(&clusterip_net_ops); |
d73f33b1 ED |
870 | |
871 | /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ | |
872 | rcu_barrier_bh(); | |
1da177e4 LT |
873 | } |
874 | ||
d3c5ee6d JE |
875 | module_init(clusterip_tg_init); |
876 | module_exit(clusterip_tg_exit); |