2 * IPVS: Destination Hashing scheduling module
4 * Authors: Wensong Zhang <wensong@gnuchina.org>
6 * Inspired by the consistent hashing scheduler patch from
7 * Thomas Proell <proellt@gmx.de>
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
19 * The dh algorithm is to select server by the hash key of destination IP
20 * address. The pseudo code is as follows:
22 * n <- servernode[dest_ip];
24 * (n is overloaded) OR (n.weight <= 0) then
29 * Note that servernode is a hash table (256 buckets by default) that maps the
30 * hash index derived from packet destination IP address to the current server
31 * array. If the dh scheduler is used in a cache cluster, it is good to
32 * combine it with the cache_bypass feature. When the statically assigned
33 * server is dead or overloaded, the load balancer can bypass the cache
34 * server and send requests to the original server directly.
38 #define KMSG_COMPONENT "IPVS"
39 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
42 #include <linux/slab.h>
43 #include <linux/module.h>
44 #include <linux/kernel.h>
45 #include <linux/skbuff.h>
47 #include <net/ip_vs.h>
/*
 * One slot of the DH hash table (struct ip_vs_dh_state holds an array of
 * these). The slot stores an __rcu-annotated pointer to the destination
 * that the slot currently maps to.
 * NOTE(review): the closing line(s) of this struct are not visible in this
 * view; confirm there are no further members.
 */
53 struct ip_vs_dh_bucket
{
54 struct ip_vs_dest __rcu
*dest
; /* real server (cache) */
58 * for IPVS DH entry hash table
/*
 * Table geometry. The bit width comes from CONFIG_IP_VS_DH_TAB_BITS, with 8
 * supplied here when it is not already defined. The bucket count is
 * 1 << bits and the mask (size - 1) is used to reduce a hash to an index.
 * NOTE(review): the #endif closing the #ifndef is not visible in this view
 * (embedded line 62 is absent).
 */
60 #ifndef CONFIG_IP_VS_DH_TAB_BITS
61 #define CONFIG_IP_VS_DH_TAB_BITS 8
63 #define IP_VS_DH_TAB_BITS CONFIG_IP_VS_DH_TAB_BITS
64 #define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
65 #define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
/*
 * Per-service scheduler state, reached through svc->sched_data elsewhere in
 * this file: the bucket table of IP_VS_DH_TAB_SIZE slots, plus the rcu_head
 * that ip_vs_dh_done_svc() hands to kfree_rcu() when the state is released.
 * NOTE(review): the closing line(s) of this struct are not visible in this
 * view.
 */
67 struct ip_vs_dh_state
{
68 struct ip_vs_dh_bucket buckets
[IP_VS_DH_TAB_SIZE
];
69 struct rcu_head rcu_head
;
73 * Returns hash value for IPVS DH entry
/*
 * Reduce an address to a bucket index in the range 0..IP_VS_DH_TAB_MASK.
 *
 * The 32-bit value to hash starts out as addr->ip. In the
 * CONFIG_IP_VS_IPV6 section the four 32-bit words of addr->ip6 are
 * XOR-folded into a single word instead. The value is then converted with
 * ntohl(), multiplied by 2654435761 and masked to the table size.
 *
 * NOTE(review): embedded lines 80 and 83 are absent from this view. The
 * condition that selects the IPv6 fold (presumably a test on @af, which is
 * otherwise unused in the visible code) and the closing #endif cannot be
 * seen here -- confirm against the full file.
 */
75 static inline unsigned int ip_vs_dh_hashkey(int af
, const union nf_inet_addr
*addr
)
77 __be32 addr_fold
= addr
->ip
;
79 #ifdef CONFIG_IP_VS_IPV6
/* fold the 128-bit address into 32 bits by XOR-ing its four words */
81 addr_fold
= addr
->ip6
[0]^addr
->ip6
[1]^
82 addr
->ip6
[2]^addr
->ip6
[3];
/* multiplicative hash, then keep only the low table-index bits */
84 return (ntohl(addr_fold
)*2654435761UL) & IP_VS_DH_TAB_MASK
;
89 * Get ip_vs_dest associated with supplied parameters.
91 static inline struct ip_vs_dest
*
92 ip_vs_dh_get(int af
, struct ip_vs_dh_state
*s
, const union nf_inet_addr
*addr
)
94 return rcu_dereference(s
->buckets
[ip_vs_dh_hashkey(af
, addr
)].dest
);
99 * Assign all the hash buckets of the specified table with the service.
/*
 * Re-populate the buckets of @s from the destination list of @svc.
 *
 * The loop runs over all IP_VS_DH_TAB_SIZE buckets. For a bucket, the
 * stored destination is read and released with ip_vs_dest_put(); the
 * bucket is then either reset to NULL or pointed at an entry taken from
 * svc->destinations, on which ip_vs_dest_hold() is called before the
 * pointer is stored.
 *
 * The read uses rcu_dereference_protected(..., 1), i.e. with a constant
 * "protected" condition; this presumably relies on the caller serialising
 * updates -- TODO confirm.
 *
 * NOTE(review): many lines of this function are absent from this view
 * (embedded lines 101, 103-104, 106, 108-110, 115, 117, 119, 121-122 and
 * 126-135). That includes the return type, the declarations of i, p and
 * empty, the branch conditions around the statements below, and whatever
 * advances b and p. Do not infer the exact control flow from this fragment.
 */
102 ip_vs_dh_reassign(struct ip_vs_dh_state
*s
, struct ip_vs_service
*svc
)
105 struct ip_vs_dh_bucket
*b
;
107 struct ip_vs_dest
*dest
;
111 p
= &svc
->destinations
;
112 empty
= list_empty(p
);
113 for (i
=0; i
<IP_VS_DH_TAB_SIZE
; i
++) {
114 dest
= rcu_dereference_protected(b
->dest
, 1);
/* release the destination the bucket referred to so far */
116 ip_vs_dest_put(dest
);
118 RCU_INIT_POINTER(b
->dest
, NULL
);
/* p pointing at the list head itself means there is no entry at p */
120 if (p
== &svc
->destinations
)
123 dest
= list_entry(p
, struct ip_vs_dest
, n_list
);
/* take a reference before the destination is stored in the bucket */
124 ip_vs_dest_hold(dest
);
125 RCU_INIT_POINTER(b
->dest
, dest
);
136 * Flush all the hash buckets of the specified table.
/*
 * Empty the bucket table of @s.
 *
 * The loop runs over all IP_VS_DH_TAB_SIZE buckets; the stored destination
 * is read with rcu_dereference_protected(..., 1), released with
 * ip_vs_dest_put(), and the bucket pointer is reset to NULL.
 *
 * NOTE(review): embedded lines 139-140, 143-144, 147 and 150-155 are absent
 * from this view. That includes the declaration of i, the initialisation
 * and advance of b, and any condition guarding the put/reset pair.
 */
138 static void ip_vs_dh_flush(struct ip_vs_dh_state
*s
)
141 struct ip_vs_dh_bucket
*b
;
142 struct ip_vs_dest
*dest
;
145 for (i
=0; i
<IP_VS_DH_TAB_SIZE
; i
++) {
146 dest
= rcu_dereference_protected(b
->dest
, 1);
148 ip_vs_dest_put(dest
);
149 RCU_INIT_POINTER(b
->dest
, NULL
);
/*
 * Scheduler init_service callback: set up the DH state for @svc.
 *
 * Allocates a zeroed struct ip_vs_dh_state with kzalloc(GFP_KERNEL), emits
 * a level-6 debug message reporting the bucket-array size, and fills the
 * buckets from the service's current destinations via ip_vs_dh_reassign().
 *
 * NOTE(review): embedded lines 162-165, 167 and 172-176 are absent from
 * this view. The allocation-failure handling, the point where the state is
 * attached to @svc (other callbacks read it from svc->sched_data), the
 * continuation of the debug format string and the return statement are not
 * visible here.
 * NOTE(review): "%Zd" is the legacy size_t length modifier; newer kernels
 * expect "%zu"/"%zd" -- confirm against the target kernel version.
 */
156 static int ip_vs_dh_init_svc(struct ip_vs_service
*svc
)
158 struct ip_vs_dh_state
*s
;
160 /* allocate the DH table for this service */
161 s
= kzalloc(sizeof(struct ip_vs_dh_state
), GFP_KERNEL
);
166 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
168 sizeof(struct ip_vs_dh_bucket
)*IP_VS_DH_TAB_SIZE
);
170 /* assign the hash buckets with current dests */
171 ip_vs_dh_reassign(s
, svc
);
/*
 * Scheduler done_service callback: tear down the DH state of @svc.
 *
 * Reads the state from svc->sched_data and releases it with kfree_rcu()
 * through the rcu_head member embedded in the state, then logs the released
 * bucket-array size at debug level 6.
 *
 * NOTE(review): embedded line 182 is absent from this view. The statement
 * belonging to the "clean up hash buckets" comment (presumably a call to
 * ip_vs_dh_flush()) cannot be seen here -- confirm against the full file.
 * NOTE(review): "%Zd" is the legacy size_t length modifier; newer kernels
 * expect "%zu"/"%zd" -- confirm against the target kernel version.
 */
177 static void ip_vs_dh_done_svc(struct ip_vs_service
*svc
)
179 struct ip_vs_dh_state
*s
= svc
->sched_data
;
181 /* got to clean up hash buckets here */
184 /* release the table itself */
185 kfree_rcu(s
, rcu_head
);
186 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
187 sizeof(struct ip_vs_dh_bucket
)*IP_VS_DH_TAB_SIZE
);
/*
 * Callback installed as both .add_dest and .del_dest of the DH scheduler:
 * rebuild the bucket table of @svc whenever its destination set changes.
 *
 * @dest is not used by the visible code; the whole table is reassigned
 * from the service's destination list instead.
 *
 * NOTE(review): the return statement (embedded lines 198-199) is absent
 * from this view, so the returned value cannot be confirmed from here.
 */
191 static int ip_vs_dh_dest_changed(struct ip_vs_service
*svc
,
192 struct ip_vs_dest
*dest
)
194 struct ip_vs_dh_state
*s
= svc
->sched_data
;
196 /* assign the hash buckets with the updated service */
197 ip_vs_dh_reassign(s
, svc
);
204 * If IP_VS_DEST_F_OVERLOAD is set in the dest flags,
205 * consider the server overloaded here.
207 static inline int is_overloaded(struct ip_vs_dest
*dest
)
209 return dest
->flags
& IP_VS_DEST_F_OVERLOAD
;
214 * Destination hashing scheduling
/*
 * Scheduler .schedule callback: choose the destination for @skb by hashing
 * the packet's destination address.
 *
 * The address is extracted with ip_vs_fill_iph_addr_only() for the
 * service's address family, and the candidate destination is looked up in
 * the service's DH state with ip_vs_dh_get(). The candidate is rejected,
 * and "no destination available" is reported through ip_vs_scheduler_err(),
 * when it lacks IP_VS_DEST_F_AVAILABLE, has a weight <= 0, or is flagged
 * overloaded. Otherwise the mapping is logged at debug level 6.
 *
 * NOTE(review): embedded lines 229, 234-236 and 240-246 are absent from
 * this view. The first operand of the rejection condition (presumably a
 * NULL check on dest, given the "||" continuation), what is returned after
 * the error is reported, the last argument of the IP_VS_DBG_BUF() call
 * (its format string has a third "%d" conversion) and the final return
 * cannot be seen here.
 */
216 static struct ip_vs_dest
*
217 ip_vs_dh_schedule(struct ip_vs_service
*svc
, const struct sk_buff
*skb
)
219 struct ip_vs_dest
*dest
;
220 struct ip_vs_dh_state
*s
;
221 struct ip_vs_iphdr iph
;
223 ip_vs_fill_iph_addr_only(svc
->af
, skb
, &iph
);
225 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__
);
227 s
= (struct ip_vs_dh_state
*) svc
->sched_data
;
/* hash on the packet's destination address, not its source */
228 dest
= ip_vs_dh_get(svc
->af
, s
, &iph
.daddr
);
230 || !(dest
->flags
& IP_VS_DEST_F_AVAILABLE
)
231 || atomic_read(&dest
->weight
) <= 0
232 || is_overloaded(dest
)) {
233 ip_vs_scheduler_err(svc
, "no destination available");
237 IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
238 IP_VS_DBG_ADDR(svc
->af
, &iph
.daddr
),
239 IP_VS_DBG_ADDR(svc
->af
, &dest
->addr
),
247 * IPVS DH Scheduler structure
/*
 * Scheduler descriptor passed to register_ip_vs_scheduler() and
 * unregister_ip_vs_scheduler() by the module init/exit functions.
 *
 * It starts with a zero reference count and a self-referencing list node,
 * and wires up the service init/done callbacks, a single handler shared by
 * add_dest and del_dest, and the schedule callback.
 *
 * NOTE(review): embedded lines 250-251 and 260 are absent from this view
 * (presumably the opening brace, a further initializer and the closing
 * brace) -- confirm against the full file.
 */
249 static struct ip_vs_scheduler ip_vs_dh_scheduler
=
252 .refcnt
= ATOMIC_INIT(0),
253 .module
= THIS_MODULE
,
254 .n_list
= LIST_HEAD_INIT(ip_vs_dh_scheduler
.n_list
),
255 .init_service
= ip_vs_dh_init_svc
,
256 .done_service
= ip_vs_dh_done_svc
,
/* adding and removing a destination both trigger a full table rebuild */
257 .add_dest
= ip_vs_dh_dest_changed
,
258 .del_dest
= ip_vs_dh_dest_changed
,
259 .schedule
= ip_vs_dh_schedule
,
263 static int __init
ip_vs_dh_init(void)
265 return register_ip_vs_scheduler(&ip_vs_dh_scheduler
);
/*
 * Module exit: unregister the DH scheduler descriptor.
 *
 * NOTE(review): embedded lines 272-275 are absent from this view, so it
 * cannot be confirmed here whether anything follows the unregister call
 * (e.g. waiting for outstanding RCU callbacks) before the function ends.
 */
269 static void __exit
ip_vs_dh_cleanup(void)
271 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler
);
276 module_init(ip_vs_dh_init
);
277 module_exit(ip_vs_dh_cleanup
);
278 MODULE_LICENSE("GPL");