]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * IPVS: Shortest Expected Delay scheduling module | |
3 | * | |
4 | * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $ | |
5 | * | |
6 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License | |
10 | * as published by the Free Software Foundation; either version | |
11 | * 2 of the License, or (at your option) any later version. | |
12 | * | |
13 | * Changes: | |
14 | * | |
15 | */ | |
16 | ||
17 | /* | |
18 | * The SED algorithm attempts to minimize each job's expected delay until | |
19 | * completion. The expected delay that the job will experience is | |
20 | * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of | |
59c51591 | 21 | * jobs on the ith server and Ui is the fixed service rate (weight) of |
1da177e4 LT |
22 | * the ith server. The SED algorithm adopts a greedy policy that each does |
23 | * what is in its own best interest, i.e. to join the queue which would | |
24 | * minimize its expected delay of completion. | |
25 | * | |
26 | * See the following paper for more information: | |
27 | * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing | |
28 | * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88, | |
29 | * pages 986-994, 1988. | |
30 | * | |
31 | * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me. | |
32 | * | |
33 | * The difference between SED and WLC is that SED includes the incoming | |
34 | * job in the cost function (the increment of 1). SED may outperform | |
35 | * WLC, while scheduling big jobs under larger heterogeneous systems | |
36 | * (the server weight varies a lot). | |
37 | * | |
38 | */ | |
39 | ||
40 | #include <linux/module.h> | |
41 | #include <linux/kernel.h> | |
42 | ||
43 | #include <net/ip_vs.h> | |
44 | ||
45 | ||
46 | static int | |
47 | ip_vs_sed_init_svc(struct ip_vs_service *svc) | |
48 | { | |
49 | return 0; | |
50 | } | |
51 | ||
52 | ||
53 | static int | |
54 | ip_vs_sed_done_svc(struct ip_vs_service *svc) | |
55 | { | |
56 | return 0; | |
57 | } | |
58 | ||
59 | ||
60 | static int | |
61 | ip_vs_sed_update_svc(struct ip_vs_service *svc) | |
62 | { | |
63 | return 0; | |
64 | } | |
65 | ||
66 | ||
67 | static inline unsigned int | |
68 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | |
69 | { | |
70 | /* | |
71 | * We only use the active connection number in the cost | |
72 | * calculation here. | |
73 | */ | |
74 | return atomic_read(&dest->activeconns) + 1; | |
75 | } | |
76 | ||
77 | ||
78 | /* | |
79 | * Weighted Least Connection scheduling | |
80 | */ | |
81 | static struct ip_vs_dest * | |
82 | ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |
83 | { | |
84 | struct ip_vs_dest *dest, *least; | |
85 | unsigned int loh, doh; | |
86 | ||
87 | IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n"); | |
88 | ||
89 | /* | |
90 | * We calculate the load of each dest server as follows: | |
91 | * (server expected overhead) / dest->weight | |
92 | * | |
93 | * Remember -- no floats in kernel mode!!! | |
94 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | |
95 | * h1/w1 > h2/w2 | |
96 | * if every weight is larger than zero. | |
97 | * | |
98 | * The server with weight=0 is quiesced and will not receive any | |
99 | * new connections. | |
100 | */ | |
101 | ||
102 | list_for_each_entry(dest, &svc->destinations, n_list) { | |
103 | if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | |
104 | atomic_read(&dest->weight) > 0) { | |
105 | least = dest; | |
106 | loh = ip_vs_sed_dest_overhead(least); | |
107 | goto nextstage; | |
108 | } | |
109 | } | |
110 | return NULL; | |
111 | ||
112 | /* | |
113 | * Find the destination with the least load. | |
114 | */ | |
115 | nextstage: | |
116 | list_for_each_entry_continue(dest, &svc->destinations, n_list) { | |
117 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | |
118 | continue; | |
119 | doh = ip_vs_sed_dest_overhead(dest); | |
120 | if (loh * atomic_read(&dest->weight) > | |
121 | doh * atomic_read(&least->weight)) { | |
122 | least = dest; | |
123 | loh = doh; | |
124 | } | |
125 | } | |
126 | ||
127 | IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " | |
128 | "activeconns %d refcnt %d weight %d overhead %d\n", | |
129 | NIPQUAD(least->addr), ntohs(least->port), | |
130 | atomic_read(&least->activeconns), | |
131 | atomic_read(&least->refcnt), | |
132 | atomic_read(&least->weight), loh); | |
133 | ||
134 | return least; | |
135 | } | |
136 | ||
137 | ||
138 | static struct ip_vs_scheduler ip_vs_sed_scheduler = | |
139 | { | |
140 | .name = "sed", | |
141 | .refcnt = ATOMIC_INIT(0), | |
142 | .module = THIS_MODULE, | |
143 | .init_service = ip_vs_sed_init_svc, | |
144 | .done_service = ip_vs_sed_done_svc, | |
145 | .update_service = ip_vs_sed_update_svc, | |
146 | .schedule = ip_vs_sed_schedule, | |
147 | }; | |
148 | ||
149 | ||
150 | static int __init ip_vs_sed_init(void) | |
151 | { | |
152 | INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); | |
153 | return register_ip_vs_scheduler(&ip_vs_sed_scheduler); | |
154 | } | |
155 | ||
156 | static void __exit ip_vs_sed_cleanup(void) | |
157 | { | |
158 | unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); | |
159 | } | |
160 | ||
161 | module_init(ip_vs_sed_init); | |
162 | module_exit(ip_vs_sed_cleanup); | |
163 | MODULE_LICENSE("GPL"); |