git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blame - net/netfilter/ipvs/ip_vs_dh.c
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 152
[mirror_ubuntu-eoan-kernel.git] / net / netfilter / ipvs / ip_vs_dh.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
cb7f6a7b
JV
2/*
3 * IPVS: Destination Hashing scheduling module
4 *
5 * Authors: Wensong Zhang <wensong@gnuchina.org>
6 *
7 * Inspired by the consistent hashing scheduler patch from
8 * Thomas Proell <proellt@gmx.de>
9 *
cb7f6a7b 10 * Changes:
cb7f6a7b
JV
11 */
12
13/*
14 * The dh algorithm is to select server by the hash key of destination IP
15 * address. The pseudo code is as follows:
16 *
17 * n <- servernode[dest_ip];
18 * if (n is dead) OR
19 * (n is overloaded) OR (n.weight <= 0) then
20 * return NULL;
21 *
22 * return n;
23 *
24 * Note that servernode is a 256-bucket hash table that maps the hash
25 * index derived from packet destination IP address to the current server
26 * array. If the dh scheduler is used in cache cluster, it is good to
27 * combine it with cache_bypass feature. When the statically assigned
28 * server is dead or overloaded, the load balancer can bypass the cache
29 * server and send requests to the original server directly.
30 *
31 */
32
9aada7ac
HE
33#define KMSG_COMPONENT "IPVS"
34#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
35
cb7f6a7b 36#include <linux/ip.h>
5a0e3ad6 37#include <linux/slab.h>
cb7f6a7b
JV
38#include <linux/module.h>
39#include <linux/kernel.h>
40#include <linux/skbuff.h>
9a17740e 41#include <linux/hash.h>
cb7f6a7b
JV
42
43#include <net/ip_vs.h>
44
45
46/*
47 * IPVS DH bucket
48 */
49struct ip_vs_dh_bucket {
8f3d0023 50 struct ip_vs_dest __rcu *dest; /* real server (cache) */
cb7f6a7b
JV
51};
52
/*
 *      Geometry of the IPVS DH hash table.
 *
 *      The number of index bits may be supplied from outside through
 *      CONFIG_IP_VS_DH_TAB_BITS; when it is not, 8 bits (256 buckets)
 *      are used.  SIZE and MASK are derived from the bit count.
 */
#if !defined(CONFIG_IP_VS_DH_TAB_BITS)
#define CONFIG_IP_VS_DH_TAB_BITS        8
#endif
#define IP_VS_DH_TAB_BITS               CONFIG_IP_VS_DH_TAB_BITS
#define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)
#define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1)
62
8f3d0023
JA
63struct ip_vs_dh_state {
64 struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
65 struct rcu_head rcu_head;
66};
cb7f6a7b
JV
67
68/*
69 * Returns hash value for IPVS DH entry
70 */
95c96174 71static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *addr)
cb7f6a7b 72{
20971a0a
JV
73 __be32 addr_fold = addr->ip;
74
75#ifdef CONFIG_IP_VS_IPV6
76 if (af == AF_INET6)
77 addr_fold = addr->ip6[0]^addr->ip6[1]^
78 addr->ip6[2]^addr->ip6[3];
79#endif
9a17740e 80 return hash_32(ntohl(addr_fold), IP_VS_DH_TAB_BITS);
cb7f6a7b
JV
81}
82
83
84/*
85 * Get ip_vs_dest associated with supplied parameters.
86 */
87static inline struct ip_vs_dest *
8f3d0023 88ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
cb7f6a7b 89{
8f3d0023 90 return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
cb7f6a7b
JV
91}
92
93
94/*
95 * Assign all the hash buckets of the specified table with the service.
96 */
97static int
8f3d0023 98ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
cb7f6a7b
JV
99{
100 int i;
101 struct ip_vs_dh_bucket *b;
102 struct list_head *p;
103 struct ip_vs_dest *dest;
8f3d0023 104 bool empty;
cb7f6a7b 105
8f3d0023 106 b = &s->buckets[0];
cb7f6a7b 107 p = &svc->destinations;
8f3d0023 108 empty = list_empty(p);
cb7f6a7b 109 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
8f3d0023
JA
110 dest = rcu_dereference_protected(b->dest, 1);
111 if (dest)
112 ip_vs_dest_put(dest);
113 if (empty)
114 RCU_INIT_POINTER(b->dest, NULL);
115 else {
cb7f6a7b
JV
116 if (p == &svc->destinations)
117 p = p->next;
118
119 dest = list_entry(p, struct ip_vs_dest, n_list);
8f3d0023
JA
120 ip_vs_dest_hold(dest);
121 RCU_INIT_POINTER(b->dest, dest);
cb7f6a7b
JV
122
123 p = p->next;
124 }
125 b++;
126 }
127 return 0;
128}
129
130
131/*
132 * Flush all the hash buckets of the specified table.
133 */
8f3d0023 134static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
cb7f6a7b
JV
135{
136 int i;
137 struct ip_vs_dh_bucket *b;
8f3d0023 138 struct ip_vs_dest *dest;
cb7f6a7b 139
8f3d0023 140 b = &s->buckets[0];
cb7f6a7b 141 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
8f3d0023
JA
142 dest = rcu_dereference_protected(b->dest, 1);
143 if (dest) {
144 ip_vs_dest_put(dest);
145 RCU_INIT_POINTER(b->dest, NULL);
cb7f6a7b
JV
146 }
147 b++;
148 }
149}
150
151
152static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
153{
8f3d0023 154 struct ip_vs_dh_state *s;
cb7f6a7b
JV
155
156 /* allocate the DH table for this service */
8f3d0023
JA
157 s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
158 if (s == NULL)
cb7f6a7b 159 return -ENOMEM;
0a9ee813 160
8f3d0023 161 svc->sched_data = s;
5b5e0928 162 IP_VS_DBG(6, "DH hash table (memory=%zdbytes) allocated for "
cb7f6a7b
JV
163 "current service\n",
164 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
165
8f3d0023
JA
166 /* assign the hash buckets with current dests */
167 ip_vs_dh_reassign(s, svc);
cb7f6a7b
JV
168
169 return 0;
170}
171
172
ed3ffc4e 173static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
cb7f6a7b 174{
8f3d0023 175 struct ip_vs_dh_state *s = svc->sched_data;
cb7f6a7b
JV
176
177 /* got to clean up hash buckets here */
8f3d0023 178 ip_vs_dh_flush(s);
cb7f6a7b
JV
179
180 /* release the table itself */
8f3d0023 181 kfree_rcu(s, rcu_head);
5b5e0928 182 IP_VS_DBG(6, "DH hash table (memory=%zdbytes) released\n",
cb7f6a7b 183 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
cb7f6a7b
JV
184}
185
186
8f3d0023
JA
187static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
188 struct ip_vs_dest *dest)
cb7f6a7b 189{
8f3d0023 190 struct ip_vs_dh_state *s = svc->sched_data;
cb7f6a7b
JV
191
192 /* assign the hash buckets with the updated service */
8f3d0023 193 ip_vs_dh_reassign(s, svc);
cb7f6a7b
JV
194
195 return 0;
196}
197
198
199/*
200 * If the dest flags is set with IP_VS_DEST_F_OVERLOAD,
201 * consider that the server is overloaded here.
202 */
203static inline int is_overloaded(struct ip_vs_dest *dest)
204{
205 return dest->flags & IP_VS_DEST_F_OVERLOAD;
206}
207
208
209/*
210 * Destination hashing scheduling
211 */
212static struct ip_vs_dest *
bba54de5
JA
213ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
214 struct ip_vs_iphdr *iph)
cb7f6a7b
JV
215{
216 struct ip_vs_dest *dest;
8f3d0023 217 struct ip_vs_dh_state *s;
cb7f6a7b 218
1e3e238e 219 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
cb7f6a7b 220
8f3d0023 221 s = (struct ip_vs_dh_state *) svc->sched_data;
bba54de5 222 dest = ip_vs_dh_get(svc->af, s, &iph->daddr);
cb7f6a7b
JV
223 if (!dest
224 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
225 || atomic_read(&dest->weight) <= 0
226 || is_overloaded(dest)) {
8f3d0023 227 ip_vs_scheduler_err(svc, "no destination available");
cb7f6a7b
JV
228 return NULL;
229 }
230
20971a0a 231 IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
bba54de5 232 IP_VS_DBG_ADDR(svc->af, &iph->daddr),
4d316f3f 233 IP_VS_DBG_ADDR(dest->af, &dest->addr),
20971a0a 234 ntohs(dest->port));
cb7f6a7b
JV
235
236 return dest;
237}
238
239
240/*
241 * IPVS DH Scheduler structure
242 */
243static struct ip_vs_scheduler ip_vs_dh_scheduler =
244{
245 .name = "dh",
246 .refcnt = ATOMIC_INIT(0),
247 .module = THIS_MODULE,
248 .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
cb7f6a7b
JV
249 .init_service = ip_vs_dh_init_svc,
250 .done_service = ip_vs_dh_done_svc,
8f3d0023
JA
251 .add_dest = ip_vs_dh_dest_changed,
252 .del_dest = ip_vs_dh_dest_changed,
cb7f6a7b
JV
253 .schedule = ip_vs_dh_schedule,
254};
255
256
257static int __init ip_vs_dh_init(void)
258{
259 return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
260}
261
262
263static void __exit ip_vs_dh_cleanup(void)
264{
265 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
ceec4c38 266 synchronize_rcu();
cb7f6a7b
JV
267}
268
269
270module_init(ip_vs_dh_init);
271module_exit(ip_vs_dh_cleanup);
272MODULE_LICENSE("GPL");