]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
2906f66a VMR |
2 | #include <linux/kernel.h> |
3 | #include <linux/ip.h> | |
4 | #include <linux/sctp.h> | |
5 | #include <net/ip.h> | |
6 | #include <net/ip6_checksum.h> | |
7 | #include <linux/netfilter.h> | |
8 | #include <linux/netfilter_ipv4.h> | |
9 | #include <net/sctp/checksum.h> | |
10 | #include <net/ip_vs.h> | |
11 | ||
2906f66a | 12 | static int |
d8f44c33 EB |
13 | sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, |
14 | struct ip_vs_proto_data *pd, | |
d4383f04 JDB |
15 | int *verdict, struct ip_vs_conn **cpp, |
16 | struct ip_vs_iphdr *iph) | |
2906f66a VMR |
17 | { |
18 | struct ip_vs_service *svc; | |
922dbc5b | 19 | struct sctp_chunkhdr _schunkh, *sch; |
ae146d9b | 20 | struct sctphdr *sh, _sctph; |
5e26b1b3 | 21 | __be16 _ports[2], *ports = NULL; |
2906f66a | 22 | |
5e26b1b3 AG |
23 | if (likely(!ip_vs_iph_icmp(iph))) { |
24 | sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); | |
25 | if (sh) { | |
ae146d9b XL |
26 | sch = skb_header_pointer(skb, iph->len + sizeof(_sctph), |
27 | sizeof(_schunkh), &_schunkh); | |
1cc4a018 | 28 | if (sch) { |
68913a01 XL |
29 | if (sch->type == SCTP_CID_ABORT || |
30 | !(sysctl_sloppy_sctp(ipvs) || | |
1cc4a018 XL |
31 | sch->type == SCTP_CID_INIT)) |
32 | return 1; | |
5e26b1b3 | 33 | ports = &sh->source; |
1cc4a018 | 34 | } |
5e26b1b3 AG |
35 | } |
36 | } else { | |
37 | ports = skb_header_pointer( | |
38 | skb, iph->len, sizeof(_ports), &_ports); | |
6e7cd27c | 39 | } |
2906f66a | 40 | |
5e26b1b3 | 41 | if (!ports) { |
6e7cd27c | 42 | *verdict = NF_DROP; |
2906f66a | 43 | return 0; |
6e7cd27c DB |
44 | } |
45 | ||
5e26b1b3 | 46 | if (likely(!ip_vs_iph_inverse(iph))) |
0a4fd6ce | 47 | svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, |
5e26b1b3 AG |
48 | &iph->daddr, ports[1]); |
49 | else | |
0a4fd6ce | 50 | svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, |
5e26b1b3 AG |
51 | &iph->saddr, ports[0]); |
52 | if (svc) { | |
190ecd27 JA |
53 | int ignored; |
54 | ||
c6c96c18 | 55 | if (ip_vs_todrop(ipvs)) { |
2906f66a VMR |
56 | /* |
57 | * It seems that we are very loaded. | |
58 | * We have to drop this packet :( | |
59 | */ | |
2906f66a VMR |
60 | *verdict = NF_DROP; |
61 | return 0; | |
62 | } | |
63 | /* | |
64 | * Let the virtual server select a real server for the | |
65 | * incoming connection, and create a connection entry. | |
66 | */ | |
d4383f04 | 67 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); |
a5959d53 HS |
68 | if (!*cpp && ignored <= 0) { |
69 | if (!ignored) | |
d4383f04 | 70 | *verdict = ip_vs_leave(svc, skb, pd, iph); |
ceec4c38 | 71 | else |
a5959d53 | 72 | *verdict = NF_DROP; |
2906f66a VMR |
73 | return 0; |
74 | } | |
2906f66a | 75 | } |
a5959d53 | 76 | /* NF_ACCEPT */ |
2906f66a VMR |
77 | return 1; |
78 | } | |
79 | ||
ae146d9b | 80 | static void sctp_nat_csum(struct sk_buff *skb, struct sctphdr *sctph, |
4b47bc9a DB |
81 | unsigned int sctphoff) |
82 | { | |
024ec3de | 83 | sctph->checksum = sctp_compute_cksum(skb, sctphoff); |
4b47bc9a DB |
84 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
85 | } | |
86 | ||
2906f66a | 87 | static int |
d4383f04 JDB |
88 | sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, |
89 | struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) | |
2906f66a | 90 | { |
ae146d9b | 91 | struct sctphdr *sctph; |
d4383f04 | 92 | unsigned int sctphoff = iph->len; |
97203abe | 93 | bool payload_csum = false; |
2906f66a VMR |
94 | |
95 | #ifdef CONFIG_IP_VS_IPV6 | |
d4383f04 | 96 | if (cp->af == AF_INET6 && iph->fragoffs) |
63dca2c0 | 97 | return 1; |
2906f66a | 98 | #endif |
2906f66a VMR |
99 | |
100 | /* csum_check requires unshared skb */ | |
101 | if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) | |
102 | return 0; | |
103 | ||
104 | if (unlikely(cp->app != NULL)) { | |
97203abe DB |
105 | int ret; |
106 | ||
2906f66a VMR |
107 | /* Some checks before mangling */ |
108 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | |
109 | return 0; | |
110 | ||
111 | /* Call application helper if needed */ | |
97203abe DB |
112 | ret = ip_vs_app_pkt_out(cp, skb); |
113 | if (ret == 0) | |
2906f66a | 114 | return 0; |
97203abe DB |
115 | /* ret=2: csum update is needed after payload mangling */ |
116 | if (ret == 2) | |
117 | payload_csum = true; | |
2906f66a VMR |
118 | } |
119 | ||
120 | sctph = (void *) skb_network_header(skb) + sctphoff; | |
2906f66a | 121 | |
97203abe DB |
122 | /* Only update csum if we really have to */ |
123 | if (sctph->source != cp->vport || payload_csum || | |
124 | skb->ip_summed == CHECKSUM_PARTIAL) { | |
125 | sctph->source = cp->vport; | |
126 | sctp_nat_csum(skb, sctph, sctphoff); | |
127 | } else { | |
128 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
129 | } | |
2906f66a VMR |
130 | |
131 | return 1; | |
132 | } | |
133 | ||
134 | static int | |
d4383f04 JDB |
135 | sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, |
136 | struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) | |
2906f66a | 137 | { |
ae146d9b | 138 | struct sctphdr *sctph; |
d4383f04 | 139 | unsigned int sctphoff = iph->len; |
97203abe | 140 | bool payload_csum = false; |
2906f66a VMR |
141 | |
142 | #ifdef CONFIG_IP_VS_IPV6 | |
d4383f04 | 143 | if (cp->af == AF_INET6 && iph->fragoffs) |
63dca2c0 | 144 | return 1; |
2906f66a | 145 | #endif |
2906f66a VMR |
146 | |
147 | /* csum_check requires unshared skb */ | |
148 | if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) | |
149 | return 0; | |
150 | ||
151 | if (unlikely(cp->app != NULL)) { | |
97203abe DB |
152 | int ret; |
153 | ||
2906f66a VMR |
154 | /* Some checks before mangling */ |
155 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | |
156 | return 0; | |
157 | ||
158 | /* Call application helper if needed */ | |
97203abe DB |
159 | ret = ip_vs_app_pkt_in(cp, skb); |
160 | if (ret == 0) | |
2906f66a | 161 | return 0; |
97203abe DB |
162 | /* ret=2: csum update is needed after payload mangling */ |
163 | if (ret == 2) | |
164 | payload_csum = true; | |
2906f66a VMR |
165 | } |
166 | ||
167 | sctph = (void *) skb_network_header(skb) + sctphoff; | |
2906f66a | 168 | |
97203abe DB |
169 | /* Only update csum if we really have to */ |
170 | if (sctph->dest != cp->dport || payload_csum || | |
171 | (skb->ip_summed == CHECKSUM_PARTIAL && | |
53692b1d | 172 | !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { |
97203abe DB |
173 | sctph->dest = cp->dport; |
174 | sctp_nat_csum(skb, sctph, sctphoff); | |
175 | } else if (skb->ip_summed != CHECKSUM_PARTIAL) { | |
176 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
177 | } | |
2906f66a VMR |
178 | |
179 | return 1; | |
180 | } | |
181 | ||
182 | static int | |
183 | sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |
184 | { | |
2906f66a VMR |
185 | unsigned int sctphoff; |
186 | struct sctphdr *sh, _sctph; | |
024ec3de | 187 | __le32 cmp, val; |
2906f66a VMR |
188 | |
189 | #ifdef CONFIG_IP_VS_IPV6 | |
190 | if (af == AF_INET6) | |
191 | sctphoff = sizeof(struct ipv6hdr); | |
192 | else | |
193 | #endif | |
194 | sctphoff = ip_hdrlen(skb); | |
195 | ||
196 | sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); | |
197 | if (sh == NULL) | |
198 | return 0; | |
199 | ||
200 | cmp = sh->checksum; | |
024ec3de | 201 | val = sctp_compute_cksum(skb, sctphoff); |
2906f66a VMR |
202 | |
203 | if (val != cmp) { | |
204 | /* CRC failure, dump it. */ | |
0d79641a | 205 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
2906f66a VMR |
206 | "Failed checksum for"); |
207 | return 0; | |
208 | } | |
209 | return 1; | |
210 | } | |
211 | ||
/*
 * Event classes derived from the SCTP chunk type. The values index the
 * second dimension of sctp_states, so the order must match the rows of
 * that table.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST		/* number of events, not an event */
};
225 | ||
61e7c420 JA |
226 | /* RFC 2960, 3.2 Chunk Field Descriptions */ |
227 | static __u8 sctp_events[] = { | |
228 | [SCTP_CID_DATA] = IP_VS_SCTP_DATA, | |
229 | [SCTP_CID_INIT] = IP_VS_SCTP_INIT, | |
230 | [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, | |
231 | [SCTP_CID_SACK] = IP_VS_SCTP_DATA, | |
232 | [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, | |
233 | [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, | |
234 | [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, | |
235 | [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, | |
236 | [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, | |
237 | [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, | |
238 | [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, | |
239 | [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, | |
240 | [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, | |
241 | [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, | |
242 | [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, | |
2906f66a VMR |
243 | }; |
244 | ||
61e7c420 JA |
245 | /* SCTP States: |
246 | * See RFC 2960, 4. SCTP Association State Diagram | |
247 | * | |
248 | * New states (not in diagram): | |
249 | * - INIT1 state: use shorter timeout for dropped INIT packets | |
250 | * - REJECTED state: use shorter timeout if INIT is rejected with ABORT | |
251 | * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging | |
252 | * | |
253 | * The states are as seen in real server. In the diagram, INIT1, INIT, | |
254 | * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. | |
255 | * | |
256 | * States as per packets from client (C) and server (S): | |
257 | * | |
258 | * Setup of client connection: | |
259 | * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK | |
260 | * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK | |
261 | * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO | |
262 | * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK | |
263 | * | |
264 | * Setup of server connection: | |
265 | * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK | |
266 | * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO | |
267 | * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK | |
268 | */ | |
2906f66a | 269 | |
/* Short aliases for the IP_VS_SCTP_S_* states, used in the table below */
#define sNO	IP_VS_SCTP_S_NONE
#define sI1	IP_VS_SCTP_S_INIT1
#define sIN	IP_VS_SCTP_S_INIT
#define sCS	IP_VS_SCTP_S_COOKIE_SENT
#define sCR	IP_VS_SCTP_S_COOKIE_REPLIED
#define sCW	IP_VS_SCTP_S_COOKIE_WAIT
#define sCO	IP_VS_SCTP_S_COOKIE
#define sCE	IP_VS_SCTP_S_COOKIE_ECHOED
#define sES	IP_VS_SCTP_S_ESTABLISHED
#define sSS	IP_VS_SCTP_S_SHUTDOWN_SENT
#define sSR	IP_VS_SCTP_S_SHUTDOWN_RECEIVED
#define sSA	IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
#define sRJ	IP_VS_SCTP_S_REJECTED
#define sCL	IP_VS_SCTP_S_CLOSED
284 | ||
/*
 * State transition table, looked up in set_sctp_state() as
 * sctp_states[direction][event][current state] -> next state.
 *
 * There is one sub-table per direction (INPUT, OUTPUT, INPUT-ONLY).
 * Within each sub-table the rows are positional and follow the order of
 * enum ipvs_sctp_event_t: d = DATA, i = INIT, i_a = INIT_ACK,
 * c_e = COOKIE_ECHO, c_a = COOKIE_ACK, s = SHUTDOWN, s_a = SHUTDOWN_ACK,
 * s_c = SHUTDOWN_COMPLETE, err = ERROR, ab = ABORT.
 * Columns are the current state, labelled by the header comment above
 * each sub-table.
 * NOTE(review): the sub-table order is assumed to match the IP_VS_DIR_*
 * values and the column order the IP_VS_SCTP_S_* values; both enums are
 * declared outside this file - confirm before reordering anything.
 */
285 | static const __u8 sctp_states | |
286 | [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { | |
287 | { /* INPUT */ | |
288 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
289 | /* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
290 | /* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, | |
291 | /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
292 | /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
293 | /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
294 | /* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, | |
295 | /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, | |
296 | /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, | |
297 | /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, | |
298 | /* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
299 | }, | |
300 | { /* OUTPUT */ | |
301 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
302 | /* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
303 | /* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, | |
304 | /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
305 | /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
306 | /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
307 | /* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, | |
308 | /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, | |
309 | /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
310 | /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
311 | /* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
312 | }, | |
313 | { /* INPUT-ONLY */ | |
314 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
315 | /* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
316 | /* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, | |
317 | /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
318 | /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
319 | /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
320 | /* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, | |
321 | /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, | |
322 | /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, | |
323 | /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
324 | /* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
325 | }, | |
2906f66a VMR |
326 | }; |
327 | ||
61e7c420 JA |
328 | #define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) |
329 | ||
330 | /* Timeout table[state] */ | |
9d934878 | 331 | static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { |
61e7c420 JA |
332 | [IP_VS_SCTP_S_NONE] = 2 * HZ, |
333 | [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, | |
334 | [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, | |
335 | [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, | |
336 | [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, | |
337 | [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, | |
338 | [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, | |
339 | [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, | |
340 | [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, | |
341 | [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, | |
342 | [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, | |
343 | [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, | |
344 | [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, | |
345 | [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, | |
346 | [IP_VS_SCTP_S_LAST] = 2 * HZ, | |
2906f66a VMR |
347 | }; |
348 | ||
349 | static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { | |
61e7c420 JA |
350 | [IP_VS_SCTP_S_NONE] = "NONE", |
351 | [IP_VS_SCTP_S_INIT1] = "INIT1", | |
352 | [IP_VS_SCTP_S_INIT] = "INIT", | |
353 | [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", | |
354 | [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", | |
355 | [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", | |
356 | [IP_VS_SCTP_S_COOKIE] = "COOKIE", | |
357 | [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", | |
358 | [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", | |
359 | [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", | |
360 | [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", | |
361 | [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", | |
362 | [IP_VS_SCTP_S_REJECTED] = "REJECTED", | |
363 | [IP_VS_SCTP_S_CLOSED] = "CLOSED", | |
364 | [IP_VS_SCTP_S_LAST] = "BUG!", | |
2906f66a VMR |
365 | }; |
366 | ||
367 | ||
368 | static const char *sctp_state_name(int state) | |
369 | { | |
370 | if (state >= IP_VS_SCTP_S_LAST) | |
371 | return "ERR!"; | |
372 | if (sctp_state_name_table[state]) | |
373 | return sctp_state_name_table[state]; | |
374 | return "?"; | |
375 | } | |
376 | ||
4a516f11 | 377 | static inline void |
9330419d | 378 | set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
2906f66a VMR |
379 | int direction, const struct sk_buff *skb) |
380 | { | |
922dbc5b | 381 | struct sctp_chunkhdr _sctpch, *sch; |
2906f66a VMR |
382 | unsigned char chunk_type; |
383 | int event, next_state; | |
cf2e3942 | 384 | int ihl, cofs; |
2906f66a VMR |
385 | |
386 | #ifdef CONFIG_IP_VS_IPV6 | |
387 | ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); | |
388 | #else | |
389 | ihl = ip_hdrlen(skb); | |
390 | #endif | |
391 | ||
ae146d9b | 392 | cofs = ihl + sizeof(struct sctphdr); |
cf2e3942 | 393 | sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); |
2906f66a | 394 | if (sch == NULL) |
4a516f11 | 395 | return; |
2906f66a VMR |
396 | |
397 | chunk_type = sch->type; | |
398 | /* | |
399 | * Section 3: Multiple chunks can be bundled into one SCTP packet | |
400 | * up to the MTU size, except for the INIT, INIT ACK, and | |
401 | * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with | |
402 | * any other chunk in a packet. | |
403 | * | |
404 | * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control | |
405 | * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be | |
406 | * bundled with an ABORT, but they MUST be placed before the ABORT | |
407 | * in the SCTP packet or they will be ignored by the receiver. | |
408 | */ | |
409 | if ((sch->type == SCTP_CID_COOKIE_ECHO) || | |
410 | (sch->type == SCTP_CID_COOKIE_ACK)) { | |
cf2e3942 JA |
411 | int clen = ntohs(sch->length); |
412 | ||
922dbc5b | 413 | if (clen >= sizeof(_sctpch)) { |
cf2e3942 JA |
414 | sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), |
415 | sizeof(_sctpch), &_sctpch); | |
416 | if (sch && sch->type == SCTP_CID_ABORT) | |
2906f66a VMR |
417 | chunk_type = sch->type; |
418 | } | |
419 | } | |
420 | ||
61e7c420 JA |
421 | event = (chunk_type < sizeof(sctp_events)) ? |
422 | sctp_events[chunk_type] : IP_VS_SCTP_DATA; | |
2906f66a | 423 | |
61e7c420 JA |
424 | /* Update direction to INPUT_ONLY if necessary |
425 | * or delete NO_OUTPUT flag if output packet detected | |
2906f66a | 426 | */ |
61e7c420 JA |
427 | if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { |
428 | if (direction == IP_VS_DIR_OUTPUT) | |
429 | cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; | |
430 | else | |
431 | direction = IP_VS_DIR_INPUT_ONLY; | |
432 | } | |
433 | ||
434 | next_state = sctp_states[direction][event][cp->state]; | |
2906f66a VMR |
435 | |
436 | if (next_state != cp->state) { | |
437 | struct ip_vs_dest *dest = cp->dest; | |
438 | ||
439 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" | |
440 | "%s:%d state: %s->%s conn->refcnt:%d\n", | |
9330419d | 441 | pd->pp->name, |
2906f66a VMR |
442 | ((direction == IP_VS_DIR_OUTPUT) ? |
443 | "output " : "input "), | |
f18ae720 | 444 | IP_VS_DBG_ADDR(cp->daf, &cp->daddr), |
2906f66a VMR |
445 | ntohs(cp->dport), |
446 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
447 | ntohs(cp->cport), | |
448 | sctp_state_name(cp->state), | |
449 | sctp_state_name(next_state), | |
b54ab92b | 450 | refcount_read(&cp->refcnt)); |
2906f66a VMR |
451 | if (dest) { |
452 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | |
453 | (next_state != IP_VS_SCTP_S_ESTABLISHED)) { | |
454 | atomic_dec(&dest->activeconns); | |
455 | atomic_inc(&dest->inactconns); | |
456 | cp->flags |= IP_VS_CONN_F_INACTIVE; | |
457 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | |
458 | (next_state == IP_VS_SCTP_S_ESTABLISHED)) { | |
459 | atomic_inc(&dest->activeconns); | |
460 | atomic_dec(&dest->inactconns); | |
461 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | |
462 | } | |
463 | } | |
464 | } | |
9d934878 HS |
465 | if (likely(pd)) |
466 | cp->timeout = pd->timeout_table[cp->state = next_state]; | |
467 | else /* What to do ? */ | |
468 | cp->timeout = sctp_timeouts[cp->state = next_state]; | |
2906f66a VMR |
469 | } |
470 | ||
4a516f11 | 471 | static void |
2906f66a | 472 | sctp_state_transition(struct ip_vs_conn *cp, int direction, |
9330419d | 473 | const struct sk_buff *skb, struct ip_vs_proto_data *pd) |
2906f66a | 474 | { |
ac69269a | 475 | spin_lock_bh(&cp->lock); |
4a516f11 | 476 | set_sctp_state(pd, cp, direction, skb); |
ac69269a | 477 | spin_unlock_bh(&cp->lock); |
2906f66a VMR |
478 | } |
479 | ||
2906f66a VMR |
480 | static inline __u16 sctp_app_hashkey(__be16 port) |
481 | { | |
482 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) | |
483 | & SCTP_APP_TAB_MASK; | |
484 | } | |
485 | ||
19648918 | 486 | static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) |
2906f66a VMR |
487 | { |
488 | struct ip_vs_app *i; | |
489 | __u16 hash; | |
490 | __be16 port = inc->port; | |
491 | int ret = 0; | |
18d6ade6 | 492 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); |
2906f66a VMR |
493 | |
494 | hash = sctp_app_hashkey(port); | |
495 | ||
9d934878 | 496 | list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { |
2906f66a VMR |
497 | if (i->port == port) { |
498 | ret = -EEXIST; | |
499 | goto out; | |
500 | } | |
501 | } | |
363c97d7 | 502 | list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); |
9bbac6a9 | 503 | atomic_inc(&pd->appcnt); |
2906f66a | 504 | out: |
2906f66a VMR |
505 | |
506 | return ret; | |
507 | } | |
508 | ||
19648918 | 509 | static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) |
2906f66a | 510 | { |
19648918 | 511 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); |
9d934878 | 512 | |
9bbac6a9 | 513 | atomic_dec(&pd->appcnt); |
363c97d7 | 514 | list_del_rcu(&inc->p_list); |
2906f66a VMR |
515 | } |
516 | ||
517 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |
518 | { | |
58dbc6f2 | 519 | struct netns_ipvs *ipvs = cp->ipvs; |
2906f66a VMR |
520 | int hash; |
521 | struct ip_vs_app *inc; | |
522 | int result = 0; | |
523 | ||
524 | /* Default binding: bind app only for NAT */ | |
525 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | |
526 | return 0; | |
527 | /* Lookup application incarnations and bind the right one */ | |
528 | hash = sctp_app_hashkey(cp->vport); | |
529 | ||
363c97d7 | 530 | list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { |
2906f66a VMR |
531 | if (inc->port == cp->vport) { |
532 | if (unlikely(!ip_vs_app_inc_get(inc))) | |
533 | break; | |
2906f66a VMR |
534 | |
535 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" | |
536 | "%s:%u to app %s on port %u\n", | |
537 | __func__, | |
538 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
539 | ntohs(cp->cport), | |
540 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), | |
541 | ntohs(cp->vport), | |
542 | inc->name, ntohs(inc->port)); | |
543 | cp->app = inc; | |
544 | if (inc->init_conn) | |
545 | result = inc->init_conn(inc, cp); | |
0b35f603 | 546 | break; |
2906f66a VMR |
547 | } |
548 | } | |
0b35f603 | 549 | |
2906f66a VMR |
550 | return result; |
551 | } | |
552 | ||
9d934878 HS |
553 | /* --------------------------------------------- |
554 | * timeouts is netns related now. | |
555 | * --------------------------------------------- | |
556 | */ | |
1281a9c2 | 557 | static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) |
2906f66a | 558 | { |
9d934878 | 559 | ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); |
9d934878 HS |
560 | pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, |
561 | sizeof(sctp_timeouts)); | |
582b8e3e HS |
562 | if (!pd->timeout_table) |
563 | return -ENOMEM; | |
564 | return 0; | |
9d934878 | 565 | } |
2906f66a | 566 | |
1281a9c2 | 567 | static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) |
2906f66a | 568 | { |
9d934878 | 569 | kfree(pd->timeout_table); |
2906f66a VMR |
570 | } |
571 | ||
572 | struct ip_vs_protocol ip_vs_protocol_sctp = { | |
9d934878 HS |
573 | .name = "SCTP", |
574 | .protocol = IPPROTO_SCTP, | |
575 | .num_states = IP_VS_SCTP_S_LAST, | |
576 | .dont_defrag = 0, | |
577 | .init = NULL, | |
578 | .exit = NULL, | |
579 | .init_netns = __ip_vs_sctp_init, | |
580 | .exit_netns = __ip_vs_sctp_exit, | |
581 | .register_app = sctp_register_app, | |
2906f66a | 582 | .unregister_app = sctp_unregister_app, |
9d934878 HS |
583 | .conn_schedule = sctp_conn_schedule, |
584 | .conn_in_get = ip_vs_conn_in_get_proto, | |
585 | .conn_out_get = ip_vs_conn_out_get_proto, | |
586 | .snat_handler = sctp_snat_handler, | |
587 | .dnat_handler = sctp_dnat_handler, | |
588 | .csum_check = sctp_csum_check, | |
589 | .state_name = sctp_state_name, | |
2906f66a | 590 | .state_transition = sctp_state_transition, |
9d934878 HS |
591 | .app_conn_bind = sctp_app_conn_bind, |
592 | .debug_packet = ip_vs_tcpudp_debug_packet, | |
593 | .timeout_change = NULL, | |
2906f66a | 594 | }; |