]>
Commit | Line | Data |
---|---|---|
2906f66a VMR |
1 | #include <linux/kernel.h> |
2 | #include <linux/ip.h> | |
3 | #include <linux/sctp.h> | |
4 | #include <net/ip.h> | |
5 | #include <net/ip6_checksum.h> | |
6 | #include <linux/netfilter.h> | |
7 | #include <linux/netfilter_ipv4.h> | |
8 | #include <net/sctp/checksum.h> | |
9 | #include <net/ip_vs.h> | |
10 | ||
2906f66a | 11 | static int |
9330419d | 12 | sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, |
d4383f04 JDB |
13 | int *verdict, struct ip_vs_conn **cpp, |
14 | struct ip_vs_iphdr *iph) | |
2906f66a | 15 | { |
fc723250 | 16 | struct net *net; |
2906f66a | 17 | struct ip_vs_service *svc; |
c6c96c18 | 18 | struct netns_ipvs *ipvs; |
2906f66a VMR |
19 | sctp_chunkhdr_t _schunkh, *sch; |
20 | sctp_sctphdr_t *sh, _sctph; | |
2906f66a | 21 | |
d4383f04 | 22 | sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); |
2906f66a VMR |
23 | if (sh == NULL) |
24 | return 0; | |
25 | ||
d4383f04 | 26 | sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), |
2906f66a VMR |
27 | sizeof(_schunkh), &_schunkh); |
28 | if (sch == NULL) | |
29 | return 0; | |
fc723250 | 30 | net = skb_net(skb); |
c6c96c18 | 31 | ipvs = net_ipvs(net); |
ceec4c38 | 32 | rcu_read_lock(); |
c6c96c18 | 33 | if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && |
ceec4c38 JA |
34 | (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, |
35 | &iph->daddr, sh->dest))) { | |
190ecd27 JA |
36 | int ignored; |
37 | ||
c6c96c18 | 38 | if (ip_vs_todrop(ipvs)) { |
2906f66a VMR |
39 | /* |
40 | * It seems that we are very loaded. | |
41 | * We have to drop this packet :( | |
42 | */ | |
ceec4c38 | 43 | rcu_read_unlock(); |
2906f66a VMR |
44 | *verdict = NF_DROP; |
45 | return 0; | |
46 | } | |
47 | /* | |
48 | * Let the virtual server select a real server for the | |
49 | * incoming connection, and create a connection entry. | |
50 | */ | |
d4383f04 | 51 | *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph); |
a5959d53 HS |
52 | if (!*cpp && ignored <= 0) { |
53 | if (!ignored) | |
d4383f04 | 54 | *verdict = ip_vs_leave(svc, skb, pd, iph); |
ceec4c38 | 55 | else |
a5959d53 | 56 | *verdict = NF_DROP; |
ceec4c38 | 57 | rcu_read_unlock(); |
2906f66a VMR |
58 | return 0; |
59 | } | |
2906f66a | 60 | } |
ceec4c38 | 61 | rcu_read_unlock(); |
a5959d53 | 62 | /* NF_ACCEPT */ |
2906f66a VMR |
63 | return 1; |
64 | } | |
65 | ||
4b47bc9a DB |
66 | static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph, |
67 | unsigned int sctphoff) | |
68 | { | |
69 | __u32 crc32; | |
70 | struct sk_buff *iter; | |
71 | ||
72 | crc32 = sctp_start_cksum((__u8 *)sctph, skb_headlen(skb) - sctphoff); | |
73 | skb_walk_frags(skb, iter) | |
74 | crc32 = sctp_update_cksum((u8 *) iter->data, | |
75 | skb_headlen(iter), crc32); | |
76 | sctph->checksum = sctp_end_cksum(crc32); | |
77 | ||
78 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
79 | } | |
80 | ||
2906f66a | 81 | static int |
d4383f04 JDB |
82 | sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, |
83 | struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) | |
2906f66a VMR |
84 | { |
85 | sctp_sctphdr_t *sctph; | |
d4383f04 | 86 | unsigned int sctphoff = iph->len; |
2906f66a VMR |
87 | |
88 | #ifdef CONFIG_IP_VS_IPV6 | |
d4383f04 | 89 | if (cp->af == AF_INET6 && iph->fragoffs) |
63dca2c0 | 90 | return 1; |
2906f66a | 91 | #endif |
2906f66a VMR |
92 | |
93 | /* csum_check requires unshared skb */ | |
94 | if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) | |
95 | return 0; | |
96 | ||
97 | if (unlikely(cp->app != NULL)) { | |
98 | /* Some checks before mangling */ | |
99 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | |
100 | return 0; | |
101 | ||
102 | /* Call application helper if needed */ | |
103 | if (!ip_vs_app_pkt_out(cp, skb)) | |
104 | return 0; | |
105 | } | |
106 | ||
107 | sctph = (void *) skb_network_header(skb) + sctphoff; | |
108 | sctph->source = cp->vport; | |
109 | ||
4b47bc9a | 110 | sctp_nat_csum(skb, sctph, sctphoff); |
2906f66a VMR |
111 | |
112 | return 1; | |
113 | } | |
114 | ||
115 | static int | |
d4383f04 JDB |
116 | sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, |
117 | struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) | |
2906f66a | 118 | { |
2906f66a | 119 | sctp_sctphdr_t *sctph; |
d4383f04 | 120 | unsigned int sctphoff = iph->len; |
2906f66a VMR |
121 | |
122 | #ifdef CONFIG_IP_VS_IPV6 | |
d4383f04 | 123 | if (cp->af == AF_INET6 && iph->fragoffs) |
63dca2c0 | 124 | return 1; |
2906f66a | 125 | #endif |
2906f66a VMR |
126 | |
127 | /* csum_check requires unshared skb */ | |
128 | if (!skb_make_writable(skb, sctphoff + sizeof(*sctph))) | |
129 | return 0; | |
130 | ||
131 | if (unlikely(cp->app != NULL)) { | |
132 | /* Some checks before mangling */ | |
133 | if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) | |
134 | return 0; | |
135 | ||
136 | /* Call application helper if needed */ | |
8a0acaac | 137 | if (!ip_vs_app_pkt_in(cp, skb)) |
2906f66a VMR |
138 | return 0; |
139 | } | |
140 | ||
141 | sctph = (void *) skb_network_header(skb) + sctphoff; | |
142 | sctph->dest = cp->dport; | |
143 | ||
4b47bc9a | 144 | sctp_nat_csum(skb, sctph, sctphoff); |
2906f66a VMR |
145 | |
146 | return 1; | |
147 | } | |
148 | ||
149 | static int | |
150 | sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) | |
151 | { | |
2906f66a VMR |
152 | unsigned int sctphoff; |
153 | struct sctphdr *sh, _sctph; | |
13f5bf18 | 154 | struct sk_buff *iter; |
2906f66a VMR |
155 | __le32 cmp; |
156 | __le32 val; | |
157 | __u32 tmp; | |
158 | ||
159 | #ifdef CONFIG_IP_VS_IPV6 | |
160 | if (af == AF_INET6) | |
161 | sctphoff = sizeof(struct ipv6hdr); | |
162 | else | |
163 | #endif | |
164 | sctphoff = ip_hdrlen(skb); | |
165 | ||
166 | sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph); | |
167 | if (sh == NULL) | |
168 | return 0; | |
169 | ||
170 | cmp = sh->checksum; | |
171 | ||
172 | tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb)); | |
13f5bf18 DM |
173 | skb_walk_frags(skb, iter) |
174 | tmp = sctp_update_cksum((__u8 *) iter->data, | |
175 | skb_headlen(iter), tmp); | |
2906f66a VMR |
176 | |
177 | val = sctp_end_cksum(tmp); | |
178 | ||
179 | if (val != cmp) { | |
180 | /* CRC failure, dump it. */ | |
0d79641a | 181 | IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, |
2906f66a VMR |
182 | "Failed checksum for"); |
183 | return 0; | |
184 | } | |
185 | return 1; | |
186 | } | |
187 | ||
/*
 * Events fed into the SCTP state table.  The values are used as the
 * second index of sctp_states[][][], so their order matters.
 */
enum ipvs_sctp_event_t {
	/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_DATA = 0,

	/* Association setup */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,

	/* Association teardown */
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,

	/* Failures */
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,

	/* Number of events, not an event itself */
	IP_VS_SCTP_EVENT_LAST
};
201 | ||
61e7c420 JA |
202 | /* RFC 2960, 3.2 Chunk Field Descriptions */ |
203 | static __u8 sctp_events[] = { | |
204 | [SCTP_CID_DATA] = IP_VS_SCTP_DATA, | |
205 | [SCTP_CID_INIT] = IP_VS_SCTP_INIT, | |
206 | [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, | |
207 | [SCTP_CID_SACK] = IP_VS_SCTP_DATA, | |
208 | [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, | |
209 | [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, | |
210 | [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, | |
211 | [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, | |
212 | [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, | |
213 | [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, | |
214 | [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, | |
215 | [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, | |
216 | [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, | |
217 | [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, | |
218 | [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, | |
2906f66a VMR |
219 | }; |
220 | ||
61e7c420 JA |
221 | /* SCTP States: |
222 | * See RFC 2960, 4. SCTP Association State Diagram | |
223 | * | |
224 | * New states (not in diagram): | |
225 | * - INIT1 state: use shorter timeout for dropped INIT packets | |
226 | * - REJECTED state: use shorter timeout if INIT is rejected with ABORT | |
227 | * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging | |
228 | * | |
229 | * The states are as seen in real server. In the diagram, INIT1, INIT, | |
230 | * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. | |
231 | * | |
232 | * States as per packets from client (C) and server (S): | |
233 | * | |
234 | * Setup of client connection: | |
235 | * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK | |
236 | * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK | |
237 | * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO | |
238 | * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK | |
239 | * | |
240 | * Setup of server connection: | |
241 | * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK | |
242 | * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO | |
243 | * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK | |
244 | */ | |
2906f66a | 245 | |
61e7c420 JA |
246 | #define sNO IP_VS_SCTP_S_NONE |
247 | #define sI1 IP_VS_SCTP_S_INIT1 | |
248 | #define sIN IP_VS_SCTP_S_INIT | |
249 | #define sCS IP_VS_SCTP_S_COOKIE_SENT | |
250 | #define sCR IP_VS_SCTP_S_COOKIE_REPLIED | |
251 | #define sCW IP_VS_SCTP_S_COOKIE_WAIT | |
252 | #define sCO IP_VS_SCTP_S_COOKIE | |
253 | #define sCE IP_VS_SCTP_S_COOKIE_ECHOED | |
254 | #define sES IP_VS_SCTP_S_ESTABLISHED | |
255 | #define sSS IP_VS_SCTP_S_SHUTDOWN_SENT | |
256 | #define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED | |
257 | #define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT | |
258 | #define sRJ IP_VS_SCTP_S_REJECTED | |
259 | #define sCL IP_VS_SCTP_S_CLOSED | |
260 | ||
261 | static const __u8 sctp_states | |
262 | [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { | |
263 | { /* INPUT */ | |
264 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
265 | /* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
266 | /* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, | |
267 | /* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
268 | /* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
269 | /* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
270 | /* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, | |
271 | /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, | |
272 | /* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, | |
273 | /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, | |
274 | /* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
275 | }, | |
276 | { /* OUTPUT */ | |
277 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
278 | /* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
279 | /* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, | |
280 | /* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
281 | /* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
282 | /* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
283 | /* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, | |
284 | /* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, | |
285 | /* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
286 | /* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
287 | /* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
288 | }, | |
289 | { /* INPUT-ONLY */ | |
290 | /* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ | |
291 | /* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
292 | /* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, | |
293 | /* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
294 | /* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
295 | /* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, | |
296 | /* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, | |
297 | /* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, | |
298 | /* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, | |
299 | /* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, | |
300 | /* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, | |
301 | }, | |
2906f66a VMR |
302 | }; |
303 | ||
61e7c420 JA |
304 | #define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) |
305 | ||
306 | /* Timeout table[state] */ | |
9d934878 | 307 | static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { |
61e7c420 JA |
308 | [IP_VS_SCTP_S_NONE] = 2 * HZ, |
309 | [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, | |
310 | [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, | |
311 | [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, | |
312 | [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, | |
313 | [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, | |
314 | [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, | |
315 | [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, | |
316 | [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, | |
317 | [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, | |
318 | [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, | |
319 | [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, | |
320 | [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, | |
321 | [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, | |
322 | [IP_VS_SCTP_S_LAST] = 2 * HZ, | |
2906f66a VMR |
323 | }; |
324 | ||
325 | static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { | |
61e7c420 JA |
326 | [IP_VS_SCTP_S_NONE] = "NONE", |
327 | [IP_VS_SCTP_S_INIT1] = "INIT1", | |
328 | [IP_VS_SCTP_S_INIT] = "INIT", | |
329 | [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", | |
330 | [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", | |
331 | [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", | |
332 | [IP_VS_SCTP_S_COOKIE] = "COOKIE", | |
333 | [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", | |
334 | [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", | |
335 | [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", | |
336 | [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", | |
337 | [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", | |
338 | [IP_VS_SCTP_S_REJECTED] = "REJECTED", | |
339 | [IP_VS_SCTP_S_CLOSED] = "CLOSED", | |
340 | [IP_VS_SCTP_S_LAST] = "BUG!", | |
2906f66a VMR |
341 | }; |
342 | ||
343 | ||
344 | static const char *sctp_state_name(int state) | |
345 | { | |
346 | if (state >= IP_VS_SCTP_S_LAST) | |
347 | return "ERR!"; | |
348 | if (sctp_state_name_table[state]) | |
349 | return sctp_state_name_table[state]; | |
350 | return "?"; | |
351 | } | |
352 | ||
4a516f11 | 353 | static inline void |
9330419d | 354 | set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, |
2906f66a VMR |
355 | int direction, const struct sk_buff *skb) |
356 | { | |
357 | sctp_chunkhdr_t _sctpch, *sch; | |
358 | unsigned char chunk_type; | |
359 | int event, next_state; | |
cf2e3942 | 360 | int ihl, cofs; |
2906f66a VMR |
361 | |
362 | #ifdef CONFIG_IP_VS_IPV6 | |
363 | ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); | |
364 | #else | |
365 | ihl = ip_hdrlen(skb); | |
366 | #endif | |
367 | ||
cf2e3942 JA |
368 | cofs = ihl + sizeof(sctp_sctphdr_t); |
369 | sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch); | |
2906f66a | 370 | if (sch == NULL) |
4a516f11 | 371 | return; |
2906f66a VMR |
372 | |
373 | chunk_type = sch->type; | |
374 | /* | |
375 | * Section 3: Multiple chunks can be bundled into one SCTP packet | |
376 | * up to the MTU size, except for the INIT, INIT ACK, and | |
377 | * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with | |
378 | * any other chunk in a packet. | |
379 | * | |
380 | * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control | |
381 | * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be | |
382 | * bundled with an ABORT, but they MUST be placed before the ABORT | |
383 | * in the SCTP packet or they will be ignored by the receiver. | |
384 | */ | |
385 | if ((sch->type == SCTP_CID_COOKIE_ECHO) || | |
386 | (sch->type == SCTP_CID_COOKIE_ACK)) { | |
cf2e3942 JA |
387 | int clen = ntohs(sch->length); |
388 | ||
389 | if (clen >= sizeof(sctp_chunkhdr_t)) { | |
390 | sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4), | |
391 | sizeof(_sctpch), &_sctpch); | |
392 | if (sch && sch->type == SCTP_CID_ABORT) | |
2906f66a VMR |
393 | chunk_type = sch->type; |
394 | } | |
395 | } | |
396 | ||
61e7c420 JA |
397 | event = (chunk_type < sizeof(sctp_events)) ? |
398 | sctp_events[chunk_type] : IP_VS_SCTP_DATA; | |
2906f66a | 399 | |
61e7c420 JA |
400 | /* Update direction to INPUT_ONLY if necessary |
401 | * or delete NO_OUTPUT flag if output packet detected | |
2906f66a | 402 | */ |
61e7c420 JA |
403 | if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { |
404 | if (direction == IP_VS_DIR_OUTPUT) | |
405 | cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; | |
406 | else | |
407 | direction = IP_VS_DIR_INPUT_ONLY; | |
408 | } | |
409 | ||
410 | next_state = sctp_states[direction][event][cp->state]; | |
2906f66a VMR |
411 | |
412 | if (next_state != cp->state) { | |
413 | struct ip_vs_dest *dest = cp->dest; | |
414 | ||
415 | IP_VS_DBG_BUF(8, "%s %s %s:%d->" | |
416 | "%s:%d state: %s->%s conn->refcnt:%d\n", | |
9330419d | 417 | pd->pp->name, |
2906f66a VMR |
418 | ((direction == IP_VS_DIR_OUTPUT) ? |
419 | "output " : "input "), | |
420 | IP_VS_DBG_ADDR(cp->af, &cp->daddr), | |
421 | ntohs(cp->dport), | |
422 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
423 | ntohs(cp->cport), | |
424 | sctp_state_name(cp->state), | |
425 | sctp_state_name(next_state), | |
426 | atomic_read(&cp->refcnt)); | |
427 | if (dest) { | |
428 | if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && | |
429 | (next_state != IP_VS_SCTP_S_ESTABLISHED)) { | |
430 | atomic_dec(&dest->activeconns); | |
431 | atomic_inc(&dest->inactconns); | |
432 | cp->flags |= IP_VS_CONN_F_INACTIVE; | |
433 | } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && | |
434 | (next_state == IP_VS_SCTP_S_ESTABLISHED)) { | |
435 | atomic_inc(&dest->activeconns); | |
436 | atomic_dec(&dest->inactconns); | |
437 | cp->flags &= ~IP_VS_CONN_F_INACTIVE; | |
438 | } | |
439 | } | |
440 | } | |
9d934878 HS |
441 | if (likely(pd)) |
442 | cp->timeout = pd->timeout_table[cp->state = next_state]; | |
443 | else /* What to do ? */ | |
444 | cp->timeout = sctp_timeouts[cp->state = next_state]; | |
2906f66a VMR |
445 | } |
446 | ||
4a516f11 | 447 | static void |
2906f66a | 448 | sctp_state_transition(struct ip_vs_conn *cp, int direction, |
9330419d | 449 | const struct sk_buff *skb, struct ip_vs_proto_data *pd) |
2906f66a | 450 | { |
ac69269a | 451 | spin_lock_bh(&cp->lock); |
4a516f11 | 452 | set_sctp_state(pd, cp, direction, skb); |
ac69269a | 453 | spin_unlock_bh(&cp->lock); |
2906f66a VMR |
454 | } |
455 | ||
2906f66a VMR |
456 | static inline __u16 sctp_app_hashkey(__be16 port) |
457 | { | |
458 | return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) | |
459 | & SCTP_APP_TAB_MASK; | |
460 | } | |
461 | ||
ab8a5e84 | 462 | static int sctp_register_app(struct net *net, struct ip_vs_app *inc) |
2906f66a VMR |
463 | { |
464 | struct ip_vs_app *i; | |
465 | __u16 hash; | |
466 | __be16 port = inc->port; | |
467 | int ret = 0; | |
ab8a5e84 HS |
468 | struct netns_ipvs *ipvs = net_ipvs(net); |
469 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); | |
2906f66a VMR |
470 | |
471 | hash = sctp_app_hashkey(port); | |
472 | ||
9d934878 | 473 | list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { |
2906f66a VMR |
474 | if (i->port == port) { |
475 | ret = -EEXIST; | |
476 | goto out; | |
477 | } | |
478 | } | |
363c97d7 | 479 | list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]); |
9bbac6a9 | 480 | atomic_inc(&pd->appcnt); |
2906f66a | 481 | out: |
2906f66a VMR |
482 | |
483 | return ret; | |
484 | } | |
485 | ||
ab8a5e84 | 486 | static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) |
2906f66a | 487 | { |
ab8a5e84 | 488 | struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); |
9d934878 | 489 | |
9bbac6a9 | 490 | atomic_dec(&pd->appcnt); |
363c97d7 | 491 | list_del_rcu(&inc->p_list); |
2906f66a VMR |
492 | } |
493 | ||
494 | static int sctp_app_conn_bind(struct ip_vs_conn *cp) | |
495 | { | |
6e67e586 | 496 | struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); |
2906f66a VMR |
497 | int hash; |
498 | struct ip_vs_app *inc; | |
499 | int result = 0; | |
500 | ||
501 | /* Default binding: bind app only for NAT */ | |
502 | if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) | |
503 | return 0; | |
504 | /* Lookup application incarnations and bind the right one */ | |
505 | hash = sctp_app_hashkey(cp->vport); | |
506 | ||
363c97d7 JA |
507 | rcu_read_lock(); |
508 | list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { | |
2906f66a VMR |
509 | if (inc->port == cp->vport) { |
510 | if (unlikely(!ip_vs_app_inc_get(inc))) | |
511 | break; | |
363c97d7 | 512 | rcu_read_unlock(); |
2906f66a VMR |
513 | |
514 | IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" | |
515 | "%s:%u to app %s on port %u\n", | |
516 | __func__, | |
517 | IP_VS_DBG_ADDR(cp->af, &cp->caddr), | |
518 | ntohs(cp->cport), | |
519 | IP_VS_DBG_ADDR(cp->af, &cp->vaddr), | |
520 | ntohs(cp->vport), | |
521 | inc->name, ntohs(inc->port)); | |
522 | cp->app = inc; | |
523 | if (inc->init_conn) | |
524 | result = inc->init_conn(inc, cp); | |
525 | goto out; | |
526 | } | |
527 | } | |
363c97d7 | 528 | rcu_read_unlock(); |
2906f66a VMR |
529 | out: |
530 | return result; | |
531 | } | |
532 | ||
9d934878 HS |
533 | /* --------------------------------------------- |
534 | * timeouts is netns related now. | |
535 | * --------------------------------------------- | |
536 | */ | |
582b8e3e | 537 | static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) |
2906f66a | 538 | { |
9d934878 | 539 | struct netns_ipvs *ipvs = net_ipvs(net); |
2906f66a | 540 | |
9d934878 | 541 | ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); |
9d934878 HS |
542 | pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, |
543 | sizeof(sctp_timeouts)); | |
582b8e3e HS |
544 | if (!pd->timeout_table) |
545 | return -ENOMEM; | |
546 | return 0; | |
9d934878 | 547 | } |
2906f66a | 548 | |
9d934878 | 549 | static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) |
2906f66a | 550 | { |
9d934878 | 551 | kfree(pd->timeout_table); |
2906f66a VMR |
552 | } |
553 | ||
554 | struct ip_vs_protocol ip_vs_protocol_sctp = { | |
9d934878 HS |
555 | .name = "SCTP", |
556 | .protocol = IPPROTO_SCTP, | |
557 | .num_states = IP_VS_SCTP_S_LAST, | |
558 | .dont_defrag = 0, | |
559 | .init = NULL, | |
560 | .exit = NULL, | |
561 | .init_netns = __ip_vs_sctp_init, | |
562 | .exit_netns = __ip_vs_sctp_exit, | |
563 | .register_app = sctp_register_app, | |
2906f66a | 564 | .unregister_app = sctp_unregister_app, |
9d934878 HS |
565 | .conn_schedule = sctp_conn_schedule, |
566 | .conn_in_get = ip_vs_conn_in_get_proto, | |
567 | .conn_out_get = ip_vs_conn_out_get_proto, | |
568 | .snat_handler = sctp_snat_handler, | |
569 | .dnat_handler = sctp_dnat_handler, | |
570 | .csum_check = sctp_csum_check, | |
571 | .state_name = sctp_state_name, | |
2906f66a | 572 | .state_transition = sctp_state_transition, |
9d934878 HS |
573 | .app_conn_bind = sctp_app_conn_bind, |
574 | .debug_packet = ip_vs_tcpudp_debug_packet, | |
575 | .timeout_change = NULL, | |
2906f66a | 576 | }; |