]>
Commit | Line | Data |
---|---|---|
4237026e PS |
1 | /* |
2 | * Stateless TCP Tunnel (STT) vport. | |
3 | * | |
4 | * Copyright (c) 2015 Nicira, Inc. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the License, or (at your option) any later version. | |
10 | */ | |
11 | ||
e23775f2 | 12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
4237026e PS |
13 | #include <asm/unaligned.h> |
14 | ||
15 | #include <linux/delay.h> | |
4237026e PS |
16 | #include <linux/if.h> |
17 | #include <linux/if_vlan.h> | |
18 | #include <linux/ip.h> | |
19 | #include <linux/ipv6.h> | |
20 | #include <linux/jhash.h> | |
21 | #include <linux/list.h> | |
22 | #include <linux/log2.h> | |
23 | #include <linux/module.h> | |
fdce83a3 | 24 | #include <linux/net.h> |
4237026e PS |
25 | #include <linux/netfilter.h> |
26 | #include <linux/percpu.h> | |
27 | #include <linux/skbuff.h> | |
28 | #include <linux/tcp.h> | |
29 | #include <linux/workqueue.h> | |
30 | ||
e23775f2 | 31 | #include <net/dst_metadata.h> |
4237026e PS |
32 | #include <net/icmp.h> |
33 | #include <net/inet_ecn.h> | |
34 | #include <net/ip.h> | |
e23775f2 | 35 | #include <net/ip_tunnels.h> |
554daf06 | 36 | #include <net/ip6_checksum.h> |
4237026e PS |
37 | #include <net/net_namespace.h> |
38 | #include <net/netns/generic.h> | |
39 | #include <net/sock.h> | |
40 | #include <net/stt.h> | |
41 | #include <net/tcp.h> | |
42 | #include <net/udp.h> | |
43 | ||
44 | #include "gso.h" | |
e23775f2 PS |
45 | #include "compat.h" |
46 | ||
47 | #define STT_NETDEV_VER "0.1" | |
48 | #define STT_DST_PORT 7471 | |
4237026e PS |
49 | |
50 | #ifdef OVS_STT | |
a78350d9 PS |
51 | #ifdef CONFIG_SLUB |
52 | /* | |
53 | * We saw better performance with skipping zero copy in case of SLUB. | |
54 | * So skip zero copy for SLUB case. | |
55 | */ | |
56 | #define SKIP_ZERO_COPY | |
57 | #endif | |
58 | ||
4237026e PS |
59 | #define STT_VER 0 |
60 | ||
e23775f2 PS |
61 | /* @list: Per-net list of STT ports. |
62 | * @rcv: The callback is called on STT packet recv, STT reassembly can generate | |
63 | * multiple packets, in this case first packet has tunnel outer header, rest | |
64 | * of the packets are inner packet segments with no stt header. | |
65 | * @rcv_data: user data. | |
66 | * @sock: Fake TCP socket for the STT port. | |
67 | */ | |
68 | struct stt_dev { | |
69 | struct net_device *dev; | |
70 | struct net *net; | |
71 | struct list_head next; | |
19c64e86 | 72 | struct list_head up_next; |
e23775f2 PS |
73 | struct socket *sock; |
74 | __be16 dst_port; | |
75 | }; | |
76 | ||
4237026e PS |
77 | #define STT_CSUM_VERIFIED BIT(0) |
78 | #define STT_CSUM_PARTIAL BIT(1) | |
79 | #define STT_PROTO_IPV4 BIT(2) | |
80 | #define STT_PROTO_TCP BIT(3) | |
81 | #define STT_PROTO_TYPES (STT_PROTO_IPV4 | STT_PROTO_TCP) | |
82 | ||
969b8e6b | 83 | #ifdef HAVE_SKB_GSO_UDP |
4237026e PS |
84 | #define SUPPORTED_GSO_TYPES (SKB_GSO_TCPV4 | SKB_GSO_UDP | SKB_GSO_DODGY | \ |
85 | SKB_GSO_TCPV6) | |
969b8e6b GR |
86 | #else |
87 | #define SUPPORTED_GSO_TYPES (SKB_GSO_TCPV4 | SKB_GSO_DODGY | \ | |
88 | SKB_GSO_TCPV6) | |
89 | #endif | |
4237026e PS |
90 | |
91 | /* The length and offset of a fragment are encoded in the sequence number. | |
92 | * STT_SEQ_LEN_SHIFT is the left shift needed to store the length. | |
93 | * STT_SEQ_OFFSET_MASK is the mask to extract the offset. | |
94 | */ | |
95 | #define STT_SEQ_LEN_SHIFT 16 | |
96 | #define STT_SEQ_OFFSET_MASK (BIT(STT_SEQ_LEN_SHIFT) - 1) | |
97 | ||
98 | /* The maximum amount of memory used to store packets waiting to be reassembled | |
99 | * on a given CPU. Once this threshold is exceeded we will begin freeing the | |
100 | * least recently used fragments. | |
101 | */ | |
102 | #define REASM_HI_THRESH (4 * 1024 * 1024) | |
103 | /* The target for the high memory evictor. Once we have exceeded | |
104 | * REASM_HI_THRESH, we will continue freeing fragments until we hit | |
105 | * this limit. | |
106 | */ | |
107 | #define REASM_LO_THRESH (3 * 1024 * 1024) | |
108 | /* The length of time a given packet has to be reassembled from the time the | |
109 | * first fragment arrives. Once this limit is exceeded it becomes available | |
110 | * for cleaning. | |
111 | */ | |
112 | #define FRAG_EXP_TIME (30 * HZ) | |
113 | /* Number of hash entries. Each entry has only a single slot to hold a packet | |
114 | * so if there are collisions, we will drop packets. This is allocated | |
115 | * per-cpu and each entry consists of struct pkt_frag. | |
116 | */ | |
117 | #define FRAG_HASH_SHIFT 8 | |
118 | #define FRAG_HASH_ENTRIES BIT(FRAG_HASH_SHIFT) | |
119 | #define FRAG_HASH_SEGS ((sizeof(u32) * 8) / FRAG_HASH_SHIFT) | |
120 | ||
121 | #define CLEAN_PERCPU_INTERVAL (30 * HZ) | |
122 | ||
123 | struct pkt_key { | |
124 | __be32 saddr; | |
125 | __be32 daddr; | |
126 | __be32 pkt_seq; | |
127 | u32 mark; | |
128 | }; | |
129 | ||
130 | struct pkt_frag { | |
131 | struct sk_buff *skbs; | |
132 | unsigned long timestamp; | |
133 | struct list_head lru_node; | |
134 | struct pkt_key key; | |
135 | }; | |
136 | ||
137 | struct stt_percpu { | |
384868ca | 138 | struct pkt_frag *frag_hash; |
4237026e PS |
139 | struct list_head frag_lru; |
140 | unsigned int frag_mem_used; | |
141 | ||
142 | /* Protect frags table. */ | |
143 | spinlock_t lock; | |
144 | }; | |
145 | ||
146 | struct first_frag { | |
147 | struct sk_buff *last_skb; | |
148 | unsigned int mem_used; | |
149 | u16 tot_len; | |
150 | u16 rcvd_len; | |
151 | bool set_ecn_ce; | |
152 | }; | |
153 | ||
154 | struct frag_skb_cb { | |
155 | u16 offset; | |
156 | ||
157 | /* Only valid for the first skb in the chain. */ | |
158 | struct first_frag first; | |
159 | }; | |
160 | ||
161 | #define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb) | |
162 | ||
163 | /* per-network namespace private data for this module */ | |
164 | struct stt_net { | |
e23775f2 | 165 | struct list_head stt_list; |
19c64e86 | 166 | struct list_head stt_up_list; /* Devices which are in IFF_UP state. */ |
e23775f2 | 167 | int n_tunnels; |
3b6565cd PS |
168 | #ifdef HAVE_NF_REGISTER_NET_HOOK |
169 | bool nf_hook_reg_done; | |
170 | #endif | |
4237026e PS |
171 | }; |
172 | ||
173 | static int stt_net_id; | |
174 | ||
175 | static struct stt_percpu __percpu *stt_percpu_data __read_mostly; | |
176 | static u32 frag_hash_seed __read_mostly; | |
177 | ||
178 | /* Protects sock-hash and refcounts. */ | |
179 | static DEFINE_MUTEX(stt_mutex); | |
180 | ||
181 | static int n_tunnels; | |
182 | static DEFINE_PER_CPU(u32, pkt_seq_counter); | |
183 | ||
184 | static void clean_percpu(struct work_struct *work); | |
185 | static DECLARE_DELAYED_WORK(clean_percpu_wq, clean_percpu); | |
186 | ||
19c64e86 | 187 | static struct stt_dev *stt_find_up_dev(struct net *net, __be16 port) |
4237026e PS |
188 | { |
189 | struct stt_net *sn = net_generic(net, stt_net_id); | |
e23775f2 | 190 | struct stt_dev *stt_dev; |
4237026e | 191 | |
19c64e86 | 192 | list_for_each_entry_rcu(stt_dev, &sn->stt_up_list, up_next) { |
4e00c985 | 193 | if (stt_dev->dst_port == port) |
e23775f2 | 194 | return stt_dev; |
4237026e PS |
195 | } |
196 | return NULL; | |
197 | } | |
198 | ||
/* Generate a TCP ack_seq value for an outgoing STT frame.  A per-CPU
 * counter is shifted up and combined with the CPU id in the low bits so
 * concurrent CPUs never produce the same value.  The build fails for
 * more than 64k CPUs because the CPU id would no longer fit.
 */
static __be32 ack_seq(void)
{
#if NR_CPUS <= 65536
	u32 pkt_seq, ack;

	pkt_seq = this_cpu_read(pkt_seq_counter);
	ack = pkt_seq << ilog2(NR_CPUS) | smp_processor_id();
	this_cpu_inc(pkt_seq_counter);

	return (__force __be32)ack;
#else
#error "Support for greater than 64k CPUs not implemented"
#endif
}
213 | ||
/* Clear all GSO state on @skb.  The skb is uncloned first because its
 * shared_info may be shared with a clone.  Returns 0 on success or a
 * negative errno from skb_unclone().
 */
static int clear_gso(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int err;

	if (shinfo->gso_type == 0 && shinfo->gso_size == 0 &&
	    shinfo->gso_segs == 0)
		return 0;

	err = skb_unclone(skb, GFP_ATOMIC);
	if (unlikely(err))
		return err;

	/* skb_unclone() may have replaced the shared_info; re-read it. */
	shinfo = skb_shinfo(skb);
	shinfo->gso_type = 0;
	shinfo->gso_size = 0;
	shinfo->gso_segs = 0;
	return 0;
}
233 | ||
4237026e PS |
/* Copy per-packet metadata (protocol, timestamp, priority, mark, VLAN
 * state and secmark) from @from to @to, so skbs produced by
 * segmentation/reassembly inherit the original packet's metadata.
 */
static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->protocol = from->protocol;
	to->tstamp = from->tstamp;
	to->priority = from->priority;
	to->mark = from->mark;
	to->vlan_tci = from->vlan_tci;
	to->vlan_proto = from->vlan_proto;
	skb_copy_secmark(to, from);
}
244 | ||
/* Fix up the IP and TCP headers of one segment after list segmentation.
 * @head: true for the first segment (its TCP seq is left untouched).
 * @l4_offset: byte offset of the TCP header from skb->data.
 * @hdr_len: total header length (L2 through end of TCP header).
 * @ipv4: true for IPv4, false for IPv6.
 * @tcp_seq: TCP sequence number to store on non-head segments.
 */
static void update_headers(struct sk_buff *skb, bool head,
			   unsigned int l4_offset, unsigned int hdr_len,
			   bool ipv4, u32 tcp_seq)
{
	u16 old_len, new_len;
	__be32 delta;
	struct tcphdr *tcph;
	int gso_size;

	if (ipv4) {
		struct iphdr *iph = (struct iphdr *)(skb->data + ETH_HLEN);

		old_len = ntohs(iph->tot_len);
		new_len = skb->len - ETH_HLEN;
		iph->tot_len = htons(new_len);

		ip_send_check(iph);
	} else {
		struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + ETH_HLEN);

		old_len = ntohs(ip6h->payload_len);
		new_len = skb->len - ETH_HLEN - sizeof(struct ipv6hdr);
		ip6h->payload_len = htons(new_len);
	}

	tcph = (struct tcphdr *)(skb->data + l4_offset);
	if (!head) {
		/* Non-head segments get a recomputed sequence number and
		 * must not carry CWR.
		 */
		tcph->seq = htonl(tcp_seq);
		tcph->cwr = 0;
	}

	if (skb->next) {
		/* FIN/PSH belong only on the last segment of the chain. */
		tcph->fin = 0;
		tcph->psh = 0;
	}

	/* Incrementally adjust the TCP checksum for the length change. */
	delta = htonl(~old_len + new_len);
	tcph->check = ~csum_fold((__force __wsum)((__force u32)tcph->check +
				 (__force u32)delta));

	gso_size = skb_shinfo(skb)->gso_size;
	if (gso_size && skb->len - hdr_len <= gso_size)
		/* Segment now fits in one MSS, so GSO state is dropped.
		 * NOTE(review): clear_gso() is evaluated inside BUG_ON(),
		 * so its side effect is relied upon despite looking like
		 * a pure assertion.
		 */
		BUG_ON(clear_gso(skb));
}
289 | ||
290 | static bool can_segment(struct sk_buff *head, bool ipv4, bool tcp, bool csum_partial) | |
291 | { | |
292 | /* If no offloading is in use then we don't have enough information | |
293 | * to process the headers. | |
294 | */ | |
295 | if (!csum_partial) | |
296 | goto linearize; | |
297 | ||
298 | /* Handling UDP packets requires IP fragmentation, which means that | |
299 | * the L4 checksum can no longer be calculated by hardware (since the | |
300 | * fragments are in different packets. If we have to compute the | |
301 | * checksum it's faster just to linearize and large UDP packets are | |
302 | * pretty uncommon anyways, so it's not worth dealing with for now. | |
303 | */ | |
304 | if (!tcp) | |
305 | goto linearize; | |
306 | ||
307 | if (ipv4) { | |
308 | struct iphdr *iph = (struct iphdr *)(head->data + ETH_HLEN); | |
309 | ||
310 | /* It's difficult to get the IP IDs exactly right here due to | |
311 | * varying segment sizes and potentially multiple layers of | |
312 | * segmentation. IP ID isn't important when DF is set and DF | |
313 | * is generally set for TCP packets, so just linearize if it's | |
314 | * not. | |
315 | */ | |
316 | if (!(iph->frag_off & htons(IP_DF))) | |
317 | goto linearize; | |
318 | } else { | |
319 | struct ipv6hdr *ip6h = (struct ipv6hdr *)(head->data + ETH_HLEN); | |
320 | ||
321 | /* Jumbograms require more processing to update and we'll | |
322 | * probably never see them, so just linearize. | |
323 | */ | |
324 | if (ip6h->payload_len == 0) | |
325 | goto linearize; | |
326 | } | |
327 | return true; | |
328 | ||
329 | linearize: | |
330 | return false; | |
331 | } | |
332 | ||
/* Copy @head's first @hdr_len bytes of headers onto fragment @frag,
 * expanding @frag's headroom if necessary, and propagate checksum
 * offload state, GSO state and packet metadata.
 * Returns 0 on success or -ENOMEM.
 */
static int copy_headers(struct sk_buff *head, struct sk_buff *frag,
			int hdr_len)
{
	u16 csum_start;

	if (skb_cloned(frag) || skb_headroom(frag) < hdr_len) {
		int extra_head = hdr_len - skb_headroom(frag);

		extra_head = extra_head > 0 ? extra_head : 0;
		if (unlikely(pskb_expand_head(frag, extra_head, 0,
					      GFP_ATOMIC)))
			return -ENOMEM;
	}

	memcpy(__skb_push(frag, hdr_len), head->data, hdr_len);

	/* csum_start is relative to skb->head; rebase it onto frag. */
	csum_start = head->csum_start - skb_headroom(head);
	frag->csum_start = skb_headroom(frag) + csum_start;
	frag->csum_offset = head->csum_offset;
	frag->ip_summed = head->ip_summed;

	skb_shinfo(frag)->gso_size = skb_shinfo(head)->gso_size;
	skb_shinfo(frag)->gso_type = skb_shinfo(head)->gso_type;
	skb_shinfo(frag)->gso_segs = 0;

	copy_skb_metadata(frag, head);
	return 0;
}
361 | ||
/* Turn @head's frag_list into a chain of standalone TCP segments linked
 * via ->next: each fragment receives a copy of @head's headers plus a
 * fixed-up IP length, TCP sequence number and checksum.
 * Returns 0 on success or a negative errno.
 */
static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
{
	struct sk_buff *skb;
	struct tcphdr *tcph;
	int seg_len;
	int hdr_len;
	int tcp_len;
	u32 seq;

	if (unlikely(!pskb_may_pull(head, l4_offset + sizeof(*tcph))))
		return -ENOMEM;

	tcph = (struct tcphdr *)(head->data + l4_offset);
	tcp_len = tcph->doff * 4;
	hdr_len = l4_offset + tcp_len;

	if (unlikely((tcp_len < sizeof(struct tcphdr)) ||
		     (head->len < hdr_len)))
		return -EINVAL;

	if (unlikely(!pskb_may_pull(head, hdr_len)))
		return -ENOMEM;

	/* Re-read the pointer: pskb_may_pull() may have moved skb->data. */
	tcph = (struct tcphdr *)(head->data + l4_offset);
	/* Update header of each segment. */
	seq = ntohl(tcph->seq);
	seg_len = skb_pagelen(head) - hdr_len;

	/* Detach the frag_list and splice it onto head->next. */
	skb = skb_shinfo(head)->frag_list;
	skb_shinfo(head)->frag_list = NULL;
	head->next = skb;
	for (; skb; skb = skb->next) {
		int err;

		/* The fragment's data no longer counts against head. */
		head->len -= skb->len;
		head->data_len -= skb->len;
		head->truesize -= skb->truesize;

		seq += seg_len;
		seg_len = skb->len;
		err = copy_headers(head, skb, hdr_len);
		if (err)
			return err;
		update_headers(skb, false, l4_offset, hdr_len, ipv4, seq);
	}
	update_headers(head, true, l4_offset, hdr_len, ipv4, 0);
	return 0;
}
410 | ||
a78350d9 PS |
411 | #ifndef SKIP_ZERO_COPY |
/* Recursively flatten the frag_lists hanging off the skb chain *skbp
 * into one linear ->next chain.  @head, when non-NULL, is the top-level
 * skb whose len/data_len/truesize accounting must shed each flattened
 * child.  Shared skbs are replaced by clones so they can be relinked
 * safely.  Returns the last skb of the flattened chain, or an ERR_PTR.
 */
static struct sk_buff *normalize_frag_list(struct sk_buff *head,
					   struct sk_buff **skbp)
{
	struct sk_buff *skb = *skbp;
	struct sk_buff *last;

	do {
		struct sk_buff *frags;

		if (skb_shared(skb)) {
			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

			if (unlikely(!nskb))
				return ERR_PTR(-ENOMEM);

			nskb->next = skb->next;
			consume_skb(skb);
			skb = nskb;
			*skbp = skb;
		}

		if (head) {
			head->len -= skb->len;
			head->data_len -= skb->len;
			head->truesize -= skb->truesize;
		}

		frags = skb_shinfo(skb)->frag_list;
		if (frags) {
			int err;

			err = skb_unclone(skb, GFP_ATOMIC);
			if (unlikely(err))
				return ERR_PTR(err);

			/* Flatten the child list, then splice it in place
			 * of skb's frag_list.
			 */
			last = normalize_frag_list(skb, &frags);
			if (IS_ERR(last))
				return last;

			skb_shinfo(skb)->frag_list = NULL;
			last->next = skb->next;
			skb->next = frags;
		} else {
			last = skb;
		}

		skbp = &skb->next;
	} while ((skb = skb->next));

	return last;
}
463 | ||
464 | /* Takes a linked list of skbs, which potentially contain frag_list | |
465 | * (whose members in turn potentially contain frag_lists, etc.) and | |
466 | * converts them into a single linear linked list. | |
467 | */ | |
468 | static int straighten_frag_list(struct sk_buff **skbp) | |
469 | { | |
470 | struct sk_buff *err_skb; | |
471 | ||
472 | err_skb = normalize_frag_list(NULL, skbp); | |
473 | if (IS_ERR(err_skb)) | |
474 | return PTR_ERR(err_skb); | |
475 | ||
476 | return 0; | |
477 | } | |
478 | ||
4237026e PS |
/* Merge the skb chain at *headp into as few buffers as possible.  The
 * chain is first flattened, then adjacent skbs are merged with
 * skb_try_coalesce(); whatever remains is reattached as head's
 * frag_list with the length/truesize accounting restored.
 * Returns 0 on success or a negative errno.
 */
static int coalesce_skb(struct sk_buff **headp)
{
	struct sk_buff *frag, *head, *prev;
	int err;

	err = straighten_frag_list(headp);
	if (unlikely(err))
		return err;
	head = *headp;

	/* Coalesce frag list. */
	prev = head;
	for (frag = head->next; frag; frag = frag->next) {
		bool headstolen;
		int delta;

		if (unlikely(skb_unclone(prev, GFP_ATOMIC)))
			return -ENOMEM;

		if (!skb_try_coalesce(prev, frag, &headstolen, &delta)) {
			prev = frag;
			continue;
		}

		/* frag's data now lives in prev; unlink and free it. */
		prev->next = frag->next;
		frag->len = 0;
		frag->data_len = 0;
		frag->truesize -= delta;
		kfree_skb_partial(frag, headstolen);
		frag = prev;
	}

	if (!head->next)
		return 0;

	/* Re-add the remaining fragments' sizes to head's accounting
	 * (they were subtracted while flattening).
	 */
	for (frag = head->next; frag; frag = frag->next) {
		head->len += frag->len;
		head->data_len += frag->len;
		head->truesize += frag->truesize;
	}

	skb_shinfo(head)->frag_list = head->next;
	head->next = NULL;
	return 0;
}
a78350d9 PS |
524 | #else |
/* SKIP_ZERO_COPY variant: instead of zero-copy coalescing, expand the
 * head skb to the full reassembled length and memcpy every fragment's
 * data into it.  This was measured faster than zero copy under SLUB
 * (see the SKIP_ZERO_COPY definition above).
 * Returns 0 on success or a negative errno.
 */
static int coalesce_skb(struct sk_buff **headp)
{
	struct sk_buff *frag, *head = *headp, *next;
	int delta = FRAG_CB(head)->first.tot_len - skb_headlen(head);
	int err;

	if (unlikely(!head->next))
		return 0;

	err = pskb_expand_head(head, 0, delta, GFP_ATOMIC);
	if (unlikely(err))
		return err;

	/* Pull any paged data into the (now large enough) linear area. */
	if (unlikely(!__pskb_pull_tail(head, head->data_len)))
		BUG();

	for (frag = head->next; frag; frag = next) {
		skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
		next = frag->next;
		kfree_skb(frag);
	}

	head->next = NULL;
	head->truesize = SKB_TRUESIZE(head->len);
	return 0;
}
551 | #endif | |
4237026e PS |
552 | |
553 | static int __try_to_segment(struct sk_buff *skb, bool csum_partial, | |
554 | bool ipv4, bool tcp, int l4_offset) | |
555 | { | |
556 | if (can_segment(skb, ipv4, tcp, csum_partial)) | |
557 | return skb_list_segment(skb, ipv4, l4_offset); | |
558 | else | |
559 | return skb_linearize(skb); | |
560 | } | |
561 | ||
562 | static int try_to_segment(struct sk_buff *skb) | |
563 | { | |
564 | struct stthdr *stth = stt_hdr(skb); | |
565 | bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL); | |
566 | bool ipv4 = !!(stth->flags & STT_PROTO_IPV4); | |
567 | bool tcp = !!(stth->flags & STT_PROTO_TCP); | |
568 | int l4_offset = stth->l4_offset; | |
569 | ||
570 | return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset); | |
571 | } | |
572 | ||
/* Prepare a reassembled skb chain for delivery: coalesce it (zero-copy
 * builds only) and, if a frag_list remains afterwards, segment or
 * linearize it.  Returns 0 on success or a negative errno.
 */
static int segment_skb(struct sk_buff **headp, bool csum_partial,
		       bool ipv4, bool tcp, int l4_offset)
{
#ifndef SKIP_ZERO_COPY
	int err;

	err = coalesce_skb(headp);
	if (err)
		return err;
#endif

	if (skb_shinfo(*headp)->frag_list)
		return __try_to_segment(*headp, csum_partial,
					ipv4, tcp, l4_offset);
	return 0;
}
589 | ||
/* Prepend the STT header plus fake TCP header to a single skb and set
 * up checksum offload for the outer TCP header.  The STT frame length
 * is encoded in the TCP sequence number (see STT_SEQ_LEN_SHIFT).  If
 * the encapsulated frame exceeds the path MSS, GSO state is installed
 * so the stack segments the outer "TCP stream".
 * Returns 0 on success or -EINVAL.
 */
static int __push_stt_header(struct sk_buff *skb, __be64 tun_id,
			     __be16 s_port, __be16 d_port,
			     __be32 saddr, __be32 dst,
			     __be16 l3_proto, u8 l4_proto,
			     int dst_mtu)
{
	int data_len = skb->len + sizeof(struct stthdr) + STT_ETH_PAD;
	unsigned short encap_mss;
	struct tcphdr *tcph;
	struct stthdr *stth;

	skb_push(skb, STT_HEADER_LEN);
	skb_reset_transport_header(skb);
	tcph = tcp_hdr(skb);
	memset(tcph, 0, STT_HEADER_LEN);
	stth = stt_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Tell the receiver the inner checksum is incomplete and
		 * where the inner L4 header sits.
		 */
		stth->flags |= STT_CSUM_PARTIAL;

		stth->l4_offset = skb->csum_start -
					(skb_headroom(skb) +
					STT_HEADER_LEN);

		if (l3_proto == htons(ETH_P_IP))
			stth->flags |= STT_PROTO_IPV4;

		if (l4_proto == IPPROTO_TCP)
			stth->flags |= STT_PROTO_TCP;

		stth->mss = htons(skb_shinfo(skb)->gso_size);
	} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
		stth->flags |= STT_CSUM_VERIFIED;
	}

	/* The inner VLAN tag travels in the STT header, not the frame. */
	stth->vlan_tci = htons(skb->vlan_tci);
	skb->vlan_tci = 0;
	put_unaligned(tun_id, &stth->key);

	/* Build the fake TCP header; seq carries the frame length. */
	tcph->source = s_port;
	tcph->dest = d_port;
	tcph->doff = sizeof(struct tcphdr) / 4;
	tcph->ack = 1;
	tcph->psh = 1;
	tcph->window = htons(USHRT_MAX);
	tcph->seq = htonl(data_len << STT_SEQ_LEN_SHIFT);
	tcph->ack_seq = ack_seq();
	tcph->check = ~tcp_v4_check(skb->len, saddr, dst, 0);

	skb->csum_start = skb_transport_header(skb) - skb->head;
	skb->csum_offset = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;

	encap_mss = dst_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
	if (data_len > encap_mss) {
		/* Frame exceeds the path MSS: hand off to GSO. */
		if (unlikely(skb_unclone(skb, GFP_ATOMIC)))
			return -EINVAL;

		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
		skb_shinfo(skb)->gso_size = encap_mss;
		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(data_len, encap_mss);
	} else {
		if (unlikely(clear_gso(skb)))
			return -EINVAL;
	}
	return 0;
}
657 | ||
/* Push STT/TCP headers onto every skb of a chain.  If @head carries a
 * frag_list it is first segmented (or linearized) so each element can
 * be encapsulated independently.  On error the whole list is freed and
 * NULL is returned.
 */
static struct sk_buff *push_stt_header(struct sk_buff *head, __be64 tun_id,
				       __be16 s_port, __be16 d_port,
				       __be32 saddr, __be32 dst,
				       __be16 l3_proto, u8 l4_proto,
				       int dst_mtu)
{
	struct sk_buff *skb;

	if (skb_shinfo(head)->frag_list) {
		bool ipv4 = (l3_proto == htons(ETH_P_IP));
		bool tcp = (l4_proto == IPPROTO_TCP);
		bool csum_partial = (head->ip_summed == CHECKSUM_PARTIAL);
		int l4_offset = skb_transport_offset(head);

		/* Need to call skb_orphan() to report correct true-size.
		 * Calling skb_orphan() in this layer is odd, but an SKB
		 * with a frag-list should not be associated with any
		 * socket, so skb_orphan() should be a no-op. */
		skb_orphan(head);
		if (unlikely(segment_skb(&head, csum_partial,
					 ipv4, tcp, l4_offset)))
			goto error;
	}

	for (skb = head; skb; skb = skb->next) {
		if (__push_stt_header(skb, tun_id, s_port, d_port, saddr, dst,
				      l3_proto, l4_proto, dst_mtu))
			goto error;
	}

	return head;
error:
	kfree_skb_list(head);
	return NULL;
}
693 | ||
/* Decide whether @skb can be transmitted with checksum/GSO offload kept
 * through STT encapsulation.  For GSO skbs lacking CHECKSUM_PARTIAL the
 * L4 checksum field is seeded with the pseudo-header sum and the skb is
 * converted to CHECKSUM_PARTIAL here.
 * Returns 1 when offload can be kept, 0 when the caller must fall back
 * to software handling (handle_offloads()), or a negative errno.
 */
static int stt_can_offload(struct sk_buff *skb, __be16 l3_proto, u8 l4_proto)
{
	if (skb_is_gso(skb) && skb->ip_summed != CHECKSUM_PARTIAL) {
		int csum_offset;
		__sum16 *csum;
		int len;

		if (l4_proto == IPPROTO_TCP)
			csum_offset = offsetof(struct tcphdr, check);
		else if (l4_proto == IPPROTO_UDP)
			csum_offset = offsetof(struct udphdr, check);
		else
			return 0;

		len = skb->len - skb_transport_offset(skb);
		csum = (__sum16 *)(skb_transport_header(skb) + csum_offset);

		if (unlikely(!pskb_may_pull(skb, skb_transport_offset(skb) +
					    csum_offset + sizeof(*csum))))
			return -EINVAL;

		/* Seed the checksum field with the pseudo-header sum so
		 * only the payload remains to be folded in.
		 */
		if (l3_proto == htons(ETH_P_IP)) {
			struct iphdr *iph = ip_hdr(skb);

			*csum = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
						   len, l4_proto, 0);
		} else if (l3_proto == htons(ETH_P_IPV6)) {
			struct ipv6hdr *ip6h = ipv6_hdr(skb);

			*csum = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
						 len, l4_proto, 0);
		} else {
			return 0;
		}
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = csum_offset;
		skb->ip_summed = CHECKSUM_PARTIAL;
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Assume receiver can only offload TCP/UDP over IPv4/6,
		 * and require 802.1Q VLANs to be accelerated.
		 */
		if (l3_proto != htons(ETH_P_IP) &&
		    l3_proto != htons(ETH_P_IPV6))
			return 0;

		if (l4_proto != IPPROTO_TCP && l4_proto != IPPROTO_UDP)
			return 0;

		/* L4 offset must fit in a 1-byte field. */
		if (skb->csum_start - skb_headroom(skb) > 255)
			return 0;

		if (skb_shinfo(skb)->gso_type & ~SUPPORTED_GSO_TYPES)
			return 0;
	}
	/* Total size of encapsulated packet must fit in 16 bits. */
	if (skb->len + STT_HEADER_LEN + sizeof(struct iphdr) > 65535)
		return 0;

	/* Only accelerated 802.1Q tags can ride in the STT header. */
	if (skb_vlan_tag_present(skb) && skb->vlan_proto != htons(ETH_P_8021Q))
		return 0;
	return 1;
}
759 | ||
760 | static bool need_linearize(const struct sk_buff *skb) | |
761 | { | |
762 | struct skb_shared_info *shinfo = skb_shinfo(skb); | |
763 | int i; | |
764 | ||
765 | if (unlikely(shinfo->frag_list)) | |
766 | return true; | |
767 | ||
768 | /* Generally speaking we should linearize if there are paged frags. | |
769 | * However, if all of the refcounts are 1 we know nobody else can | |
770 | * change them from underneath us and we can skip the linearization. | |
771 | */ | |
772 | for (i = 0; i < shinfo->nr_frags; i++) | |
773 | if (unlikely(page_count(skb_frag_page(&shinfo->frags[i])) > 1)) | |
774 | return true; | |
775 | ||
776 | return false; | |
777 | } | |
778 | ||
/* Software fallback used when stt_can_offload() returned 0: push a
 * non-802.1Q accelerated VLAN tag inline, perform GSO in software
 * (preserving skb->cb across segmentation), or complete a partial
 * checksum.  The input skb is consumed on every path; returns the
 * resulting skb (possibly a chain) or an ERR_PTR.
 */
static struct sk_buff *handle_offloads(struct sk_buff *skb, int min_headroom)
{
	int err;

	if (skb_vlan_tag_present(skb) && skb->vlan_proto != htons(ETH_P_8021Q)) {

		min_headroom += VLAN_HLEN;
		if (skb_headroom(skb) < min_headroom) {
			int head_delta = SKB_DATA_ALIGN(min_headroom -
							skb_headroom(skb) + 16);

			err = pskb_expand_head(skb, max_t(int, head_delta, 0),
					       0, GFP_ATOMIC);
			if (unlikely(err))
				goto error;
		}

		skb = __vlan_hwaccel_push_inside(skb);
		if (!skb) {
			err = -ENOMEM;
			goto error;
		}
	}

	if (skb_is_gso(skb)) {
		struct sk_buff *nskb;
		char cb[sizeof(skb->cb)];

		/* Save cb: segmentation does not carry it to the segments. */
		memcpy(cb, skb->cb, sizeof(cb));

		nskb = __skb_gso_segment(skb, 0, false);
		if (IS_ERR(nskb)) {
			err = PTR_ERR(nskb);
			goto error;
		}

		consume_skb(skb);
		skb = nskb;
		while (nskb) {
			memcpy(nskb->cb, cb, sizeof(cb));
			nskb = nskb->next;
		}
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Pages aren't locked and could change at any time.
		 * If this happens after we compute the checksum, the
		 * checksum will be wrong. We linearize now to avoid
		 * this problem.
		 */
		if (unlikely(need_linearize(skb))) {
			err = __skb_linearize(skb);
			if (unlikely(err))
				goto error;
		}

		err = skb_checksum_help(skb);
		if (unlikely(err))
			goto error;
	}
	skb->ip_summed = CHECKSUM_NONE;

	return skb;
error:
	kfree_skb(skb);
	return ERR_PTR(err);
}
844 | ||
43dd2fce PS |
845 | static void skb_list_xmit(struct rtable *rt, struct sk_buff *skb, __be32 src, |
846 | __be32 dst, __u8 tos, __u8 ttl, __be16 df) | |
4237026e | 847 | { |
4237026e PS |
848 | while (skb) { |
849 | struct sk_buff *next = skb->next; | |
850 | ||
851 | if (next) | |
852 | dst_clone(&rt->dst); | |
853 | ||
4237026e | 854 | skb->next = NULL; |
43dd2fce PS |
855 | iptunnel_xmit(NULL, rt, skb, src, dst, IPPROTO_TCP, |
856 | tos, ttl, df, false); | |
4237026e PS |
857 | |
858 | skb = next; | |
859 | } | |
4237026e PS |
860 | } |
861 | ||
/* Walk the IPv6 extension-header chain starting at @skb's network
 * header and return the final L4 protocol number, or 0 if the headers
 * cannot be pulled or parsed.
 */
static u8 parse_ipv6_l4_proto(struct sk_buff *skb)
{
	unsigned int nh_ofs = skb_network_offset(skb);
	int payload_ofs;
	struct ipv6hdr *nh;
	uint8_t nexthdr;
	__be16 frag_off;

	if (unlikely(!pskb_may_pull(skb, nh_ofs + sizeof(struct ipv6hdr))))
		return 0;

	nh = ipv6_hdr(skb);
	nexthdr = nh->nexthdr;
	payload_ofs = (u8 *)(nh + 1) - skb->data;

	/* Skip extension headers to reach the upper-layer protocol. */
	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
	if (unlikely(payload_ofs < 0))
		return 0;

	return nexthdr;
}
883 | ||
884 | static u8 skb_get_l4_proto(struct sk_buff *skb, __be16 l3_proto) | |
885 | { | |
886 | if (l3_proto == htons(ETH_P_IP)) { | |
887 | unsigned int nh_ofs = skb_network_offset(skb); | |
888 | ||
889 | if (unlikely(!pskb_may_pull(skb, nh_ofs + sizeof(struct iphdr)))) | |
890 | return 0; | |
891 | ||
892 | return ip_hdr(skb)->protocol; | |
893 | } else if (l3_proto == htons(ETH_P_IPV6)) { | |
894 | return parse_ipv6_l4_proto(skb); | |
895 | } | |
896 | return 0; | |
897 | } | |
898 | ||
e23775f2 | 899 | static int stt_xmit_skb(struct sk_buff *skb, struct rtable *rt, |
43dd2fce PS |
900 | __be32 src, __be32 dst, __u8 tos, |
901 | __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, | |
902 | __be64 tun_id) | |
4237026e PS |
903 | { |
904 | struct ethhdr *eh = eth_hdr(skb); | |
905 | int ret = 0, min_headroom; | |
906 | __be16 inner_l3_proto; | |
907 | u8 inner_l4_proto; | |
908 | ||
909 | inner_l3_proto = eh->h_proto; | |
910 | inner_l4_proto = skb_get_l4_proto(skb, inner_l3_proto); | |
911 | ||
912 | min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len | |
913 | + STT_HEADER_LEN + sizeof(struct iphdr); | |
914 | ||
915 | if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { | |
916 | int head_delta = SKB_DATA_ALIGN(min_headroom - | |
917 | skb_headroom(skb) + | |
918 | 16); | |
919 | ||
920 | ret = pskb_expand_head(skb, max_t(int, head_delta, 0), | |
921 | 0, GFP_ATOMIC); | |
922 | if (unlikely(ret)) | |
923 | goto err_free_rt; | |
924 | } | |
925 | ||
926 | ret = stt_can_offload(skb, inner_l3_proto, inner_l4_proto); | |
927 | if (ret < 0) | |
928 | goto err_free_rt; | |
929 | if (!ret) { | |
930 | skb = handle_offloads(skb, min_headroom); | |
931 | if (IS_ERR(skb)) { | |
932 | ret = PTR_ERR(skb); | |
933 | skb = NULL; | |
934 | goto err_free_rt; | |
935 | } | |
936 | } | |
937 | ||
938 | ret = 0; | |
939 | while (skb) { | |
940 | struct sk_buff *next_skb = skb->next; | |
941 | ||
942 | skb->next = NULL; | |
943 | ||
944 | if (next_skb) | |
945 | dst_clone(&rt->dst); | |
946 | ||
947 | /* Push STT and TCP header. */ | |
948 | skb = push_stt_header(skb, tun_id, src_port, dst_port, src, | |
949 | dst, inner_l3_proto, inner_l4_proto, | |
950 | dst_mtu(&rt->dst)); | |
951 | if (unlikely(!skb)) { | |
952 | ip_rt_put(rt); | |
953 | goto next; | |
954 | } | |
955 | ||
956 | /* Push IP header. */ | |
43dd2fce | 957 | skb_list_xmit(rt, skb, src, dst, tos, ttl, df); |
4237026e PS |
958 | |
959 | next: | |
960 | skb = next_skb; | |
961 | } | |
962 | ||
43dd2fce | 963 | return 0; |
4237026e PS |
964 | |
965 | err_free_rt: | |
966 | ip_rt_put(rt); | |
967 | kfree_skb(skb); | |
968 | return ret; | |
969 | } | |
e23775f2 | 970 | |
/* Look up the IPv4 route for the tunnel endpoint described by @key.
 *
 * The flow is keyed on TCP with the actual source/destination ports so
 * ECMP hashing in the underlay matches the packets we will emit.
 * @fl is filled in as a side effect (callers read fl->saddr back).
 * Returns the route, or an ERR_PTR() on lookup failure.
 */
static struct rtable *stt_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key,
				 __be16 dport, __be16 sport)
{
	struct net *net = dev_net(dev);

	/* Route lookup */
	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_TCP;
	fl->fl4_dport = dport;
	fl->fl4_sport = sport;

	return ip_route_output_key(net, fl);
}
991 | ||
/* Transmit entry point for the STT tunnel device.
 *
 * Reads the tunnel metadata attached to @skb, resolves a route to the
 * remote endpoint and hands the packet to stt_xmit_skb().  Consumes
 * @skb on every path.
 *
 * NOTE(review): on error this returns a negative errno rather than a
 * NETDEV_TX_* code; presumably callers in the OVS compat layer treat
 * the return as an errno — confirm against the callers before changing.
 */
netdev_tx_t ovs_stt_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct stt_dev *stt_dev = netdev_priv(dev);
	struct net *net = stt_dev->net;
	__be16 dport = stt_dev->dst_port;
	struct ip_tunnel_key *tun_key;
	struct ip_tunnel_info *tun_info;
	struct rtable *rt;
	struct flowi4 fl;
	__be16 sport;
	__be16 df;
	int err;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info)) {
		err = -EINVAL;
		goto error;
	}

	tun_key = &tun_info->key;

	/* Source port derived from the inner flow hash for ECMP spread. */
	sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
	rt = stt_get_rt(skb, dev, &fl, tun_key, dport, sport);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto error;
	}

	df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
	skb->ignore_df = 1;

	/* stt_xmit_skb() consumes skb and the route reference. */
	stt_xmit_skb(skb, rt, fl.saddr, tun_key->u.ipv4.dst,
		    tun_key->tos, tun_key->ttl,
		    df, sport, dport, tun_key->tun_id);
	return NETDEV_TX_OK;
error:
	kfree_skb(skb);
	dev->stats.tx_errors++;
	return err;
}
EXPORT_SYMBOL(ovs_stt_xmit);
4237026e PS |
1034 | |
/* Release one in-progress reassembly: drop its accounted memory, free
 * every queued fragment and unlink it from the per-cpu LRU list.
 * Caller must hold stt_percpu->lock.
 */
static void free_frag(struct stt_percpu *stt_percpu,
		      struct pkt_frag *frag)
{
	stt_percpu->frag_mem_used -= FRAG_CB(frag->skbs)->first.mem_used;
	kfree_skb_list(frag->skbs);
	list_del(&frag->lru_node);
	frag->skbs = NULL; /* marks the hash slot as empty */
}
1043 | ||
/* Evict the oldest reassemblies (front of the LRU) until memory use on
 * this CPU falls to REASM_LO_THRESH.  Caller holds stt_percpu->lock.
 */
static void evict_frags(struct stt_percpu *stt_percpu)
{
	while (!list_empty(&stt_percpu->frag_lru) &&
	       stt_percpu->frag_mem_used > REASM_LO_THRESH) {
		struct pkt_frag *frag;

		frag = list_first_entry(&stt_percpu->frag_lru,
					struct pkt_frag,
					lru_node);
		free_frag(stt_percpu, frag);
	}
}
1056 | ||
1057 | static bool pkt_key_match(struct net *net, | |
1058 | const struct pkt_frag *a, const struct pkt_key *b) | |
1059 | { | |
1060 | return a->key.saddr == b->saddr && a->key.daddr == b->daddr && | |
1061 | a->key.pkt_seq == b->pkt_seq && a->key.mark == b->mark && | |
1062 | net_eq(dev_net(a->skbs->dev), net); | |
1063 | } | |
1064 | ||
1065 | static u32 pkt_key_hash(const struct net *net, const struct pkt_key *key) | |
1066 | { | |
1067 | u32 initval = frag_hash_seed ^ (u32)(unsigned long)net ^ key->mark; | |
1068 | ||
1069 | return jhash_3words((__force u32)key->saddr, (__force u32)key->daddr, | |
1070 | (__force u32)key->pkt_seq, initval); | |
1071 | } | |
1072 | ||
/* Find the reassembly slot for @key, probing FRAG_HASH_SEGS alternative
 * buckets (FRAG_HASH_SHIFT bits of @hash each).  If no live entry
 * matches, the "best" victim among the probed buckets — an empty slot
 * if any, otherwise the oldest entry — is freed and returned for reuse.
 * Caller holds stt_percpu->lock; always returns a usable slot.
 */
static struct pkt_frag *lookup_frag(struct net *net,
				    struct stt_percpu *stt_percpu,
				    const struct pkt_key *key, u32 hash)
{
	struct pkt_frag *frag, *victim_frag = NULL;
	int i;

	for (i = 0; i < FRAG_HASH_SEGS; i++) {
		frag = &stt_percpu->frag_hash[hash & (FRAG_HASH_ENTRIES - 1)];

		/* Live, unexpired entry for the same flow: reuse it. */
		if (frag->skbs &&
		    time_before(jiffies, frag->timestamp + FRAG_EXP_TIME) &&
		    pkt_key_match(net, frag, key))
			return frag;

		/* Prefer an empty slot; otherwise track the oldest. */
		if (!victim_frag ||
		    (victim_frag->skbs &&
		     (!frag->skbs ||
		      time_before(frag->timestamp, victim_frag->timestamp))))
			victim_frag = frag;

		hash >>= FRAG_HASH_SHIFT;
	}

	if (victim_frag->skbs)
		free_frag(stt_percpu, victim_frag);

	return victim_frag;
}
1102 | ||
#ifdef SKIP_ZERO_COPY
/* Append the payload of @from onto @to by copying (instead of chaining
 * skbs), used when zero copy is disabled (SLUB builds, see SKIP_ZERO_COPY).
 *
 * On success *delta is the memory-accounting growth of @to and
 * *headstolen tells the caller how to free @from (kfree_skb_partial).
 * Returns 0 on success, negative errno if the fast path doesn't apply
 * (caller then falls back to skb chaining).
 */
static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
		      int *delta, bool *headstolen)
{
	int err;

	/* Only usable while @to is still a single, offset-0 head skb. */
	if (unlikely(to->next))
		return -EINVAL;

	if (unlikely(FRAG_CB(to)->offset))
		return -EINVAL;

	if (unlikely(skb_unclone(to, GFP_ATOMIC)))
		return -ENOMEM;

	/* Cheap path: coalesce into existing tailroom/frags. */
	if (skb_try_coalesce(to, from, headstolen, delta))
		return 0;

	*headstolen = false;
	/* Grow the linear area and flatten, then copy @from's bytes in. */
	err = pskb_expand_head(to, 0, to->data_len + from->len, GFP_ATOMIC);
	if (unlikely(err))
		return err;

	if (unlikely(!__pskb_pull_tail(to, to->data_len)))
		BUG();

	skb_copy_bits(from, 0, skb_put(to, from->len), from->len);

	*delta = from->len;
	to->truesize += from->len;
	return 0;
}
#else
/* Zero-copy build: never copy; force the caller onto the chaining path. */
static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
		      int *delta, bool *headstolen)
{
	*headstolen = false;
	return -EINVAL;
}
#endif
1143 | ||
/* Reassemble one STT fragment carried in a TCP segment.
 *
 * STT encodes (total length << STT_SEQ_LEN_SHIFT | fragment offset) in
 * the TCP sequence number and a per-packet id in ack_seq.  Fragments
 * are collected in a per-cpu hash keyed by (saddr, daddr, pkt_seq,
 * mark) with LRU eviction under memory pressure.
 *
 * Returns the complete packet (possibly a chained skb list) once all
 * bytes have arrived, or NULL while reassembly is still pending or the
 * fragment was invalid/dropped.  Consumes @skb in the NULL cases.
 */
static struct sk_buff *reassemble(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *tcph = tcp_hdr(skb);
	u32 seq = ntohl(tcph->seq);
	struct stt_percpu *stt_percpu;
	struct sk_buff *last_skb, *copied_skb = NULL;
	struct pkt_frag *frag;
	struct pkt_key key;
	int tot_len, delta = skb->truesize;
	bool headstolen;
	u32 hash;

	/* Decode total packet length and this fragment's offset. */
	tot_len = seq >> STT_SEQ_LEN_SHIFT;
	FRAG_CB(skb)->offset = seq & STT_SEQ_OFFSET_MASK;

	if (unlikely(skb->len == 0))
		goto out_free;

	/* Fragment claims to extend past the declared total: bogus. */
	if (unlikely(FRAG_CB(skb)->offset + skb->len > tot_len))
		goto out_free;

	/* Unfragmented packet: nothing to reassemble. */
	if (tot_len == skb->len)
		goto out;

	key.saddr = iph->saddr;
	key.daddr = iph->daddr;
	key.pkt_seq = tcph->ack_seq;
	key.mark = skb->mark;
	hash = pkt_key_hash(dev_net(skb->dev), &key);

	stt_percpu = per_cpu_ptr(stt_percpu_data, smp_processor_id());

	spin_lock(&stt_percpu->lock);

	/* Make room before inserting if we are over the high watermark. */
	if (unlikely(stt_percpu->frag_mem_used + skb->truesize > REASM_HI_THRESH))
		evict_frags(stt_percpu);

	frag = lookup_frag(dev_net(skb->dev), stt_percpu, &key, hash);
	if (!frag->skbs) {
		/* First fragment of a new packet: initialise the slot.
		 * Book-keeping for the whole reassembly lives in the
		 * head skb's FRAG_CB()->first.
		 */
		frag->skbs = skb;
		frag->key = key;
		frag->timestamp = jiffies;
		FRAG_CB(skb)->first.last_skb = skb;
		FRAG_CB(skb)->first.mem_used = skb->truesize;
		FRAG_CB(skb)->first.tot_len = tot_len;
		FRAG_CB(skb)->first.rcvd_len = skb->len;
		FRAG_CB(skb)->first.set_ecn_ce = false;
		list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
		stt_percpu->frag_mem_used += skb->truesize;
		skb = NULL;
		goto unlock;
	}

	/* Optimize for the common case where fragments are received in-order
	 * and not overlapping.
	 */
	last_skb = FRAG_CB(frag->skbs)->first.last_skb;
	if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
		   FRAG_CB(skb)->offset)) {

		/* In-order: either copy into the head skb (SKIP_ZERO_COPY
		 * builds) or append to the chain.
		 */
		if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
			copied_skb = skb;
		} else {
			last_skb->next = skb;
			FRAG_CB(frag->skbs)->first.last_skb = skb;
		}
	} else {
		struct sk_buff *prev = NULL, *next;

		/* Out of order: find the insertion point by offset. */
		for (next = frag->skbs; next; next = next->next) {
			if (FRAG_CB(next)->offset >= FRAG_CB(skb)->offset)
				break;
			prev = next;
		}

		/* Overlapping fragments aren't allowed. We shouldn't start
		 * before the end of the previous fragment.
		 */
		if (prev &&
		    FRAG_CB(prev)->offset + prev->len > FRAG_CB(skb)->offset)
			goto unlock_free;

		/* We also shouldn't end after the beginning of the next
		 * fragment.
		 */
		if (next &&
		    FRAG_CB(skb)->offset + skb->len > FRAG_CB(next)->offset)
			goto unlock_free;

		if (prev) {
			prev->next = skb;
		} else {
			/* New head: migrate the reassembly book-keeping. */
			FRAG_CB(skb)->first = FRAG_CB(frag->skbs)->first;
			frag->skbs = skb;
		}

		if (next)
			skb->next = next;
		else
			FRAG_CB(frag->skbs)->first.last_skb = skb;
	}

	FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
	FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
	/* delta is skb->truesize, or the smaller growth reported by
	 * __copy_skb() when the payload was merged into the head.
	 */
	stt_percpu->frag_mem_used += delta;
	FRAG_CB(frag->skbs)->first.mem_used += delta;

	if (FRAG_CB(frag->skbs)->first.tot_len ==
	    FRAG_CB(frag->skbs)->first.rcvd_len) {
		/* Complete: detach from the table and hand it back. */
		struct sk_buff *frag_head = frag->skbs;

		frag_head->tstamp = skb->tstamp;
		if (FRAG_CB(frag_head)->first.set_ecn_ce)
			INET_ECN_set_ce(frag_head);

		list_del(&frag->lru_node);
		stt_percpu->frag_mem_used -= FRAG_CB(frag_head)->first.mem_used;
		frag->skbs = NULL;
		skb = frag_head;
	} else {
		/* Still waiting: refresh LRU position. */
		list_move_tail(&frag->lru_node, &stt_percpu->frag_lru);
		skb = NULL;
	}

	if (copied_skb)
		kfree_skb_partial(copied_skb, headstolen);
	goto unlock;

unlock_free:
	kfree_skb(skb);
	skb = NULL;
unlock:
	spin_unlock(&stt_percpu->lock);
	return skb;
out_free:
	kfree_skb(skb);
	skb = NULL;
out:
	return skb;
}
1285 | ||
/* Verify the outer TCP checksum of a received STT segment.
 * Uses hardware verification when available (CHECKSUM_UNNECESSARY /
 * CHECKSUM_COMPLETE) and falls back to a full software checksum.
 */
static bool validate_checksum(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);

	if (skb_csum_unnecessary(skb))
		return true;

	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !tcp_v4_check(skb->len, iph->saddr, iph->daddr, skb->csum))
		return true;

	/* Software path: seed with the pseudo-header, then checksum the
	 * whole segment.
	 */
	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, skb->len,
				       IPPROTO_TCP, 0);

	return __skb_checksum_complete(skb) == 0;
}
1302 | ||
/* Translate the STT header's offload metadata onto the decapsulated skb:
 * restore the VLAN tag, inner checksum state (CHECKSUM_PARTIAL with the
 * right csum_start/offset) and GSO parameters for the inner packet.
 * Returns false if the header is inconsistent and the packet must be
 * dropped.
 */
static bool set_offloads(struct sk_buff *skb)
{
	struct stthdr *stth = stt_hdr(skb);
	unsigned int gso_type = 0;
	int l3_header_size;
	int l4_header_size;
	u16 csum_offset;
	u8 proto_type;

	if (stth->vlan_tci)
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
				       ntohs(stth->vlan_tci));

	if (!(stth->flags & STT_CSUM_PARTIAL)) {
		/* No partial checksum: sender either verified it or not. */
		if (stth->flags & STT_CSUM_VERIFIED)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		else
			skb->ip_summed = CHECKSUM_NONE;

		return clear_gso(skb) == 0;
	}

	/* STT_PROTO_IPV4 / STT_PROTO_TCP bits select one of four
	 * inner L3/L4 combinations.
	 */
	proto_type = stth->flags & STT_PROTO_TYPES;

	switch (proto_type) {
	case (STT_PROTO_IPV4 | STT_PROTO_TCP):
		/* TCP/IPv4 */
		csum_offset = offsetof(struct tcphdr, check);
		gso_type = SKB_GSO_TCPV4;
		l3_header_size = sizeof(struct iphdr);
		l4_header_size = sizeof(struct tcphdr);
		skb->protocol = htons(ETH_P_IP);
		break;
	case STT_PROTO_TCP:
		/* TCP/IPv6 */
		csum_offset = offsetof(struct tcphdr, check);
		gso_type = SKB_GSO_TCPV6;
		l3_header_size = sizeof(struct ipv6hdr);
		l4_header_size = sizeof(struct tcphdr);
		skb->protocol = htons(ETH_P_IPV6);
		break;
	case STT_PROTO_IPV4:
		/* UDP/IPv4 */
		csum_offset = offsetof(struct udphdr, check);
#ifdef HAVE_SKB_GSO_UDP
		gso_type = SKB_GSO_UDP;
#endif
		l3_header_size = sizeof(struct iphdr);
		l4_header_size = sizeof(struct udphdr);
		skb->protocol = htons(ETH_P_IP);
		break;
	default:
		/* UDP/IPv6 */
		csum_offset = offsetof(struct udphdr, check);
#ifdef HAVE_SKB_GSO_UDP
		gso_type = SKB_GSO_UDP;
#endif
		l3_header_size = sizeof(struct ipv6hdr);
		l4_header_size = sizeof(struct udphdr);
		skb->protocol = htons(ETH_P_IPV6);
	}

	/* l4_offset must leave room for the Ethernet + L3 headers. */
	if (unlikely(stth->l4_offset < ETH_HLEN + l3_header_size))
		return false;

	if (unlikely(!pskb_may_pull(skb, stth->l4_offset + l4_header_size)))
		return false;

	/* pskb_may_pull() may have reallocated; refresh the pointer. */
	stth = stt_hdr(skb);

	skb->csum_start = skb_headroom(skb) + stth->l4_offset;
	skb->csum_offset = csum_offset;
	skb->ip_summed = CHECKSUM_PARTIAL;

	if (stth->mss) {
		if (unlikely(skb_unclone(skb, GFP_ATOMIC)))
			return false;

		/* SKB_GSO_DODGY: gso metadata came from an untrusted
		 * source and must be re-validated by the stack.
		 */
		skb_shinfo(skb)->gso_type = gso_type | SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_size = ntohs(stth->mss);
		skb_shinfo(skb)->gso_segs = 0;
	} else {
		if (unlikely(clear_gso(skb)))
			return false;
	}

	return true;
}
e23775f2 | 1391 | |
/* Deliver a chain of decapsulated skbs (linked via skb->next) to the
 * tunnel device, attaching a reference to @tun_dst to every packet
 * after the first (the first consumes the caller's reference inside
 * ovs_ip_tunnel_rcv()).
 */
static void rcv_list(struct net_device *dev, struct sk_buff *skb,
		     struct metadata_dst *tun_dst)
{
	struct sk_buff *next;

	do {
		next = skb->next;
		skb->next = NULL;
		if (next) {
			ovs_dst_hold((struct dst_entry *)tun_dst);
			ovs_skb_dst_set(next, (struct dst_entry *)tun_dst);
		}
		ovs_ip_tunnel_rcv(dev, skb, tun_dst);
	} while ((skb = next));
}
1407 | ||
#ifndef USE_UPSTREAM_TUNNEL
/* Build tunnel metadata (key, csum flag, outer TCP ports) for a
 * received packet and deliver it.  Compat path: metadata_dst lives on
 * the stack and is copied/handled by the OVS compat receive helpers.
 */
static int __stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
{
	struct metadata_dst tun_dst;

	ovs_ip_tun_rx_dst(&tun_dst, skb, TUNNEL_KEY | TUNNEL_CSUM,
			  get_unaligned(&stt_hdr(skb)->key), 0);
	tun_dst.u.tun_info.key.tp_src = tcp_hdr(skb)->source;
	tun_dst.u.tun_info.key.tp_dst = tcp_hdr(skb)->dest;

	rcv_list(stt_dev->dev, skb, &tun_dst);
	return 0;
}
#else
/* Upstream-tunnel path: allocate a refcounted metadata_dst via
 * ip_tun_rx_dst().  Returns -ENOMEM if allocation fails (caller drops
 * the packet).
 */
static int __stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
{
	struct metadata_dst *tun_dst;
	__be16 flags;
	__be64 tun_id;

	flags = TUNNEL_KEY | TUNNEL_CSUM;
	tun_id = get_unaligned(&stt_hdr(skb)->key);
	tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
	if (!tun_dst)
		return -ENOMEM;
	tun_dst->u.tun_info.key.tp_src = tcp_hdr(skb)->source;
	tun_dst->u.tun_info.key.tp_dst = tcp_hdr(skb)->dest;

	rcv_list(stt_dev->dev, skb, tun_dst);
	return 0;
}
#endif
68fc3451 | 1440 | |
/* Receive path for one STT/TCP segment stolen from the netfilter hook.
 * Validates the checksum, reassembles fragments, strips the STT header,
 * restores inner offload state and delivers the packet.  Consumes @skb
 * on all paths.
 */
static void stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
{
	int err;

	if (unlikely(!validate_checksum(skb)))
		goto drop;

	/* Strip the outer TCP header; STT header follows. */
	__skb_pull(skb, sizeof(struct tcphdr));
	skb = reassemble(skb);
	if (!skb)
		return; /* fragment queued (or dropped) by reassemble() */

	/* Reassembly may return a chain; try to merge it first. */
	if (skb->next && coalesce_skb(&skb))
		goto drop;

	err = iptunnel_pull_header(skb,
				   sizeof(struct stthdr) + STT_ETH_PAD,
				   htons(ETH_P_TEB),
				   !net_eq(stt_dev->net, dev_net(stt_dev->dev)));
	if (unlikely(err))
		goto drop;

	/* Only STT version 0 is understood. */
	if (unlikely(stt_hdr(skb)->version != 0))
		goto drop;

	if (unlikely(!set_offloads(skb)))
		goto drop;

	if (skb_shinfo(skb)->frag_list && try_to_segment(skb))
		goto drop;

	err = __stt_rcv(stt_dev, skb);
	if (err)
		goto drop;
	return;
drop:
	/* Consume bad packet */
	kfree_skb_list(skb);
	stt_dev->dev->stats.rx_errors++;
}
1481 | ||
/* Shut down and free the kernel TCP listen socket used to claim the
 * STT port from the local stack.
 */
static void tcp_sock_release(struct socket *sock)
{
	kernel_sock_shutdown(sock, SHUT_RDWR);
	sock_release(sock);
}
1487 | ||
/* Create a kernel TCP socket bound to INADDR_ANY:@port in namespace
 * @net.  The socket is never listened on or read; binding it reserves
 * the port so the local stack does not answer STT segments (which are
 * intercepted by the netfilter hook instead).
 * On success *sockp holds the socket; on failure *sockp is NULL and a
 * negative errno is returned.
 */
static int tcp_sock_create4(struct net *net, __be16 port,
			    struct socket **sockp)
{
	struct sockaddr_in tcp_addr;
	struct socket *sock = NULL;
	int err;

	err = sock_create_kern(net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0)
		goto error;

	memset(&tcp_addr, 0, sizeof(tcp_addr));
	tcp_addr.sin_family = AF_INET;
	tcp_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	tcp_addr.sin_port = port;
	err = kernel_bind(sock, (struct sockaddr *)&tcp_addr,
			  sizeof(tcp_addr));
	if (err < 0)
		goto error;

	*sockp = sock;
	return 0;

error:
	if (sock)
		tcp_sock_release(sock);
	*sockp = NULL;
	return err;
}
1517 | ||
/* (Re)arm the periodic reassembly-garbage-collection work item. */
static void schedule_clean_percpu(void)
{
	schedule_delayed_work(&clean_percpu_wq, CLEAN_PERCPU_INTERVAL);
}
1522 | ||
/* Periodic work: walk every CPU's fragment table and free reassemblies
 * older than FRAG_EXP_TIME.  Expiry is checked once unlocked (cheap
 * filter) and re-checked under the lock before freeing.  Reschedules
 * itself when done.
 */
static void clean_percpu(struct work_struct *work)
{
	int i;

	for_each_possible_cpu(i) {
		struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
		int j;

		for (j = 0; j < FRAG_HASH_ENTRIES; j++) {
			struct pkt_frag *frag;

			frag = &stt_percpu->frag_hash[j];
			if (!frag->skbs ||
			    time_before(jiffies, frag->timestamp + FRAG_EXP_TIME))
				continue;

			spin_lock_bh(&stt_percpu->lock);

			/* Re-check: the slot may have changed meanwhile. */
			if (frag->skbs &&
			    time_after(jiffies, frag->timestamp + FRAG_EXP_TIME))
				free_frag(stt_percpu, frag);

			spin_unlock_bh(&stt_percpu->lock);
		}
	}
	schedule_clean_percpu();
}
1550 | ||
/* Compatibility glue: kernel versions (and RHEL 7 backports) disagree
 * on the nf_hookfn signature.  FIRST_PARAM / LAST_PARAM expand to
 * whatever the running kernel expects before/after the skb argument of
 * nf_ip_hook() below.
 */
#ifdef HAVE_NF_HOOKFN_ARG_OPS
#define FIRST_PARAM const struct nf_hook_ops *ops
#else
#ifdef HAVE_NF_HOOKFN_ARG_PRIV
#define FIRST_PARAM void *priv
#else
#define FIRST_PARAM unsigned int hooknum
#endif
#endif

#ifdef HAVE_NF_HOOK_STATE
#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0) && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0)
/* RHEL nfhook hacks. */
#ifndef __GENKSYMS__
#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
		   const struct nf_hook_state *state
#else
#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
		   int (*okfn)(struct sk_buff *)
#endif
#else
#define LAST_PARAM const struct nf_hook_state *state
#endif
#else
#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
		   int (*okfn)(struct sk_buff *)
#endif
1578 | ||
/* NF_INET_LOCAL_IN hook: steal TCP segments addressed to a registered
 * STT port before the local TCP stack can see them.  Returns NF_STOLEN
 * when the packet was consumed by stt_rcv(), NF_ACCEPT otherwise.
 */
static unsigned int nf_ip_hook(FIRST_PARAM, struct sk_buff *skb, LAST_PARAM)
{
	struct stt_dev *stt_dev;
	int ip_hdr_len;

	if (ip_hdr(skb)->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	ip_hdr_len = ip_hdrlen(skb);
	if (unlikely(!pskb_may_pull(skb, ip_hdr_len + sizeof(struct tcphdr))))
		return NF_ACCEPT;

	skb_set_transport_header(skb, ip_hdr_len);

	/* Is any up STT device in this namespace bound to this port? */
	stt_dev = stt_find_up_dev(dev_net(skb->dev), tcp_hdr(skb)->dest);
	if (!stt_dev)
		return NF_ACCEPT;

	/* Strip the IP header; stt_rcv() consumes the skb. */
	__skb_pull(skb, ip_hdr_len);
	stt_rcv(stt_dev, skb);
	return NF_STOLEN;
}
1601 | ||
/* Netfilter registration for the STT interceptor: lowest priority on
 * LOCAL_IN so every other hook runs first.
 */
static struct nf_hook_ops nf_hook_ops __read_mostly = {
	.hook           = nf_ip_hook,
#ifdef HAVE_NF_HOOKS_OPS_OWNER
	.owner          = THIS_MODULE,
#endif
	.pf             = NFPROTO_IPV4,
	.hooknum        = NF_INET_LOCAL_IN,
	.priority       = INT_MAX,
};
1611 | ||
e23775f2 | 1612 | static int stt_start(struct net *net) |
4237026e | 1613 | { |
e23775f2 | 1614 | struct stt_net *sn = net_generic(net, stt_net_id); |
4237026e PS |
1615 | int err; |
1616 | int i; | |
1617 | ||
1618 | if (n_tunnels) { | |
1619 | n_tunnels++; | |
1620 | return 0; | |
1621 | } | |
1622 | get_random_bytes(&frag_hash_seed, sizeof(u32)); | |
1623 | ||
1624 | stt_percpu_data = alloc_percpu(struct stt_percpu); | |
1625 | if (!stt_percpu_data) { | |
1626 | err = -ENOMEM; | |
1627 | goto error; | |
1628 | } | |
1629 | ||
1630 | for_each_possible_cpu(i) { | |
1631 | struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i); | |
384868ca | 1632 | struct pkt_frag *frag_hash; |
4237026e PS |
1633 | |
1634 | spin_lock_init(&stt_percpu->lock); | |
1635 | INIT_LIST_HEAD(&stt_percpu->frag_lru); | |
1636 | get_random_bytes(&per_cpu(pkt_seq_counter, i), sizeof(u32)); | |
1637 | ||
384868ca GR |
1638 | frag_hash = kvmalloc_array(sizeof(struct pkt_frag), |
1639 | FRAG_HASH_ENTRIES, | |
1640 | GFP_KERNEL | __GFP_ZERO); | |
4237026e PS |
1641 | if (!frag_hash) { |
1642 | err = -ENOMEM; | |
1643 | goto free_percpu; | |
1644 | } | |
1645 | stt_percpu->frag_hash = frag_hash; | |
4237026e | 1646 | } |
e23775f2 PS |
1647 | schedule_clean_percpu(); |
1648 | n_tunnels++; | |
1649 | ||
1650 | if (sn->n_tunnels) { | |
1651 | sn->n_tunnels++; | |
1652 | return 0; | |
1653 | } | |
1654 | #ifdef HAVE_NF_REGISTER_NET_HOOK | |
1655 | /* On kernel which support per net nf-hook, nf_register_hook() takes | |
1656 | * rtnl-lock, which results in dead lock in stt-dev-create. Therefore | |
1657 | * use this new API. | |
1658 | */ | |
3b6565cd PS |
1659 | |
1660 | if (sn->nf_hook_reg_done) | |
1661 | goto out; | |
1662 | ||
e23775f2 | 1663 | err = nf_register_net_hook(net, &nf_hook_ops); |
3b6565cd PS |
1664 | if (!err) |
1665 | sn->nf_hook_reg_done = true; | |
e23775f2 | 1666 | #else |
3b6565cd PS |
1667 | /* Register STT only on very first STT device addition. */ |
1668 | if (!list_empty(&nf_hook_ops.list)) | |
1669 | goto out; | |
1670 | ||
4237026e | 1671 | err = nf_register_hook(&nf_hook_ops); |
e23775f2 | 1672 | #endif |
4237026e | 1673 | if (err) |
15a0ca65 | 1674 | goto dec_n_tunnel; |
3b6565cd | 1675 | out: |
e23775f2 | 1676 | sn->n_tunnels++; |
4237026e PS |
1677 | return 0; |
1678 | ||
15a0ca65 PS |
1679 | dec_n_tunnel: |
1680 | n_tunnels--; | |
4237026e PS |
1681 | free_percpu: |
1682 | for_each_possible_cpu(i) { | |
1683 | struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i); | |
1684 | ||
1685 | if (stt_percpu->frag_hash) | |
384868ca | 1686 | kvfree(stt_percpu->frag_hash); |
4237026e PS |
1687 | } |
1688 | ||
1689 | free_percpu(stt_percpu_data); | |
1690 | ||
1691 | error: | |
1692 | return err; | |
1693 | } | |
1694 | ||
/* Tear down the state taken by stt_start() for one device.  Only when
 * the module-wide tunnel count hits zero are the GC work and per-cpu
 * fragment tables freed (any queued fragments are dropped).
 *
 * NOTE(review): the netfilter hook registered in stt_start() is not
 * unregistered here — presumably that happens at module/namespace exit
 * elsewhere in the file; confirm before relying on it.
 */
static void stt_cleanup(struct net *net)
{
	struct stt_net *sn = net_generic(net, stt_net_id);
	int i;

	sn->n_tunnels--;
	n_tunnels--;
	if (n_tunnels)
		return;

	cancel_delayed_work_sync(&clean_percpu_wq);
	for_each_possible_cpu(i) {
		struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
		int j;

		for (j = 0; j < FRAG_HASH_ENTRIES; j++) {
			struct pkt_frag *frag;

			frag = &stt_percpu->frag_hash[j];
			kfree_skb_list(frag->skbs);
		}

		kvfree(stt_percpu->frag_hash);
	}

	free_percpu(stt_percpu_data);
}
1722 | ||
/* ndo_start_xmit for the STT netdev.  With the upstream tunnel
 * infrastructure, packets carry tunnel metadata and can be sent
 * directly; in the compat build, anything arriving from the plain
 * network stack lacks the OVS control block and is dropped.
 */
static netdev_tx_t stt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
#ifdef USE_UPSTREAM_TUNNEL
	return ovs_stt_xmit(skb);
#else
	/* Drop All packets coming from networking stack. OVS-CB is
	 * not initialized for these packets.
	 */
	dev_kfree_skb(skb);
	dev->stats.tx_dropped++;
	return NETDEV_TX_OK;
#endif
}
1736 | ||
/* Setup stats when device is created */
static int stt_init(struct net_device *dev)
{
	/* Per-cpu software stats; freed in stt_uninit(). */
	dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
1746 | ||
/* ndo_uninit: release the per-cpu stats allocated in stt_init(). */
static void stt_uninit(struct net_device *dev)
{
	free_percpu(dev->tstats);
}
1751 | ||
1752 | static int stt_open(struct net_device *dev) | |
1753 | { | |
1754 | struct stt_dev *stt = netdev_priv(dev); | |
1755 | struct net *net = stt->net; | |
19c64e86 | 1756 | struct stt_net *sn = net_generic(net, stt_net_id); |
4237026e PS |
1757 | int err; |
1758 | ||
e23775f2 PS |
1759 | err = stt_start(net); |
1760 | if (err) | |
1761 | return err; | |
4237026e | 1762 | |
e23775f2 PS |
1763 | err = tcp_sock_create4(net, stt->dst_port, &stt->sock); |
1764 | if (err) | |
1765 | return err; | |
19c64e86 | 1766 | list_add_rcu(&stt->up_next, &sn->stt_up_list); |
e23775f2 PS |
1767 | return 0; |
1768 | } | |
4237026e | 1769 | |
/* ndo_stop: take the device off the "up" list, wait for RCU readers
 * (the netfilter hook walks the list under RCU), release the port
 * reservation socket, and drop the shared-state reference.
 */
static int stt_stop(struct net_device *dev)
{
	struct stt_dev *stt_dev = netdev_priv(dev);
	struct net *net = stt_dev->net;

	list_del_rcu(&stt_dev->up_next);
	/* Ensure no CPU still sees the device before closing its socket. */
	synchronize_net();
	tcp_sock_release(stt_dev->sock);
	stt_dev->sock = NULL;
	stt_cleanup(net);
	return 0;
}
4237026e | 1782 | |
/* Validate and apply a new MTU.  The maximum leaves room for the outer
 * IP + STT encapsulation; @strict rejects oversized requests instead of
 * clamping them.  68 is the IPv4 minimum-MTU floor.
 */
static int __stt_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	int max_mtu = IP_MAX_MTU - STT_HEADER_LEN - sizeof(struct iphdr)
		      - dev->hard_header_len;

	if (new_mtu < 68)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}

/* ndo_change_mtu wrapper: user-requested changes are strict. */
static int stt_change_mtu(struct net_device *dev, int new_mtu)
{
	return __stt_change_mtu(dev, new_mtu, true);
}
1806 | ||
/* ndo_fill_metadata_dst backend: complete the tunnel metadata on @skb
 * with the values a subsequent transmit would use — the routed source
 * address and the flow-hashed source port plus configured destination
 * port.  IPv4 only.  Returns 0 or a negative errno.
 */
int ovs_stt_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info = skb_tunnel_info(skb);
	struct stt_dev *stt_dev = netdev_priv(dev);
	struct net *net = stt_dev->net;
	__be16 dport = stt_dev->dst_port;
	__be16 sport;
	struct flowi4 fl4;
	struct rtable *rt;

	if (ip_tunnel_info_af(info) != AF_INET)
		return -EINVAL;

	sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
	rt = stt_get_rt(skb, dev, &fl4, &info->key, dport, sport);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	/* Only the lookup result is needed, not the route itself. */
	ip_rt_put(rt);

	info->key.u.ipv4.src = fl4.saddr;
	info->key.tp_src = sport;
	info->key.tp_dst = dport;
	return 0;
}
EXPORT_SYMBOL_GPL(ovs_stt_fill_metadata_dst);
1833 | ||
/* netdev ops for the STT device; MTU-change placement differs on RHEL7
 * (extended ops struct) and metadata-dst fill is only wired when the
 * upstream tunnel infrastructure provides it.
 */
static const struct net_device_ops stt_netdev_ops = {
	.ndo_init               = stt_init,
	.ndo_uninit             = stt_uninit,
	.ndo_open               = stt_open,
	.ndo_stop               = stt_stop,
	.ndo_start_xmit         = stt_dev_xmit,
	.ndo_get_stats64        = ip_tunnel_get_stats64,
#ifdef  HAVE_RHEL7_MAX_MTU
	.ndo_size		= sizeof(struct net_device_ops),
	.extended.ndo_change_mtu = stt_change_mtu,
#else
	.ndo_change_mtu         = stt_change_mtu,
#endif
	.ndo_validate_addr      = eth_validate_addr,
	.ndo_set_mac_address    = eth_mac_addr,
#ifdef USE_UPSTREAM_TUNNEL
#ifdef HAVE_NDO_FILL_METADATA_DST
	.ndo_fill_metadata_dst  = stt_fill_metadata_dst,
#endif
#endif
};
1855 | ||
/* ethtool -i: report driver name and version. */
static void stt_get_drvinfo(struct net_device *dev,
			    struct ethtool_drvinfo *drvinfo)
{
	strlcpy(drvinfo->version, STT_NETDEV_VER, sizeof(drvinfo->version));
	strlcpy(drvinfo->driver, "stt", sizeof(drvinfo->driver));
}
1862 | ||
static const struct ethtool_ops stt_ethtool_ops = {
	.get_drvinfo    = stt_get_drvinfo,
	.get_link       = ethtool_op_get_link,
};

/* Info for udev, that this is a virtual tunnel endpoint */
static struct device_type stt_type = {
	.name = "stt",
};
1872 | ||
/* Initialize the device structure. */
static void stt_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->netdev_ops = &stt_netdev_ops;
	dev->ethtool_ops = &stt_ethtool_ops;
	/* free_netdev on teardown: field name changed across kernels. */
#ifndef HAVE_NEEDS_FREE_NETDEV
	dev->destructor = free_netdev;
#else
	dev->needs_free_netdev = true;
#endif

	SET_NETDEV_DEVTYPE(dev, &stt_type);

	/* Software device: lockless TX, namespace-local, and full
	 * checksum/GSO offload advertised (done in software on xmit).
	 */
	dev->features |= NETIF_F_LLTX | NETIF_F_NETNS_LOCAL;
	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
	dev->features |= NETIF_F_RXCSUM;
	dev->features |= NETIF_F_GSO_SOFTWARE;

	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

#ifdef USE_UPSTREAM_TUNNEL
	netif_keep_dst(dev);
#endif
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
	eth_hw_addr_random(dev);
}
1902 | ||
/* Netlink attribute policy for IFLA_STT_* link attributes:
 * the destination TCP port is carried as a 16-bit value. */
static const struct nla_policy stt_policy[IFLA_STT_MAX + 1] = {
	[IFLA_STT_PORT]		= { .type = NLA_U16 },
};
1906 | ||
82b7e6d1 | 1907 | #ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK |
22562e9f GR |
1908 | static int stt_validate(struct nlattr *tb[], struct nlattr *data[], |
1909 | struct netlink_ext_ack __always_unused *extack) | |
1910 | #else | |
e23775f2 | 1911 | static int stt_validate(struct nlattr *tb[], struct nlattr *data[]) |
22562e9f | 1912 | #endif |
4237026e | 1913 | { |
e23775f2 PS |
1914 | if (tb[IFLA_ADDRESS]) { |
1915 | if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) | |
1916 | return -EINVAL; | |
1917 | ||
1918 | if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) | |
1919 | return -EADDRNOTAVAIL; | |
1920 | } | |
1921 | ||
1922 | return 0; | |
1923 | } | |
1924 | ||
1925 | static struct stt_dev *find_dev(struct net *net, __be16 dst_port) | |
1926 | { | |
1927 | struct stt_net *sn = net_generic(net, stt_net_id); | |
1928 | struct stt_dev *dev; | |
1929 | ||
1930 | list_for_each_entry(dev, &sn->stt_list, next) { | |
1931 | if (dev->dst_port == dst_port) | |
1932 | return dev; | |
1933 | } | |
1934 | return NULL; | |
1935 | } | |
1936 | ||
1937 | static int stt_configure(struct net *net, struct net_device *dev, | |
1938 | __be16 dst_port) | |
1939 | { | |
1940 | struct stt_net *sn = net_generic(net, stt_net_id); | |
1941 | struct stt_dev *stt = netdev_priv(dev); | |
4237026e PS |
1942 | int err; |
1943 | ||
e23775f2 PS |
1944 | stt->net = net; |
1945 | stt->dev = dev; | |
1946 | ||
1947 | stt->dst_port = dst_port; | |
1948 | ||
1949 | if (find_dev(net, dst_port)) | |
1950 | return -EBUSY; | |
1951 | ||
3a934139 JS |
1952 | err = __stt_change_mtu(dev, IP_MAX_MTU, false); |
1953 | if (err) | |
1954 | return err; | |
1955 | ||
e23775f2 | 1956 | err = register_netdevice(dev); |
4237026e | 1957 | if (err) |
e23775f2 | 1958 | return err; |
4237026e | 1959 | |
19c64e86 | 1960 | list_add(&stt->next, &sn->stt_list); |
e23775f2 PS |
1961 | return 0; |
1962 | } | |
1963 | ||
22562e9f GR |
1964 | #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS |
1965 | static int stt_newlink(struct net *net, struct net_device *dev, | |
1966 | struct nlattr *tb[], struct nlattr *data[], | |
1967 | struct netlink_ext_ack __always_unused *extack) | |
1968 | #else | |
e23775f2 PS |
1969 | static int stt_newlink(struct net *net, struct net_device *dev, |
1970 | struct nlattr *tb[], struct nlattr *data[]) | |
22562e9f | 1971 | #endif |
e23775f2 PS |
1972 | { |
1973 | __be16 dst_port = htons(STT_DST_PORT); | |
1974 | ||
1975 | if (data[IFLA_STT_PORT]) | |
1976 | dst_port = nla_get_be16(data[IFLA_STT_PORT]); | |
1977 | ||
1978 | return stt_configure(net, dev, dst_port); | |
1979 | } | |
1980 | ||
/* rtnl_link_ops dellink callback: drop the device from the per-netns
 * STT list and queue it for unregistration on @head (the caller runs
 * the actual unregister batch under RTNL). */
static void stt_dellink(struct net_device *dev, struct list_head *head)
{
	struct stt_dev *stt = netdev_priv(dev);

	list_del(&stt->next);
	unregister_netdevice_queue(dev, head);
}
1988 | ||
1989 | static size_t stt_get_size(const struct net_device *dev) | |
1990 | { | |
1991 | return nla_total_size(sizeof(__be32)); /* IFLA_STT_PORT */ | |
1992 | } | |
1993 | ||
1994 | static int stt_fill_info(struct sk_buff *skb, const struct net_device *dev) | |
1995 | { | |
1996 | struct stt_dev *stt = netdev_priv(dev); | |
1997 | ||
1998 | if (nla_put_be16(skb, IFLA_STT_PORT, stt->dst_port)) | |
1999 | goto nla_put_failure; | |
4237026e | 2000 | |
e23775f2 | 2001 | return 0; |
4237026e | 2002 | |
e23775f2 PS |
2003 | nla_put_failure: |
2004 | return -EMSGSIZE; | |
4237026e | 2005 | } |
4237026e | 2006 | |
/* rtnetlink glue tying the "stt" link kind to the callbacks above. */
static struct rtnl_link_ops stt_link_ops __read_mostly = {
	.kind		= "stt",
	.maxtype	= IFLA_STT_MAX,
	.policy		= stt_policy,
	.priv_size	= sizeof(struct stt_dev),
	.setup		= stt_setup,
	.validate	= stt_validate,
	.newlink	= stt_newlink,
	.dellink	= stt_dellink,
	.get_size	= stt_get_size,
	.fill_info	= stt_fill_info,
};
2019 | ||
2020 | struct net_device *ovs_stt_dev_create_fb(struct net *net, const char *name, | |
2021 | u8 name_assign_type, u16 dst_port) | |
4237026e | 2022 | { |
e23775f2 PS |
2023 | struct nlattr *tb[IFLA_MAX + 1]; |
2024 | struct net_device *dev; | |
2025 | int err; | |
2026 | ||
2027 | memset(tb, 0, sizeof(tb)); | |
2028 | dev = rtnl_create_link(net, (char *) name, name_assign_type, | |
2029 | &stt_link_ops, tb); | |
2030 | if (IS_ERR(dev)) | |
2031 | return dev; | |
2032 | ||
2033 | err = stt_configure(net, dev, htons(dst_port)); | |
2034 | if (err) { | |
2035 | free_netdev(dev); | |
2036 | return ERR_PTR(err); | |
4237026e | 2037 | } |
e23775f2 | 2038 | return dev; |
4237026e | 2039 | } |
e23775f2 | 2040 | EXPORT_SYMBOL_GPL(ovs_stt_dev_create_fb); |
4237026e PS |
/* pernet init: set up this namespace's STT state - the list of all STT
 * devices and the list of devices currently up.  On kernels with
 * per-netns netfilter hooks, record that our hook is not yet
 * registered (registration happens lazily when a device is opened). */
static int stt_init_net(struct net *net)
{
	struct stt_net *sn = net_generic(net, stt_net_id);

	INIT_LIST_HEAD(&sn->stt_list);
	INIT_LIST_HEAD(&sn->stt_up_list);
#ifdef HAVE_NF_REGISTER_NET_HOOK
	sn->nf_hook_reg_done = false;
#endif
	return 0;
}
2053 | ||
e23775f2 PS |
2054 | static void stt_exit_net(struct net *net) |
2055 | { | |
2056 | struct stt_net *sn = net_generic(net, stt_net_id); | |
2057 | struct stt_dev *stt, *next; | |
2058 | struct net_device *dev, *aux; | |
2059 | LIST_HEAD(list); | |
2060 | ||
fee43fa2 PS |
2061 | #ifdef HAVE_NF_REGISTER_NET_HOOK |
2062 | /* Ideally this should be done from stt_stop(), But on some kernels | |
2063 | * nf-unreg operation needs RTNL-lock, which can cause deallock. | |
2064 | * So it is done from here. */ | |
3b6565cd | 2065 | if (sn->nf_hook_reg_done) |
fee43fa2 PS |
2066 | nf_unregister_net_hook(net, &nf_hook_ops); |
2067 | #endif | |
2068 | ||
e23775f2 PS |
2069 | rtnl_lock(); |
2070 | ||
2071 | /* gather any stt devices that were moved into this ns */ | |
2072 | for_each_netdev_safe(net, dev, aux) | |
2073 | if (dev->rtnl_link_ops == &stt_link_ops) | |
2074 | unregister_netdevice_queue(dev, &list); | |
2075 | ||
2076 | list_for_each_entry_safe(stt, next, &sn->stt_list, next) { | |
2077 | /* If stt->dev is in the same netns, it was already added | |
2078 | * to the stt by the previous loop. | |
2079 | */ | |
2080 | if (!net_eq(dev_net(stt->dev), net)) | |
2081 | unregister_netdevice_queue(stt->dev, &list); | |
2082 | } | |
2083 | ||
2084 | /* unregister the devices gathered above */ | |
2085 | unregister_netdevice_many(&list); | |
2086 | rtnl_unlock(); | |
2087 | } | |
2088 | ||
/* Per-network-namespace lifecycle hooks; stt_net_id indexes the
 * struct stt_net allocated for each namespace. */
static struct pernet_operations stt_net_ops = {
	.init = stt_init_net,
	.exit = stt_exit_net,
	.id   = &stt_net_id,
	.size = sizeof(struct stt_net),
};
2095 | ||
e23775f2 | 2096 | int stt_init_module(void) |
4237026e | 2097 | { |
e23775f2 PS |
2098 | int rc; |
2099 | ||
2100 | rc = register_pernet_subsys(&stt_net_ops); | |
2101 | if (rc) | |
2102 | goto out1; | |
2103 | ||
2104 | rc = rtnl_link_register(&stt_link_ops); | |
2105 | if (rc) | |
2106 | goto out2; | |
2107 | ||
cfb9880f | 2108 | #ifdef HAVE_LIST_IN_NF_HOOK_OPS |
fee43fa2 | 2109 | INIT_LIST_HEAD(&nf_hook_ops.list); |
cfb9880f | 2110 | #endif |
e23775f2 PS |
2111 | pr_info("STT tunneling driver\n"); |
2112 | return 0; | |
2113 | out2: | |
2114 | unregister_pernet_subsys(&stt_net_ops); | |
2115 | out1: | |
258b27d3 | 2116 | pr_err("Error while initializing STT %d\n", rc); |
e23775f2 | 2117 | return rc; |
4237026e | 2118 | } |
4237026e | 2119 | |
e23775f2 | 2120 | void stt_cleanup_module(void) |
4237026e | 2121 | { |
fee43fa2 PS |
2122 | #ifndef HAVE_NF_REGISTER_NET_HOOK |
2123 | if (!list_empty(&nf_hook_ops.list)) | |
2124 | nf_unregister_hook(&nf_hook_ops); | |
2125 | #endif | |
e23775f2 | 2126 | rtnl_link_unregister(&stt_link_ops); |
4237026e PS |
2127 | unregister_pernet_subsys(&stt_net_ops); |
2128 | } | |
4237026e | 2129 | #endif |