]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blame - net/core/flow_dissector.c
flow_dissector: introduce programable flow_dissector
[mirror_ubuntu-hirsute-kernel.git] / net / core / flow_dissector.c
CommitLineData
fbff949e 1#include <linux/kernel.h>
0744dd00 2#include <linux/skbuff.h>
c452ed70 3#include <linux/export.h>
0744dd00
ED
4#include <linux/ip.h>
5#include <linux/ipv6.h>
6#include <linux/if_vlan.h>
7#include <net/ip.h>
ddbe5032 8#include <net/ipv6.h>
f77668dc
DB
9#include <linux/igmp.h>
10#include <linux/icmp.h>
11#include <linux/sctp.h>
12#include <linux/dccp.h>
0744dd00
ED
13#include <linux/if_tunnel.h>
14#include <linux/if_pppox.h>
15#include <linux/ppp_defs.h>
1bd758eb 16#include <net/flow_dissector.h>
56193d1b 17#include <scsi/fc/fc_fcoe.h>
0744dd00 18
fbff949e
JP
19static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
20 enum flow_dissector_key_id key_id)
21{
22 return flow_dissector->used_keys & (1 << key_id);
23}
24
25static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
26 enum flow_dissector_key_id key_id)
27{
28 flow_dissector->used_keys |= (1 << key_id);
29}
30
31static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
32 enum flow_dissector_key_id key_id,
33 void *target_container)
34{
35 return ((char *) target_container) + flow_dissector->offset[key_id];
36}
37
38void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
39 const struct flow_dissector_key *key,
40 unsigned int key_count)
41{
42 unsigned int i;
43
44 memset(flow_dissector, 0, sizeof(*flow_dissector));
45
46 for (i = 0; i < key_count; i++, key++) {
47 /* User should make sure that every key target offset is withing
48 * boundaries of unsigned short.
49 */
50 BUG_ON(key->offset > USHRT_MAX);
51 BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
52 key->key_id));
53
54 skb_flow_dissector_set_key(flow_dissector, key->key_id);
55 flow_dissector->offset[key->key_id] = key->offset;
56 }
57
58 /* Ensure that the dissector always includes basic key. That way
59 * we are able to avoid handling lack of it in fast path.
60 */
61 BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
62 FLOW_DISSECTOR_KEY_BASIC));
63}
64EXPORT_SYMBOL(skb_flow_dissector_init);
65
4d77d2b5
ED
66/* copy saddr & daddr, possibly using 64bit load/store
67 * Equivalent to : flow->src = iph->saddr;
68 * flow->dst = iph->daddr;
69 */
70static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
71{
72 BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
73 offsetof(typeof(*flow), src) + sizeof(flow->src));
74 memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
75}
0744dd00 76
357afe9c 77/**
6451b3f5
WC
78 * __skb_flow_get_ports - extract the upper layer ports and return them
79 * @skb: sk_buff to extract the ports from
357afe9c
NA
80 * @thoff: transport header offset
81 * @ip_proto: protocol for which to get port offset
6451b3f5
WC
82 * @data: raw buffer pointer to the packet, if NULL use skb->data
83 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
357afe9c
NA
84 *
85 * The function will try to retrieve the ports at offset thoff + poff where poff
86 * is the protocol port offset returned from proto_ports_offset
87 */
690e36e7
DM
88__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
89 void *data, int hlen)
357afe9c
NA
90{
91 int poff = proto_ports_offset(ip_proto);
92
690e36e7
DM
93 if (!data) {
94 data = skb->data;
95 hlen = skb_headlen(skb);
96 }
97
357afe9c
NA
98 if (poff >= 0) {
99 __be32 *ports, _ports;
100
690e36e7
DM
101 ports = __skb_header_pointer(skb, thoff + poff,
102 sizeof(_ports), data, hlen, &_ports);
357afe9c
NA
103 if (ports)
104 return *ports;
105 }
106
107 return 0;
108}
690e36e7 109EXPORT_SYMBOL(__skb_flow_get_ports);
357afe9c 110
453a940e
WC
111/**
112 * __skb_flow_dissect - extract the flow_keys struct and return it
113 * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
114 * @data: raw buffer pointer to the packet, if NULL use skb->data
115 * @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
116 * @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
117 * @hlen: packet header length, if @data is NULL use skb_headlen(skb)
118 *
119 * The function will try to retrieve the struct flow_keys from either the skbuff
120 * or a raw buffer specified by the rest parameters
121 */
122bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
123 void *data, __be16 proto, int nhoff, int hlen)
0744dd00 124{
0744dd00 125 u8 ip_proto;
0744dd00 126
690e36e7
DM
127 if (!data) {
128 data = skb->data;
453a940e
WC
129 proto = skb->protocol;
130 nhoff = skb_network_offset(skb);
690e36e7
DM
131 hlen = skb_headlen(skb);
132 }
133
0744dd00
ED
134 memset(flow, 0, sizeof(*flow));
135
136again:
137 switch (proto) {
2b8837ae 138 case htons(ETH_P_IP): {
0744dd00
ED
139 const struct iphdr *iph;
140 struct iphdr _iph;
141ip:
690e36e7 142 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
6f092343 143 if (!iph || iph->ihl < 5)
0744dd00 144 return false;
3797d3e8 145 nhoff += iph->ihl * 4;
0744dd00 146
3797d3e8 147 ip_proto = iph->protocol;
0744dd00
ED
148 if (ip_is_fragment(iph))
149 ip_proto = 0;
3797d3e8 150
5af7fb6e
AD
151 /* skip the address processing if skb is NULL. The assumption
152 * here is that if there is no skb we are not looking for flow
153 * info but lengths and protocols.
154 */
155 if (!skb)
156 break;
157
4d77d2b5 158 iph_to_flow_copy_addrs(flow, iph);
0744dd00
ED
159 break;
160 }
2b8837ae 161 case htons(ETH_P_IPV6): {
0744dd00
ED
162 const struct ipv6hdr *iph;
163 struct ipv6hdr _iph;
19469a87
TH
164 __be32 flow_label;
165
0744dd00 166ipv6:
690e36e7 167 iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph);
0744dd00
ED
168 if (!iph)
169 return false;
170
171 ip_proto = iph->nexthdr;
0744dd00 172 nhoff += sizeof(struct ipv6hdr);
19469a87 173
5af7fb6e 174 /* see comment above in IPv4 section */
56193d1b
AD
175 if (!skb)
176 break;
177
5af7fb6e
AD
178 flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
179 flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
180
19469a87
TH
181 flow_label = ip6_flowlabel(iph);
182 if (flow_label) {
183 /* Awesome, IPv6 packet has a flow label so we can
184 * use that to represent the ports without any
185 * further dissection.
186 */
187 flow->n_proto = proto;
188 flow->ip_proto = ip_proto;
189 flow->ports = flow_label;
190 flow->thoff = (u16)nhoff;
191
192 return true;
193 }
194
0744dd00
ED
195 break;
196 }
2b8837ae
JP
197 case htons(ETH_P_8021AD):
198 case htons(ETH_P_8021Q): {
0744dd00
ED
199 const struct vlan_hdr *vlan;
200 struct vlan_hdr _vlan;
201
690e36e7 202 vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
0744dd00
ED
203 if (!vlan)
204 return false;
205
206 proto = vlan->h_vlan_encapsulated_proto;
207 nhoff += sizeof(*vlan);
208 goto again;
209 }
2b8837ae 210 case htons(ETH_P_PPP_SES): {
0744dd00
ED
211 struct {
212 struct pppoe_hdr hdr;
213 __be16 proto;
214 } *hdr, _hdr;
690e36e7 215 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
0744dd00
ED
216 if (!hdr)
217 return false;
218 proto = hdr->proto;
219 nhoff += PPPOE_SES_HLEN;
220 switch (proto) {
2b8837ae 221 case htons(PPP_IP):
0744dd00 222 goto ip;
2b8837ae 223 case htons(PPP_IPV6):
0744dd00
ED
224 goto ipv6;
225 default:
226 return false;
227 }
228 }
08bfc9cb
EH
229 case htons(ETH_P_TIPC): {
230 struct {
231 __be32 pre[3];
232 __be32 srcnode;
233 } *hdr, _hdr;
234 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
235 if (!hdr)
236 return false;
237 flow->src = hdr->srcnode;
238 flow->dst = 0;
239 flow->n_proto = proto;
240 flow->thoff = (u16)nhoff;
241 return true;
242 }
56193d1b
AD
243 case htons(ETH_P_FCOE):
244 flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
245 /* fall through */
0744dd00
ED
246 default:
247 return false;
248 }
249
250 switch (ip_proto) {
251 case IPPROTO_GRE: {
252 struct gre_hdr {
253 __be16 flags;
254 __be16 proto;
255 } *hdr, _hdr;
256
690e36e7 257 hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
0744dd00
ED
258 if (!hdr)
259 return false;
260 /*
261 * Only look inside GRE if version zero and no
262 * routing
263 */
264 if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) {
265 proto = hdr->proto;
266 nhoff += 4;
267 if (hdr->flags & GRE_CSUM)
268 nhoff += 4;
269 if (hdr->flags & GRE_KEY)
270 nhoff += 4;
271 if (hdr->flags & GRE_SEQ)
272 nhoff += 4;
e1733de2
MD
273 if (proto == htons(ETH_P_TEB)) {
274 const struct ethhdr *eth;
275 struct ethhdr _eth;
276
690e36e7
DM
277 eth = __skb_header_pointer(skb, nhoff,
278 sizeof(_eth),
279 data, hlen, &_eth);
e1733de2
MD
280 if (!eth)
281 return false;
282 proto = eth->h_proto;
283 nhoff += sizeof(*eth);
284 }
0744dd00
ED
285 goto again;
286 }
287 break;
288 }
289 case IPPROTO_IPIP:
fca41895
TH
290 proto = htons(ETH_P_IP);
291 goto ip;
b438f940
TH
292 case IPPROTO_IPV6:
293 proto = htons(ETH_P_IPV6);
294 goto ipv6;
0744dd00
ED
295 default:
296 break;
297 }
298
e0f31d84 299 flow->n_proto = proto;
0744dd00 300 flow->ip_proto = ip_proto;
8ed78166
DB
301 flow->thoff = (u16) nhoff;
302
5af7fb6e
AD
303 /* unless skb is set we don't need to record port info */
304 if (skb)
305 flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
306 data, hlen);
307
0744dd00
ED
308 return true;
309}
690e36e7 310EXPORT_SYMBOL(__skb_flow_dissect);
441d9d32
CW
311
312static u32 hashrnd __read_mostly;
66415cf8
HFS
313static __always_inline void __flow_hash_secret_init(void)
314{
315 net_get_random_once(&hashrnd, sizeof(hashrnd));
316}
317
50fb7992 318static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval)
66415cf8 319{
50fb7992 320 return jhash_3words(a, b, c, keyval);
66415cf8
HFS
321}
322
50fb7992 323static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
5ed20a68
TH
324{
325 u32 hash;
326
327 /* get a consistent hash (same value on both flow directions) */
328 if (((__force u32)keys->dst < (__force u32)keys->src) ||
329 (((__force u32)keys->dst == (__force u32)keys->src) &&
330 ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
331 swap(keys->dst, keys->src);
332 swap(keys->port16[0], keys->port16[1]);
333 }
334
335 hash = __flow_hash_3words((__force u32)keys->dst,
336 (__force u32)keys->src,
50fb7992
TH
337 (__force u32)keys->ports,
338 keyval);
5ed20a68
TH
339 if (!hash)
340 hash = 1;
341
342 return hash;
343}
344
345u32 flow_hash_from_keys(struct flow_keys *keys)
346{
50fb7992
TH
347 __flow_hash_secret_init();
348 return __flow_hash_from_keys(keys, hashrnd);
5ed20a68
TH
349}
350EXPORT_SYMBOL(flow_hash_from_keys);
351
50fb7992
TH
352static inline u32 ___skb_get_hash(const struct sk_buff *skb,
353 struct flow_keys *keys, u32 keyval)
354{
355 if (!skb_flow_dissect(skb, keys))
356 return 0;
357
358 return __flow_hash_from_keys(keys, keyval);
359}
360
2f59e1eb
TH
361struct _flow_keys_digest_data {
362 __be16 n_proto;
363 u8 ip_proto;
364 u8 padding;
365 __be32 ports;
366 __be32 src;
367 __be32 dst;
368};
369
370void make_flow_keys_digest(struct flow_keys_digest *digest,
371 const struct flow_keys *flow)
372{
373 struct _flow_keys_digest_data *data =
374 (struct _flow_keys_digest_data *)digest;
375
376 BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));
377
378 memset(digest, 0, sizeof(*digest));
379
380 data->n_proto = flow->n_proto;
381 data->ip_proto = flow->ip_proto;
382 data->ports = flow->ports;
383 data->src = flow->src;
384 data->dst = flow->dst;
385}
386EXPORT_SYMBOL(make_flow_keys_digest);
387
d4fd3275
JP
388/**
389 * __skb_get_hash: calculate a flow hash
390 * @skb: sk_buff to calculate flow hash from
391 *
392 * This function calculates a flow hash based on src/dst addresses
61b905da
TH
393 * and src/dst port numbers. Sets hash in skb to non-zero hash value
394 * on success, zero indicates no valid hash. Also, sets l4_hash in skb
441d9d32
CW
395 * if hash is a canonical 4-tuple hash over transport ports.
396 */
3958afa1 397void __skb_get_hash(struct sk_buff *skb)
441d9d32
CW
398{
399 struct flow_keys keys;
50fb7992 400 u32 hash;
441d9d32 401
50fb7992
TH
402 __flow_hash_secret_init();
403
404 hash = ___skb_get_hash(skb, &keys, hashrnd);
405 if (!hash)
441d9d32 406 return;
441d9d32 407 if (keys.ports)
61b905da 408 skb->l4_hash = 1;
a3b18ddb 409 skb->sw_hash = 1;
50fb7992 410 skb->hash = hash;
441d9d32 411}
3958afa1 412EXPORT_SYMBOL(__skb_get_hash);
441d9d32 413
50fb7992
TH
414__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
415{
416 struct flow_keys keys;
417
418 return ___skb_get_hash(skb, &keys, perturb);
419}
420EXPORT_SYMBOL(skb_get_hash_perturb);
421
56193d1b
AD
422u32 __skb_get_poff(const struct sk_buff *skb, void *data,
423 const struct flow_keys *keys, int hlen)
f77668dc 424{
56193d1b 425 u32 poff = keys->thoff;
f77668dc 426
56193d1b 427 switch (keys->ip_proto) {
f77668dc 428 case IPPROTO_TCP: {
5af7fb6e
AD
429 /* access doff as u8 to avoid unaligned access */
430 const u8 *doff;
431 u8 _doff;
f77668dc 432
5af7fb6e
AD
433 doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
434 data, hlen, &_doff);
435 if (!doff)
f77668dc
DB
436 return poff;
437
5af7fb6e 438 poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
f77668dc
DB
439 break;
440 }
441 case IPPROTO_UDP:
442 case IPPROTO_UDPLITE:
443 poff += sizeof(struct udphdr);
444 break;
445 /* For the rest, we do not really care about header
446 * extensions at this point for now.
447 */
448 case IPPROTO_ICMP:
449 poff += sizeof(struct icmphdr);
450 break;
451 case IPPROTO_ICMPV6:
452 poff += sizeof(struct icmp6hdr);
453 break;
454 case IPPROTO_IGMP:
455 poff += sizeof(struct igmphdr);
456 break;
457 case IPPROTO_DCCP:
458 poff += sizeof(struct dccp_hdr);
459 break;
460 case IPPROTO_SCTP:
461 poff += sizeof(struct sctphdr);
462 break;
463 }
464
465 return poff;
466}
467
0db89b8b
JP
468/**
469 * skb_get_poff - get the offset to the payload
470 * @skb: sk_buff to get the payload offset from
471 *
472 * The function will get the offset to the payload as far as it could
473 * be dissected. The main user is currently BPF, so that we can dynamically
56193d1b
AD
474 * truncate packets without needing to push actual payload to the user
475 * space and can analyze headers only, instead.
476 */
477u32 skb_get_poff(const struct sk_buff *skb)
478{
479 struct flow_keys keys;
480
481 if (!skb_flow_dissect(skb, &keys))
482 return 0;
483
484 return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
485}