]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/drivers/net/tap/tap_bpf_program.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / seastar / dpdk / drivers / net / tap / tap_bpf_program.c
1 /* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 * Copyright 2017 Mellanox Technologies, Ltd
3 */
4
5 #include <stdint.h>
6 #include <stdbool.h>
7 #include <sys/types.h>
8 #include <sys/socket.h>
9 #include <asm/types.h>
10 #include <linux/in.h>
11 #include <linux/if.h>
12 #include <linux/if_ether.h>
13 #include <linux/ip.h>
14 #include <linux/ipv6.h>
15 #include <linux/if_tunnel.h>
16 #include <linux/filter.h>
17 #include <linux/bpf.h>
18
19 #include "tap_rss.h"
20
/**
 * Create an IPv4 address from its four octets, most significant octet first.
 *
 * Every octet is converted to __u32 and masked to 8 bits *before* being
 * shifted, so an octet >= 0x80 placed in the top byte is never shifted as a
 * signed int (which would be undefined behaviour).
 */
#define IPv4(a, b, c, d) ((((__u32)(a) & 0xff) << 24) | \
			  (((__u32)(b) & 0xff) << 16) | \
			  (((__u32)(c) & 0xff) << 8) | \
			  ((__u32)(d) & 0xff))
26
/** Create a 16-bit port value from its high byte (a) and low byte (b). */
#define PORT(a, b) (((b) & 0xff) | \
		    (__u16)(((a) & 0xff) << 8))
29
/*
 * Queue numbers stored in skb->cb[1] are biased by QUEUE_OFFSET. The bias
 * makes a tagged packet (one that already went through the RSS rule, so
 * skb->cb[1] != 0) distinguishable from any other packet.
 */
#define QUEUE_OFFSET		0x7cafe800

/* Value assigned to the .pinning field of map_keys. */
#define PIN_GLOBAL_NS		2

#define KEY_IDX			0
/* Value assigned to the .id field of map_keys. */
#define BPF_MAP_ID_KEY		1
39
40 struct vlan_hdr {
41 __be16 proto;
42 __be16 tci;
43 };
44
45 struct bpf_elf_map __attribute__((section("maps"), used))
46 map_keys = {
47 .type = BPF_MAP_TYPE_HASH,
48 .id = BPF_MAP_ID_KEY,
49 .size_key = sizeof(__u32),
50 .size_value = sizeof(struct rss_key),
51 .max_elem = 256,
52 .pinning = PIN_GLOBAL_NS,
53 };
54
55 __section("cls_q") int
56 match_q(struct __sk_buff *skb)
57 {
58 __u32 queue = skb->cb[1];
59 volatile __u32 q = 0xdeadbeef;
60 __u32 match_queue = QUEUE_OFFSET + q;
61
62 /* printt("match_q$i() queue = %d\n", queue); */
63
64 if (queue != match_queue)
65 return TC_ACT_OK;
66
67 /* queue match */
68 skb->cb[1] = 0;
69 return TC_ACT_UNSPEC;
70 }
71
72
73 struct ipv4_l3_l4_tuple {
74 __u32 src_addr;
75 __u32 dst_addr;
76 __u16 dport;
77 __u16 sport;
78 } __attribute__((packed));
79
80 struct ipv6_l3_l4_tuple {
81 __u8 src_addr[16];
82 __u8 dst_addr[16];
83 __u16 dport;
84 __u16 sport;
85 } __attribute__((packed));
86
87 static const __u8 def_rss_key[TAP_RSS_HASH_KEY_SIZE] = {
88 0xd1, 0x81, 0xc6, 0x2c,
89 0xf7, 0xf4, 0xdb, 0x5b,
90 0x19, 0x83, 0xa2, 0xfc,
91 0x94, 0x3e, 0x1a, 0xdb,
92 0xd9, 0x38, 0x9e, 0x6b,
93 0xd1, 0x03, 0x9c, 0x2c,
94 0xa7, 0x44, 0x99, 0xad,
95 0x59, 0x3d, 0x56, 0xd9,
96 0xf3, 0x25, 0x3c, 0x06,
97 0x2a, 0xdc, 0x1f, 0xfc,
98 };
99
100 static __u32 __attribute__((always_inline))
101 rte_softrss_be(const __u32 *input_tuple, const uint8_t *rss_key,
102 __u8 input_len)
103 {
104 __u32 i, j, hash = 0;
105 #pragma unroll
106 for (j = 0; j < input_len; j++) {
107 #pragma unroll
108 for (i = 0; i < 32; i++) {
109 if (input_tuple[j] & (1U << (31 - i))) {
110 hash ^= ((const __u32 *)def_rss_key)[j] << i |
111 (__u32)((uint64_t)
112 (((const __u32 *)def_rss_key)[j + 1])
113 >> (32 - i));
114 }
115 }
116 }
117 return hash;
118 }
119
120 static int __attribute__((always_inline))
121 rss_l3_l4(struct __sk_buff *skb)
122 {
123 void *data_end = (void *)(long)skb->data_end;
124 void *data = (void *)(long)skb->data;
125 __u16 proto = (__u16)skb->protocol;
126 __u32 key_idx = 0xdeadbeef;
127 __u32 hash;
128 struct rss_key *rsskey;
129 __u64 off = ETH_HLEN;
130 int j;
131 __u8 *key = 0;
132 __u32 len;
133 __u32 queue = 0;
134
135 rsskey = map_lookup_elem(&map_keys, &key_idx);
136 if (!rsskey) {
137 printt("hash(): rss key is not configured\n");
138 return TC_ACT_OK;
139 }
140 key = (__u8 *)rsskey->key;
141
142 /* Get correct proto for 802.1ad */
143 if (skb->vlan_present && skb->vlan_proto == htons(ETH_P_8021AD)) {
144 if (data + ETH_ALEN * 2 + sizeof(struct vlan_hdr) +
145 sizeof(proto) > data_end)
146 return TC_ACT_OK;
147 proto = *(__u16 *)(data + ETH_ALEN * 2 +
148 sizeof(struct vlan_hdr));
149 off += sizeof(struct vlan_hdr);
150 }
151
152 if (proto == htons(ETH_P_IP)) {
153 if (data + off + sizeof(struct iphdr) + sizeof(__u32)
154 > data_end)
155 return TC_ACT_OK;
156
157 __u8 *src_dst_addr = data + off + offsetof(struct iphdr, saddr);
158 __u8 *src_dst_port = data + off + sizeof(struct iphdr);
159 struct ipv4_l3_l4_tuple v4_tuple = {
160 .src_addr = IPv4(*(src_dst_addr + 0),
161 *(src_dst_addr + 1),
162 *(src_dst_addr + 2),
163 *(src_dst_addr + 3)),
164 .dst_addr = IPv4(*(src_dst_addr + 4),
165 *(src_dst_addr + 5),
166 *(src_dst_addr + 6),
167 *(src_dst_addr + 7)),
168 .sport = PORT(*(src_dst_port + 0),
169 *(src_dst_port + 1)),
170 .dport = PORT(*(src_dst_port + 2),
171 *(src_dst_port + 3)),
172 };
173 __u8 input_len = sizeof(v4_tuple) / sizeof(__u32);
174 if (rsskey->hash_fields & (1 << HASH_FIELD_IPV4_L3))
175 input_len--;
176 hash = rte_softrss_be((__u32 *)&v4_tuple, key, 3);
177 } else if (proto == htons(ETH_P_IPV6)) {
178 if (data + off + sizeof(struct ipv6hdr) +
179 sizeof(__u32) > data_end)
180 return TC_ACT_OK;
181 __u8 *src_dst_addr = data + off +
182 offsetof(struct ipv6hdr, saddr);
183 __u8 *src_dst_port = data + off +
184 sizeof(struct ipv6hdr);
185 struct ipv6_l3_l4_tuple v6_tuple;
186 for (j = 0; j < 4; j++)
187 *((uint32_t *)&v6_tuple.src_addr + j) =
188 __builtin_bswap32(*((uint32_t *)
189 src_dst_addr + j));
190 for (j = 0; j < 4; j++)
191 *((uint32_t *)&v6_tuple.dst_addr + j) =
192 __builtin_bswap32(*((uint32_t *)
193 src_dst_addr + 4 + j));
194 v6_tuple.sport = PORT(*(src_dst_port + 0),
195 *(src_dst_port + 1));
196 v6_tuple.dport = PORT(*(src_dst_port + 2),
197 *(src_dst_port + 3));
198
199 __u8 input_len = sizeof(v6_tuple) / sizeof(__u32);
200 if (rsskey->hash_fields & (1 << HASH_FIELD_IPV6_L3))
201 input_len--;
202 hash = rte_softrss_be((__u32 *)&v6_tuple, key, 9);
203 } else {
204 return TC_ACT_PIPE;
205 }
206
207 queue = rsskey->queues[(hash % rsskey->nb_queues) &
208 (TAP_MAX_QUEUES - 1)];
209 skb->cb[1] = QUEUE_OFFSET + queue;
210 /* printt(">>>>> rss_l3_l4 hash=0x%x queue=%u\n", hash, queue); */
211
212 return TC_ACT_RECLASSIFY;
213 }
214
215 #define RSS(L) \
216 __section(#L) int \
217 L ## _hash(struct __sk_buff *skb) \
218 { \
219 return rss_ ## L (skb); \
220 }
221
222 RSS(l3_l4)
223
224 BPF_LICENSE("Dual BSD/GPL");