]>
Commit | Line | Data |
---|---|---|
187d0738 | 1 | /* SPDX-License-Identifier: GPL-2.0 |
2 | * | |
3 | * Copyright (c) 2019 Facebook | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of version 2 of the GNU General Public | |
7 | * License as published by the Free Software Foundation. | |
8 | * | |
9 | * Include file for sample Host Bandwidth Manager (HBM) BPF programs | |
10 | */ | |
11 | #define KBUILD_MODNAME "foo" | |
12 | #include <stddef.h> | |
13 | #include <stdbool.h> | |
14 | #include <uapi/linux/bpf.h> | |
15 | #include <uapi/linux/if_ether.h> | |
16 | #include <uapi/linux/if_packet.h> | |
17 | #include <uapi/linux/ip.h> | |
18 | #include <uapi/linux/ipv6.h> | |
19 | #include <uapi/linux/in.h> | |
20 | #include <uapi/linux/tcp.h> | |
21 | #include <uapi/linux/filter.h> | |
22 | #include <uapi/linux/pkt_cls.h> | |
23 | #include <net/ipv6.h> | |
24 | #include <net/inet_ecn.h> | |
7cf245a3 THJ |
25 | #include <bpf/bpf_endian.h> |
26 | #include <bpf/bpf_helpers.h> | |
187d0738 | 27 | #include "hbm.h" |
28 | ||
// Values used when building the per-packet return value
#define DROP_PKT	0
#define ALLOW_PKT	1
#define TCP_ECN_OK	1
#define CWR		2

// bpf_printk() expands to nothing unless HBM_DEBUG is defined
#ifndef HBM_DEBUG  // Define HBM_DEBUG to enable debugging
#undef bpf_printk
#define bpf_printk(fmt, ...)
#endif

// Credit accounting: thresholds are multiples of MAX_BYTES_PER_PACKET
#define INITIAL_CREDIT_PACKETS	100
#define MAX_BYTES_PER_PACKET	1500
#define MARK_THRESH		(40 * MAX_BYTES_PER_PACKET)
#define DROP_THRESH		(80 * 5 * MAX_BYTES_PER_PACKET)
#define LARGE_PKT_DROP_THRESH	(DROP_THRESH - (15 * MAX_BYTES_PER_PACKET))
#define MARK_REGION_SIZE	(LARGE_PKT_DROP_THRESH - MARK_THRESH)
#define LARGE_PKT_THRESH	120
#define MAX_CREDIT		(100 * MAX_BYTES_PER_PACKET)
#define INIT_CREDIT		(INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET)

// Time base accounting for fq's EDT
#define BURST_SIZE_NS		100000 // 100us
#define MARK_THRESH_NS		50000 // 50us
#define DROP_THRESH_NS		500000 // 500us
// Reserve 20us of queuing for small packets (less than 120 bytes)
#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000)
#define MARK_REGION_SIZE_NS	(LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS)

// rate in bytes per ns << 20
// The first two macros scale a time delta by the rate and undo the << 20;
// BYTES_TO_NS is the inverse (bytes << 20, divided by the rate).
#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20)
#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate))
187d0738 | 61 | |
36b5d471 AN |
62 | struct { |
63 | __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); | |
64 | __type(key, struct bpf_cgroup_storage_key); | |
65 | __type(value, struct hbm_vqueue); | |
66 | } queue_state SEC(".maps"); | |
187d0738 | 67 | |
36b5d471 AN |
68 | struct { |
69 | __uint(type, BPF_MAP_TYPE_ARRAY); | |
70 | __uint(max_entries, 1); | |
71 | __type(key, u32); | |
72 | __type(value, struct hvm_queue_stats); | |
73 | } queue_stats SEC(".maps"); | |
187d0738 | 74 | |
/*
 * Per-packet information gathered by hbm_get_pkt_info().
 */
struct hbm_pkt_info {
	int	cwnd;		// tp->snd_cwnd, or 0 when no TCP socket info
	int	rtt;		// tp->srtt_us >> 3, or 0 when no TCP socket info
	int	packets_out;	// tp->packets_out, or 0 when no TCP socket info
	bool	is_ip;		// header version field is 4 or 6
	bool	is_tcp;		// IPv4 protocol / IPv6 next header is TCP
	short	ecn;		// header ECN bits (masked with INET_ECN_MASK)
};
83 | ||
d58c6f72 | 84 | static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) |
85 | { | |
86 | struct bpf_sock *sk; | |
87 | struct bpf_tcp_sock *tp; | |
88 | ||
89 | sk = skb->sk; | |
90 | if (sk) { | |
91 | sk = bpf_sk_fullsock(sk); | |
92 | if (sk) { | |
93 | if (sk->protocol == IPPROTO_TCP) { | |
94 | tp = bpf_tcp_sock(sk); | |
95 | if (tp) { | |
96 | pkti->cwnd = tp->snd_cwnd; | |
97 | pkti->rtt = tp->srtt_us >> 3; | |
71634d7f | 98 | pkti->packets_out = tp->packets_out; |
d58c6f72 | 99 | return 0; |
100 | } | |
101 | } | |
102 | } | |
103 | } | |
71634d7f | 104 | pkti->cwnd = 0; |
105 | pkti->rtt = 0; | |
106 | pkti->packets_out = 0; | |
d58c6f72 | 107 | return 1; |
108 | } | |
109 | ||
71634d7f | 110 | static void hbm_get_pkt_info(struct __sk_buff *skb, |
111 | struct hbm_pkt_info *pkti) | |
187d0738 | 112 | { |
113 | struct iphdr iph; | |
114 | struct ipv6hdr *ip6h; | |
115 | ||
d58c6f72 | 116 | pkti->cwnd = 0; |
117 | pkti->rtt = 0; | |
187d0738 | 118 | bpf_skb_load_bytes(skb, 0, &iph, 12); |
119 | if (iph.version == 6) { | |
120 | ip6h = (struct ipv6hdr *)&iph; | |
121 | pkti->is_ip = true; | |
122 | pkti->is_tcp = (ip6h->nexthdr == 6); | |
123 | pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; | |
124 | } else if (iph.version == 4) { | |
125 | pkti->is_ip = true; | |
126 | pkti->is_tcp = (iph.protocol == 6); | |
127 | pkti->ecn = iph.tos & INET_ECN_MASK; | |
128 | } else { | |
129 | pkti->is_ip = false; | |
130 | pkti->is_tcp = false; | |
131 | pkti->ecn = 0; | |
132 | } | |
d58c6f72 | 133 | if (pkti->is_tcp) |
134 | get_tcp_info(skb, pkti); | |
187d0738 | 135 | } |
136 | ||
137 | static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) | |
138 | { | |
71634d7f | 139 | bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); |
140 | qdp->lasttime = bpf_ktime_get_ns(); | |
141 | qdp->credit = INIT_CREDIT; | |
142 | qdp->rate = rate * 128; | |
143 | } | |
144 | ||
145 | static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, | |
146 | int rate) | |
147 | { | |
148 | unsigned long long curtime; | |
149 | ||
150 | curtime = bpf_ktime_get_ns(); | |
151 | bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); | |
152 | qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst | |
153 | qdp->credit = 0; // not used | |
154 | qdp->rate = rate * 128; | |
187d0738 | 155 | } |
156 | ||
157 | static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, | |
158 | int len, | |
159 | unsigned long long curtime, | |
160 | bool congestion_flag, | |
d58c6f72 | 161 | bool drop_flag, |
162 | bool cwr_flag, | |
163 | bool ecn_ce_flag, | |
164 | struct hbm_pkt_info *pkti, | |
165 | int credit) | |
187d0738 | 166 | { |
d58c6f72 | 167 | int rv = ALLOW_PKT; |
168 | ||
187d0738 | 169 | if (qsp != NULL) { |
170 | // Following is needed for work conserving | |
171 | __sync_add_and_fetch(&(qsp->bytes_total), len); | |
172 | if (qsp->stats) { | |
173 | // Optionally update statistics | |
174 | if (qsp->firstPacketTime == 0) | |
175 | qsp->firstPacketTime = curtime; | |
176 | qsp->lastPacketTime = curtime; | |
177 | __sync_add_and_fetch(&(qsp->pkts_total), 1); | |
d58c6f72 | 178 | if (congestion_flag) { |
187d0738 | 179 | __sync_add_and_fetch(&(qsp->pkts_marked), 1); |
180 | __sync_add_and_fetch(&(qsp->bytes_marked), len); | |
181 | } | |
182 | if (drop_flag) { | |
183 | __sync_add_and_fetch(&(qsp->pkts_dropped), 1); | |
184 | __sync_add_and_fetch(&(qsp->bytes_dropped), | |
185 | len); | |
186 | } | |
d58c6f72 | 187 | if (ecn_ce_flag) |
188 | __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); | |
189 | if (pkti->cwnd) { | |
190 | __sync_add_and_fetch(&(qsp->sum_cwnd), | |
191 | pkti->cwnd); | |
192 | __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); | |
193 | } | |
194 | if (pkti->rtt) | |
195 | __sync_add_and_fetch(&(qsp->sum_rtt), | |
196 | pkti->rtt); | |
197 | __sync_add_and_fetch(&(qsp->sum_credit), credit); | |
198 | ||
199 | if (drop_flag) | |
200 | rv = DROP_PKT; | |
201 | if (cwr_flag) | |
202 | rv |= 2; | |
203 | if (rv == DROP_PKT) | |
204 | __sync_add_and_fetch(&(qsp->returnValCount[0]), | |
205 | 1); | |
206 | else if (rv == ALLOW_PKT) | |
207 | __sync_add_and_fetch(&(qsp->returnValCount[1]), | |
208 | 1); | |
209 | else if (rv == 2) | |
210 | __sync_add_and_fetch(&(qsp->returnValCount[2]), | |
211 | 1); | |
212 | else if (rv == 3) | |
213 | __sync_add_and_fetch(&(qsp->returnValCount[3]), | |
214 | 1); | |
187d0738 | 215 | } |
216 | } | |
217 | } |