]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/core/gen_estimator.c
smc-mca: Fix build failure due to typo.
[mirror_ubuntu-artful-kernel.git] / net / core / gen_estimator.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/gen_estimator.c Simple rate estimator.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Changes:
12 * Jamal Hadi Salim - moved it to net/core and reshulfed
13 * names to make it usable in general net subsystem.
14 */
15
16#include <asm/uaccess.h>
17#include <asm/system.h>
1977f032 18#include <linux/bitops.h>
1da177e4
LT
19#include <linux/module.h>
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/jiffies.h>
23#include <linux/string.h>
24#include <linux/mm.h>
25#include <linux/socket.h>
26#include <linux/sockios.h>
27#include <linux/in.h>
28#include <linux/errno.h>
29#include <linux/interrupt.h>
30#include <linux/netdevice.h>
31#include <linux/skbuff.h>
32#include <linux/rtnetlink.h>
33#include <linux/init.h>
4db0acf3 34#include <linux/rbtree.h>
1da177e4
LT
35#include <net/sock.h>
36#include <net/gen_stats.h>
37
38/*
39 This code is NOT intended to be used for statistics collection,
40 its purpose is to provide a base for statistical multiplexing
41 for controlled load service.
42 If you need only statistics, run a user level daemon which
43 periodically reads byte counters.
44
45 Unfortunately, rate estimation is not a very easy task.
46 F.e. I did not find a simple way to estimate the current peak rate
47 and even failed to formulate the problem 8)8)
48
49 So I preferred not to built an estimator into the scheduler,
50 but run this task separately.
51 Ideally, it should be kernel thread(s), but for now it runs
52 from timers, which puts apparent top bounds on the number of rated
53 flows, has minimal overhead on small, but is enough
54 to handle controlled load service, sets of aggregates.
55
56 We measure rate over A=(1<<interval) seconds and evaluate EWMA:
57
58 avrate = avrate*(1-W) + rate*W
59
60 where W is chosen as negative power of 2: W = 2^(-ewma_log)
61
62 The resulting time constant is:
63
64 T = A/(-ln(1-W))
65
66
67 NOTES.
68
69 * The stored value for avbps is scaled by 2^5, so that maximal
70 rate is ~1Gbit, avpps is scaled by 2^10.
71
72 * Minimal interval is HZ/4=250msec (it is the greatest common divisor
73 for HZ=100 and HZ=1024 8)), maximal interval
74 is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
75 are too expensive, longer ones can be implemented
76 at user level painlessly.
77 */
78
79#define EST_MAX_INTERVAL 5
80
81struct gen_estimator
82{
0929c2dd 83 struct list_head list;
1da177e4
LT
84 struct gnet_stats_basic *bstats;
85 struct gnet_stats_rate_est *rate_est;
86 spinlock_t *stats_lock;
1da177e4
LT
87 int ewma_log;
88 u64 last_bytes;
89 u32 last_packets;
90 u32 avpps;
91 u32 avbps;
0929c2dd 92 struct rcu_head e_rcu;
4db0acf3 93 struct rb_node node;
1da177e4
LT
94};
95
96struct gen_estimator_head
97{
98 struct timer_list timer;
0929c2dd 99 struct list_head list;
1da177e4
LT
100};
101
102static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
103
deb3abf1 104/* Protects against NULL dereference */
1da177e4
LT
105static DEFINE_RWLOCK(est_lock);
106
4db0acf3
JP
107/* Protects against soft lockup during large deletion */
108static struct rb_root est_root = RB_ROOT;
109
1da177e4
LT
110static void est_timer(unsigned long arg)
111{
112 int idx = (int)arg;
113 struct gen_estimator *e;
114
0929c2dd
RZ
115 rcu_read_lock();
116 list_for_each_entry_rcu(e, &elist[idx].list, list) {
1da177e4
LT
117 u64 nbytes;
118 u32 npackets;
119 u32 rate;
120
121 spin_lock(e->stats_lock);
0929c2dd
RZ
122 read_lock(&est_lock);
123 if (e->bstats == NULL)
124 goto skip;
125
1da177e4
LT
126 nbytes = e->bstats->bytes;
127 npackets = e->bstats->packets;
128 rate = (nbytes - e->last_bytes)<<(7 - idx);
129 e->last_bytes = nbytes;
130 e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
131 e->rate_est->bps = (e->avbps+0xF)>>5;
132
133 rate = (npackets - e->last_packets)<<(12 - idx);
134 e->last_packets = npackets;
135 e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
136 e->rate_est->pps = (e->avpps+0x1FF)>>10;
0929c2dd
RZ
137skip:
138 read_unlock(&est_lock);
1da177e4
LT
139 spin_unlock(e->stats_lock);
140 }
141
0929c2dd 142 if (!list_empty(&elist[idx].list))
789675e2 143 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
0929c2dd 144 rcu_read_unlock();
1da177e4
LT
145}
146
4db0acf3
JP
147static void gen_add_node(struct gen_estimator *est)
148{
149 struct rb_node **p = &est_root.rb_node, *parent = NULL;
150
151 while (*p) {
152 struct gen_estimator *e;
153
154 parent = *p;
155 e = rb_entry(parent, struct gen_estimator, node);
156
157 if (est->bstats > e->bstats)
158 p = &parent->rb_right;
159 else
160 p = &parent->rb_left;
161 }
162 rb_link_node(&est->node, parent, p);
163 rb_insert_color(&est->node, &est_root);
164}
165
166static struct gen_estimator *gen_find_node(struct gnet_stats_basic *bstats,
167 struct gnet_stats_rate_est *rate_est)
168{
169 struct rb_node *p = est_root.rb_node;
170
171 while (p) {
172 struct gen_estimator *e;
173
174 e = rb_entry(p, struct gen_estimator, node);
175
176 if (bstats > e->bstats)
177 p = p->rb_right;
178 else if (bstats < e->bstats || rate_est != e->rate_est)
179 p = p->rb_left;
180 else
181 return e;
182 }
183 return NULL;
184}
185
1da177e4
LT
186/**
187 * gen_new_estimator - create a new rate estimator
188 * @bstats: basic statistics
189 * @rate_est: rate estimator statistics
190 * @stats_lock: statistics lock
191 * @opt: rate estimator configuration TLV
192 *
193 * Creates a new rate estimator with &bstats as source and &rate_est
194 * as destination. A new timer with the interval specified in the
195 * configuration TLV is created. Upon each interval, the latest statistics
196 * will be read from &bstats and the estimated rate will be stored in
197 * &rate_est with the statistics lock grabed during this period.
4ec93edb 198 *
1da177e4 199 * Returns 0 on success or a negative error code.
0929c2dd
RZ
200 *
201 * NOTE: Called under rtnl_mutex
1da177e4
LT
202 */
203int gen_new_estimator(struct gnet_stats_basic *bstats,
0929c2dd
RZ
204 struct gnet_stats_rate_est *rate_est,
205 spinlock_t *stats_lock,
1e90474c 206 struct nlattr *opt)
1da177e4
LT
207{
208 struct gen_estimator *est;
1e90474c 209 struct gnet_estimator *parm = nla_data(opt);
0929c2dd 210 int idx;
1da177e4 211
1e90474c 212 if (nla_len(opt) < sizeof(*parm))
1da177e4
LT
213 return -EINVAL;
214
215 if (parm->interval < -2 || parm->interval > 3)
216 return -EINVAL;
217
77d04bd9 218 est = kzalloc(sizeof(*est), GFP_KERNEL);
1da177e4
LT
219 if (est == NULL)
220 return -ENOBUFS;
221
0929c2dd 222 idx = parm->interval + 2;
1da177e4
LT
223 est->bstats = bstats;
224 est->rate_est = rate_est;
225 est->stats_lock = stats_lock;
226 est->ewma_log = parm->ewma_log;
227 est->last_bytes = bstats->bytes;
228 est->avbps = rate_est->bps<<5;
229 est->last_packets = bstats->packets;
230 est->avpps = rate_est->pps<<10;
231
0929c2dd
RZ
232 if (!elist[idx].timer.function) {
233 INIT_LIST_HEAD(&elist[idx].list);
234 setup_timer(&elist[idx].timer, est_timer, idx);
1da177e4 235 }
0929c2dd
RZ
236
237 if (list_empty(&elist[idx].list))
789675e2 238 mod_timer(&elist[idx].timer, jiffies + ((HZ/4) << idx));
0929c2dd
RZ
239
240 list_add_rcu(&est->list, &elist[idx].list);
4db0acf3
JP
241 gen_add_node(est);
242
1da177e4
LT
243 return 0;
244}
c1b56878 245EXPORT_SYMBOL(gen_new_estimator);
1da177e4 246
0929c2dd
RZ
247static void __gen_kill_estimator(struct rcu_head *head)
248{
249 struct gen_estimator *e = container_of(head,
250 struct gen_estimator, e_rcu);
251 kfree(e);
252}
253
1da177e4
LT
254/**
255 * gen_kill_estimator - remove a rate estimator
256 * @bstats: basic statistics
257 * @rate_est: rate estimator statistics
258 *
4db0acf3 259 * Removes the rate estimator specified by &bstats and &rate_est.
0929c2dd 260 *
deb3abf1 261 * NOTE: Called under rtnl_mutex
1da177e4
LT
262 */
263void gen_kill_estimator(struct gnet_stats_basic *bstats,
4db0acf3 264 struct gnet_stats_rate_est *rate_est)
1da177e4 265{
4db0acf3 266 struct gen_estimator *e;
0929c2dd 267
4db0acf3
JP
268 while ((e = gen_find_node(bstats, rate_est))) {
269 rb_erase(&e->node, &est_root);
1da177e4 270
4db0acf3
JP
271 write_lock_bh(&est_lock);
272 e->bstats = NULL;
273 write_unlock_bh(&est_lock);
1da177e4 274
4db0acf3
JP
275 list_del_rcu(&e->list);
276 call_rcu(&e->e_rcu, __gen_kill_estimator);
1da177e4
LT
277 }
278}
c1b56878 279EXPORT_SYMBOL(gen_kill_estimator);
1da177e4
LT
280
281/**
96750162 282 * gen_replace_estimator - replace rate estimator configuration
1da177e4
LT
283 * @bstats: basic statistics
284 * @rate_est: rate estimator statistics
285 * @stats_lock: statistics lock
286 * @opt: rate estimator configuration TLV
287 *
288 * Replaces the configuration of a rate estimator by calling
289 * gen_kill_estimator() and gen_new_estimator().
4ec93edb 290 *
1da177e4
LT
291 * Returns 0 on success or a negative error code.
292 */
96750162
JP
293int gen_replace_estimator(struct gnet_stats_basic *bstats,
294 struct gnet_stats_rate_est *rate_est,
1e90474c 295 spinlock_t *stats_lock, struct nlattr *opt)
1da177e4 296{
96750162
JP
297 gen_kill_estimator(bstats, rate_est);
298 return gen_new_estimator(bstats, rate_est, stats_lock, opt);
1da177e4 299}
c1b56878
SH
300EXPORT_SYMBOL(gen_replace_estimator);
301
302/**
303 * gen_estimator_active - test if estimator is currently in use
304 * @rate_est: rate estimator statistics
305 *
306 * Returns 1 if estimator is active, and 0 if not.
307 */
308int gen_estimator_active(const struct gnet_stats_rate_est *rate_est)
309{
310 int idx;
311 struct gen_estimator *e;
4ec93edb 312
c1b56878 313 ASSERT_RTNL();
1da177e4 314
c1b56878
SH
315 for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
316 if (!elist[idx].timer.function)
317 continue;
318
319 list_for_each_entry(e, &elist[idx].list, list) {
320 if (e->rate_est == rate_est)
321 return 1;
322 }
323 }
324 return 0;
325}
326EXPORT_SYMBOL(gen_estimator_active);