]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/openvswitch/meter.c
liquidio: Missing error code in liquidio_init_nic_module()
[mirror_ubuntu-bionic-kernel.git] / net / openvswitch / meter.c
CommitLineData
96fbc13d
AZ
1/*
2 * Copyright (c) 2017 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8
9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11#include <linux/if.h>
12#include <linux/skbuff.h>
13#include <linux/ip.h>
14#include <linux/kernel.h>
15#include <linux/openvswitch.h>
16#include <linux/netlink.h>
17#include <linux/rculist.h>
18
19#include <net/netlink.h>
20#include <net/genetlink.h>
21
22#include "datapath.h"
23#include "meter.h"
24
25#define METER_HASH_BUCKETS 1024
26
27static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
28 [OVS_METER_ATTR_ID] = { .type = NLA_U32, },
29 [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
30 [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
31 [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED },
32 [OVS_METER_ATTR_USED] = { .type = NLA_U64 },
33 [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG },
34 [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 },
35 [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 },
36};
37
38static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
39 [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, },
40 [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, },
41 [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, },
42 [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
43};
44
45static void rcu_free_ovs_meter_callback(struct rcu_head *rcu)
46{
47 struct dp_meter *meter = container_of(rcu, struct dp_meter, rcu);
48
49 kfree(meter);
50}
51
52static void ovs_meter_free(struct dp_meter *meter)
53{
54 if (!meter)
55 return;
56
57 call_rcu(&meter->rcu, rcu_free_ovs_meter_callback);
58}
59
60static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
61 u32 meter_id)
62{
63 return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
64}
65
66/* Call with ovs_mutex or RCU read lock. */
67static struct dp_meter *lookup_meter(const struct datapath *dp,
68 u32 meter_id)
69{
70 struct dp_meter *meter;
71 struct hlist_head *head;
72
73 head = meter_hash_bucket(dp, meter_id);
74 hlist_for_each_entry_rcu(meter, head, dp_hash_node) {
75 if (meter->id == meter_id)
76 return meter;
77 }
78 return NULL;
79}
80
81static void attach_meter(struct datapath *dp, struct dp_meter *meter)
82{
83 struct hlist_head *head = meter_hash_bucket(dp, meter->id);
84
85 hlist_add_head_rcu(&meter->dp_hash_node, head);
86}
87
88static void detach_meter(struct dp_meter *meter)
89{
90 ASSERT_OVSL();
91 if (meter)
92 hlist_del_rcu(&meter->dp_hash_node);
93}
94
95static struct sk_buff *
96ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd,
97 struct ovs_header **ovs_reply_header)
98{
99 struct sk_buff *skb;
100 struct ovs_header *ovs_header = info->userhdr;
101
102 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
103 if (!skb)
104 return ERR_PTR(-ENOMEM);
105
106 *ovs_reply_header = genlmsg_put(skb, info->snd_portid,
107 info->snd_seq,
108 &dp_meter_genl_family, 0, cmd);
109 if (!ovs_reply_header) {
110 nlmsg_free(skb);
111 return ERR_PTR(-EMSGSIZE);
112 }
113 (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex;
114
115 return skb;
116}
117
118static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
119 struct dp_meter *meter)
120{
121 struct nlattr *nla;
122 struct dp_meter_band *band;
123 u16 i;
124
125 if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
126 goto error;
127
128 if (!meter)
129 return 0;
130
131 if (nla_put(reply, OVS_METER_ATTR_STATS,
132 sizeof(struct ovs_flow_stats), &meter->stats) ||
133 nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
134 OVS_METER_ATTR_PAD))
135 goto error;
136
137 nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
138 if (!nla)
139 goto error;
140
141 band = meter->bands;
142
143 for (i = 0; i < meter->n_bands; ++i, ++band) {
144 struct nlattr *band_nla;
145
146 band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
147 if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS,
148 sizeof(struct ovs_flow_stats),
149 &band->stats))
150 goto error;
151 nla_nest_end(reply, band_nla);
152 }
153 nla_nest_end(reply, nla);
154
155 return 0;
156error:
157 return -EMSGSIZE;
158}
159
160static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
161{
162 struct sk_buff *reply;
163 struct ovs_header *ovs_reply_header;
164 struct nlattr *nla, *band_nla;
165 int err;
166
167 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
168 &ovs_reply_header);
169 if (!reply)
170 return PTR_ERR(reply);
171
172 if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
173 nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
174 goto nla_put_failure;
175
176 nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS);
177 if (!nla)
178 goto nla_put_failure;
179
180 band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC);
181 if (!band_nla)
182 goto nla_put_failure;
183 /* Currently only DROP band type is supported. */
184 if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP))
185 goto nla_put_failure;
186 nla_nest_end(reply, band_nla);
187 nla_nest_end(reply, nla);
188
189 genlmsg_end(reply, ovs_reply_header);
190 return genlmsg_reply(reply, info);
191
192nla_put_failure:
193 nlmsg_free(reply);
194 err = -EMSGSIZE;
195 return err;
196}
197
198static struct dp_meter *dp_meter_create(struct nlattr **a)
199{
200 struct nlattr *nla;
201 int rem;
202 u16 n_bands = 0;
203 struct dp_meter *meter;
204 struct dp_meter_band *band;
205 int err;
206
207 /* Validate attributes, count the bands. */
208 if (!a[OVS_METER_ATTR_BANDS])
209 return ERR_PTR(-EINVAL);
210
211 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem)
212 if (++n_bands > DP_MAX_BANDS)
213 return ERR_PTR(-EINVAL);
214
215 /* Allocate and set up the meter before locking anything. */
216 meter = kzalloc(n_bands * sizeof(struct dp_meter_band) +
217 sizeof(*meter), GFP_KERNEL);
218 if (!meter)
219 return ERR_PTR(-ENOMEM);
220
221 meter->used = div_u64(ktime_get_ns(), 1000 * 1000);
222 meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0;
223 meter->keep_stats = !a[OVS_METER_ATTR_CLEAR];
224 spin_lock_init(&meter->lock);
225 if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) {
226 meter->stats = *(struct ovs_flow_stats *)
227 nla_data(a[OVS_METER_ATTR_STATS]);
228 }
229 meter->n_bands = n_bands;
230
231 /* Set up meter bands. */
232 band = meter->bands;
233 nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) {
234 struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
235 u32 band_max_delta_t;
236
237 err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
238 nla_data(nla), nla_len(nla), band_policy,
239 NULL);
240 if (err)
241 goto exit_free_meter;
242
243 if (!attr[OVS_BAND_ATTR_TYPE] ||
244 !attr[OVS_BAND_ATTR_RATE] ||
245 !attr[OVS_BAND_ATTR_BURST]) {
246 err = -EINVAL;
247 goto exit_free_meter;
248 }
249
250 band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]);
251 band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]);
252 band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]);
253 /* Figure out max delta_t that is enough to fill any bucket.
254 * Keep max_delta_t size to the bucket units:
255 * pkts => 1/1000 packets, kilobits => bits.
256 */
257 band_max_delta_t = (band->burst_size + band->rate) * 1000;
258 /* Start with a full bucket. */
259 band->bucket = band_max_delta_t;
260 if (band_max_delta_t > meter->max_delta_t)
261 meter->max_delta_t = band_max_delta_t;
262 band++;
263 }
264
265 return meter;
266
267exit_free_meter:
268 kfree(meter);
269 return ERR_PTR(err);
270}
271
272static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
273{
274 struct nlattr **a = info->attrs;
275 struct dp_meter *meter, *old_meter;
276 struct sk_buff *reply;
277 struct ovs_header *ovs_reply_header;
278 struct ovs_header *ovs_header = info->userhdr;
279 struct datapath *dp;
280 int err;
281 u32 meter_id;
282 bool failed;
283
284 meter = dp_meter_create(a);
285 if (IS_ERR_OR_NULL(meter))
286 return PTR_ERR(meter);
287
288 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET,
289 &ovs_reply_header);
290 if (IS_ERR(reply)) {
291 err = PTR_ERR(reply);
292 goto exit_free_meter;
293 }
294
295 ovs_lock();
296 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
297 if (!dp) {
298 err = -ENODEV;
299 goto exit_unlock;
300 }
301
302 if (!a[OVS_METER_ATTR_ID]) {
303 err = -ENODEV;
304 goto exit_unlock;
305 }
306
307 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
308
309 /* Cannot fail after this. */
310 old_meter = lookup_meter(dp, meter_id);
311 detach_meter(old_meter);
312 attach_meter(dp, meter);
313 ovs_unlock();
314
315 /* Build response with the meter_id and stats from
316 * the old meter, if any.
317 */
318 failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id);
319 WARN_ON(failed);
320 if (old_meter) {
321 spin_lock_bh(&old_meter->lock);
322 if (old_meter->keep_stats) {
323 err = ovs_meter_cmd_reply_stats(reply, meter_id,
324 old_meter);
325 WARN_ON(err);
326 }
327 spin_unlock_bh(&old_meter->lock);
328 ovs_meter_free(old_meter);
329 }
330
331 genlmsg_end(reply, ovs_reply_header);
332 return genlmsg_reply(reply, info);
333
334exit_unlock:
335 ovs_unlock();
336 nlmsg_free(reply);
337exit_free_meter:
338 kfree(meter);
339 return err;
340}
341
342static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
343{
344 struct nlattr **a = info->attrs;
345 u32 meter_id;
346 struct ovs_header *ovs_header = info->userhdr;
347 struct ovs_header *ovs_reply_header;
348 struct datapath *dp;
349 int err;
350 struct sk_buff *reply;
351 struct dp_meter *meter;
352
353 if (!a[OVS_METER_ATTR_ID])
354 return -EINVAL;
355
356 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
357
358 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET,
359 &ovs_reply_header);
360 if (IS_ERR(reply))
361 return PTR_ERR(reply);
362
363 ovs_lock();
364
365 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
366 if (!dp) {
367 err = -ENODEV;
368 goto exit_unlock;
369 }
370
371 /* Locate meter, copy stats. */
372 meter = lookup_meter(dp, meter_id);
373 if (!meter) {
374 err = -ENOENT;
375 goto exit_unlock;
376 }
377
378 spin_lock_bh(&meter->lock);
379 err = ovs_meter_cmd_reply_stats(reply, meter_id, meter);
380 spin_unlock_bh(&meter->lock);
381 if (err)
382 goto exit_unlock;
383
384 ovs_unlock();
385
386 genlmsg_end(reply, ovs_reply_header);
387 return genlmsg_reply(reply, info);
388
389exit_unlock:
390 ovs_unlock();
391 nlmsg_free(reply);
392 return err;
393}
394
395static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
396{
397 struct nlattr **a = info->attrs;
398 u32 meter_id;
399 struct ovs_header *ovs_header = info->userhdr;
400 struct ovs_header *ovs_reply_header;
401 struct datapath *dp;
402 int err;
403 struct sk_buff *reply;
404 struct dp_meter *old_meter;
405
406 if (!a[OVS_METER_ATTR_ID])
407 return -EINVAL;
408 meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
409
410 reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
411 &ovs_reply_header);
412 if (IS_ERR(reply))
413 return PTR_ERR(reply);
414
415 ovs_lock();
416
417 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
418 if (!dp) {
419 err = -ENODEV;
420 goto exit_unlock;
421 }
422
423 old_meter = lookup_meter(dp, meter_id);
424 if (old_meter) {
425 spin_lock_bh(&old_meter->lock);
426 err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
427 WARN_ON(err);
428 spin_unlock_bh(&old_meter->lock);
429 detach_meter(old_meter);
430 }
431 ovs_unlock();
432 ovs_meter_free(old_meter);
433 genlmsg_end(reply, ovs_reply_header);
434 return genlmsg_reply(reply, info);
435
436exit_unlock:
437 ovs_unlock();
438 nlmsg_free(reply);
439 return err;
440}
441
442/* Meter action execution.
443 *
444 * Return true 'meter_id' drop band is triggered. The 'skb' should be
445 * dropped by the caller'.
446 */
447bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
448 struct sw_flow_key *key, u32 meter_id)
449{
450 struct dp_meter *meter;
451 struct dp_meter_band *band;
452 long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
453 long long int long_delta_ms;
454 u32 delta_ms;
455 u32 cost;
456 int i, band_exceeded_max = -1;
457 u32 band_exceeded_rate = 0;
458
459 meter = lookup_meter(dp, meter_id);
460 /* Do not drop the packet when there is no meter. */
461 if (!meter)
462 return false;
463
464 /* Lock the meter while using it. */
465 spin_lock(&meter->lock);
466
467 long_delta_ms = (now_ms - meter->used); /* ms */
468
469 /* Make sure delta_ms will not be too large, so that bucket will not
470 * wrap around below.
471 */
472 delta_ms = (long_delta_ms > (long long int)meter->max_delta_t)
473 ? meter->max_delta_t : (u32)long_delta_ms;
474
475 /* Update meter statistics.
476 */
477 meter->used = now_ms;
478 meter->stats.n_packets += 1;
479 meter->stats.n_bytes += skb->len;
480
481 /* Bucket rate is either in kilobits per second, or in packets per
482 * second. We maintain the bucket in the units of either bits or
483 * 1/1000th of a packet, correspondingly.
484 * Then, when rate is multiplied with milliseconds, we get the
485 * bucket units:
486 * msec * kbps = bits, and
487 * msec * packets/sec = 1/1000 packets.
488 *
489 * 'cost' is the number of bucket units in this packet.
490 */
491 cost = (meter->kbps) ? skb->len * 8 : 1000;
492
493 /* Update all bands and find the one hit with the highest rate. */
494 for (i = 0; i < meter->n_bands; ++i) {
495 long long int max_bucket_size;
496
497 band = &meter->bands[i];
498 max_bucket_size = (band->burst_size + band->rate) * 1000;
499
500 band->bucket += delta_ms * band->rate;
501 if (band->bucket > max_bucket_size)
502 band->bucket = max_bucket_size;
503
504 if (band->bucket >= cost) {
505 band->bucket -= cost;
506 } else if (band->rate > band_exceeded_rate) {
507 band_exceeded_rate = band->rate;
508 band_exceeded_max = i;
509 }
510 }
511
512 if (band_exceeded_max >= 0) {
513 /* Update band statistics. */
514 band = &meter->bands[band_exceeded_max];
515 band->stats.n_packets += 1;
516 band->stats.n_bytes += skb->len;
517
518 /* Drop band triggered, let the caller drop the 'skb'. */
519 if (band->type == OVS_METER_BAND_TYPE_DROP) {
520 spin_unlock(&meter->lock);
521 return true;
522 }
523 }
524
525 spin_unlock(&meter->lock);
526 return false;
527}
528
529static struct genl_ops dp_meter_genl_ops[] = {
530 { .cmd = OVS_METER_CMD_FEATURES,
531 .flags = 0, /* OK for unprivileged users. */
532 .policy = meter_policy,
533 .doit = ovs_meter_cmd_features
534 },
535 { .cmd = OVS_METER_CMD_SET,
536 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
537 * privilege.
538 */
539 .policy = meter_policy,
540 .doit = ovs_meter_cmd_set,
541 },
542 { .cmd = OVS_METER_CMD_GET,
543 .flags = 0, /* OK for unprivileged users. */
544 .policy = meter_policy,
545 .doit = ovs_meter_cmd_get,
546 },
547 { .cmd = OVS_METER_CMD_DEL,
548 .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
549 * privilege.
550 */
551 .policy = meter_policy,
552 .doit = ovs_meter_cmd_del
553 },
554};
555
556static const struct genl_multicast_group ovs_meter_multicast_group = {
557 .name = OVS_METER_MCGROUP,
558};
559
560struct genl_family dp_meter_genl_family __ro_after_init = {
561 .hdrsize = sizeof(struct ovs_header),
562 .name = OVS_METER_FAMILY,
563 .version = OVS_METER_VERSION,
564 .maxattr = OVS_METER_ATTR_MAX,
565 .netnsok = true,
566 .parallel_ops = true,
567 .ops = dp_meter_genl_ops,
568 .n_ops = ARRAY_SIZE(dp_meter_genl_ops),
569 .mcgrps = &ovs_meter_multicast_group,
570 .n_mcgrps = 1,
571 .module = THIS_MODULE,
572};
573
574int ovs_meters_init(struct datapath *dp)
575{
576 int i;
577
578 dp->meters = kmalloc_array(METER_HASH_BUCKETS,
579 sizeof(struct hlist_head), GFP_KERNEL);
580
581 if (!dp->meters)
582 return -ENOMEM;
583
584 for (i = 0; i < METER_HASH_BUCKETS; i++)
585 INIT_HLIST_HEAD(&dp->meters[i]);
586
587 return 0;
588}
589
590void ovs_meters_exit(struct datapath *dp)
591{
592 int i;
593
594 for (i = 0; i < METER_HASH_BUCKETS; i++) {
595 struct hlist_head *head = &dp->meters[i];
596 struct dp_meter *meter;
597 struct hlist_node *n;
598
599 hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
600 kfree(meter);
601 }
602
603 kfree(dp->meters);
604}