2 * Copyright (c) 2017 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/skbuff.h>
14 #include <linux/kernel.h>
15 #include <linux/openvswitch.h>
16 #include <linux/overflow.h>
17 #include <linux/netlink.h>
18 #include <linux/rculist.h>
20 #include <net/netlink.h>
21 #include <net/genetlink.h>
27 #define METER_HASH_BUCKETS 1024
29 static const struct nla_policy meter_policy
[OVS_METER_ATTR_MAX
+ 1] = {
30 [OVS_METER_ATTR_ID
] = { .type
= NLA_U32
, },
31 [OVS_METER_ATTR_KBPS
] = { .type
= NLA_FLAG
},
32 [OVS_METER_ATTR_STATS
] = { .len
= sizeof(struct ovs_flow_stats
) },
33 [OVS_METER_ATTR_BANDS
] = { .type
= NLA_NESTED
},
34 [OVS_METER_ATTR_USED
] = { .type
= NLA_U64
},
35 [OVS_METER_ATTR_CLEAR
] = { .type
= NLA_FLAG
},
36 [OVS_METER_ATTR_MAX_METERS
] = { .type
= NLA_U32
},
37 [OVS_METER_ATTR_MAX_BANDS
] = { .type
= NLA_U32
},
40 static const struct nla_policy band_policy
[OVS_BAND_ATTR_MAX
+ 1] = {
41 [OVS_BAND_ATTR_TYPE
] = { .type
= NLA_U32
, },
42 [OVS_BAND_ATTR_RATE
] = { .type
= NLA_U32
, },
43 [OVS_BAND_ATTR_BURST
] = { .type
= NLA_U32
, },
44 [OVS_BAND_ATTR_STATS
] = { .len
= sizeof(struct ovs_flow_stats
) },
47 static void ovs_meter_free(struct dp_meter
*meter
)
52 kfree_rcu(meter
, rcu
);
55 static struct hlist_head
*meter_hash_bucket(const struct datapath
*dp
,
58 return &dp
->meters
[meter_id
& (METER_HASH_BUCKETS
- 1)];
61 /* Call with ovs_mutex or RCU read lock. */
62 static struct dp_meter
*lookup_meter(const struct datapath
*dp
,
65 struct dp_meter
*meter
;
66 struct hlist_head
*head
;
68 head
= meter_hash_bucket(dp
, meter_id
);
69 hlist_for_each_entry_rcu(meter
, head
, dp_hash_node
) {
70 if (meter
->id
== meter_id
)
76 static void attach_meter(struct datapath
*dp
, struct dp_meter
*meter
)
78 struct hlist_head
*head
= meter_hash_bucket(dp
, meter
->id
);
80 hlist_add_head_rcu(&meter
->dp_hash_node
, head
);
83 static void detach_meter(struct dp_meter
*meter
)
87 hlist_del_rcu(&meter
->dp_hash_node
);
90 static struct sk_buff
*
91 ovs_meter_cmd_reply_start(struct genl_info
*info
, u8 cmd
,
92 struct ovs_header
**ovs_reply_header
)
95 struct ovs_header
*ovs_header
= info
->userhdr
;
97 skb
= nlmsg_new(NLMSG_DEFAULT_SIZE
, GFP_ATOMIC
);
99 return ERR_PTR(-ENOMEM
);
101 *ovs_reply_header
= genlmsg_put(skb
, info
->snd_portid
,
103 &dp_meter_genl_family
, 0, cmd
);
104 if (!*ovs_reply_header
) {
106 return ERR_PTR(-EMSGSIZE
);
108 (*ovs_reply_header
)->dp_ifindex
= ovs_header
->dp_ifindex
;
113 static int ovs_meter_cmd_reply_stats(struct sk_buff
*reply
, u32 meter_id
,
114 struct dp_meter
*meter
)
117 struct dp_meter_band
*band
;
120 if (nla_put_u32(reply
, OVS_METER_ATTR_ID
, meter_id
))
126 if (nla_put(reply
, OVS_METER_ATTR_STATS
,
127 sizeof(struct ovs_flow_stats
), &meter
->stats
) ||
128 nla_put_u64_64bit(reply
, OVS_METER_ATTR_USED
, meter
->used
,
132 nla
= nla_nest_start_noflag(reply
, OVS_METER_ATTR_BANDS
);
138 for (i
= 0; i
< meter
->n_bands
; ++i
, ++band
) {
139 struct nlattr
*band_nla
;
141 band_nla
= nla_nest_start_noflag(reply
, OVS_BAND_ATTR_UNSPEC
);
142 if (!band_nla
|| nla_put(reply
, OVS_BAND_ATTR_STATS
,
143 sizeof(struct ovs_flow_stats
),
146 nla_nest_end(reply
, band_nla
);
148 nla_nest_end(reply
, nla
);
155 static int ovs_meter_cmd_features(struct sk_buff
*skb
, struct genl_info
*info
)
157 struct sk_buff
*reply
;
158 struct ovs_header
*ovs_reply_header
;
159 struct nlattr
*nla
, *band_nla
;
162 reply
= ovs_meter_cmd_reply_start(info
, OVS_METER_CMD_FEATURES
,
165 return PTR_ERR(reply
);
167 if (nla_put_u32(reply
, OVS_METER_ATTR_MAX_METERS
, U32_MAX
) ||
168 nla_put_u32(reply
, OVS_METER_ATTR_MAX_BANDS
, DP_MAX_BANDS
))
169 goto nla_put_failure
;
171 nla
= nla_nest_start_noflag(reply
, OVS_METER_ATTR_BANDS
);
173 goto nla_put_failure
;
175 band_nla
= nla_nest_start_noflag(reply
, OVS_BAND_ATTR_UNSPEC
);
177 goto nla_put_failure
;
178 /* Currently only DROP band type is supported. */
179 if (nla_put_u32(reply
, OVS_BAND_ATTR_TYPE
, OVS_METER_BAND_TYPE_DROP
))
180 goto nla_put_failure
;
181 nla_nest_end(reply
, band_nla
);
182 nla_nest_end(reply
, nla
);
184 genlmsg_end(reply
, ovs_reply_header
);
185 return genlmsg_reply(reply
, info
);
193 #ifndef HAVE_KTIME_GET_NS
195 #define ktime_to_ns(kt) ((kt).tv64)
197 static inline u64
ktime_get_ns(void)
199 return ktime_to_ns(ktime_get());
203 static struct dp_meter
*dp_meter_create(struct nlattr
**a
)
208 struct dp_meter
*meter
;
209 struct dp_meter_band
*band
;
212 /* Validate attributes, count the bands. */
213 if (!a
[OVS_METER_ATTR_BANDS
])
214 return ERR_PTR(-EINVAL
);
216 nla_for_each_nested(nla
, a
[OVS_METER_ATTR_BANDS
], rem
)
217 if (++n_bands
> DP_MAX_BANDS
)
218 return ERR_PTR(-EINVAL
);
220 /* Allocate and set up the meter before locking anything. */
221 meter
= kzalloc(struct_size(meter
, bands
, n_bands
), GFP_KERNEL
);
223 return ERR_PTR(-ENOMEM
);
225 meter
->id
= nla_get_u32(a
[OVS_METER_ATTR_ID
]);
226 meter
->used
= div_u64(ktime_get_ns(), 1000 * 1000);
227 meter
->kbps
= a
[OVS_METER_ATTR_KBPS
] ? 1 : 0;
228 meter
->keep_stats
= !a
[OVS_METER_ATTR_CLEAR
];
229 spin_lock_init(&meter
->lock
);
230 if (meter
->keep_stats
&& a
[OVS_METER_ATTR_STATS
]) {
231 meter
->stats
= *(struct ovs_flow_stats
*)
232 nla_data(a
[OVS_METER_ATTR_STATS
]);
234 meter
->n_bands
= n_bands
;
236 /* Set up meter bands. */
238 nla_for_each_nested(nla
, a
[OVS_METER_ATTR_BANDS
], rem
) {
239 struct nlattr
*attr
[OVS_BAND_ATTR_MAX
+ 1];
240 u32 band_max_delta_t
;
242 err
= nla_parse((struct nlattr
**)&attr
, OVS_BAND_ATTR_MAX
,
243 nla_data(nla
), nla_len(nla
), band_policy
,
246 goto exit_free_meter
;
248 if (!attr
[OVS_BAND_ATTR_TYPE
] ||
249 !attr
[OVS_BAND_ATTR_RATE
] ||
250 !attr
[OVS_BAND_ATTR_BURST
]) {
252 goto exit_free_meter
;
255 band
->type
= nla_get_u32(attr
[OVS_BAND_ATTR_TYPE
]);
256 band
->rate
= nla_get_u32(attr
[OVS_BAND_ATTR_RATE
]);
257 if (band
->rate
== 0) {
259 goto exit_free_meter
;
262 band
->burst_size
= nla_get_u32(attr
[OVS_BAND_ATTR_BURST
]);
263 /* Figure out max delta_t that is enough to fill any bucket.
264 * Keep max_delta_t size to the bucket units:
265 * pkts => 1/1000 packets, kilobits => bits.
267 * Start with a full bucket.
269 band
->bucket
= (band
->burst_size
+ band
->rate
) * 1000;
270 band_max_delta_t
= band
->bucket
/ band
->rate
;
271 if (band_max_delta_t
> meter
->max_delta_t
)
272 meter
->max_delta_t
= band_max_delta_t
;
283 static int ovs_meter_cmd_set(struct sk_buff
*skb
, struct genl_info
*info
)
285 struct nlattr
**a
= info
->attrs
;
286 struct dp_meter
*meter
, *old_meter
;
287 struct sk_buff
*reply
;
288 struct ovs_header
*ovs_reply_header
;
289 struct ovs_header
*ovs_header
= info
->userhdr
;
295 if (!a
[OVS_METER_ATTR_ID
]) {
299 meter
= dp_meter_create(a
);
300 if (IS_ERR_OR_NULL(meter
))
301 return PTR_ERR(meter
);
303 reply
= ovs_meter_cmd_reply_start(info
, OVS_METER_CMD_SET
,
306 err
= PTR_ERR(reply
);
307 goto exit_free_meter
;
311 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
317 meter_id
= nla_get_u32(a
[OVS_METER_ATTR_ID
]);
319 /* Cannot fail after this. */
320 old_meter
= lookup_meter(dp
, meter_id
);
321 detach_meter(old_meter
);
322 attach_meter(dp
, meter
);
325 /* Build response with the meter_id and stats from
326 * the old meter, if any.
328 failed
= nla_put_u32(reply
, OVS_METER_ATTR_ID
, meter_id
);
331 spin_lock_bh(&old_meter
->lock
);
332 if (old_meter
->keep_stats
) {
333 err
= ovs_meter_cmd_reply_stats(reply
, meter_id
,
337 spin_unlock_bh(&old_meter
->lock
);
338 ovs_meter_free(old_meter
);
341 genlmsg_end(reply
, ovs_reply_header
);
342 return genlmsg_reply(reply
, info
);
352 static int ovs_meter_cmd_get(struct sk_buff
*skb
, struct genl_info
*info
)
354 struct nlattr
**a
= info
->attrs
;
356 struct ovs_header
*ovs_header
= info
->userhdr
;
357 struct ovs_header
*ovs_reply_header
;
360 struct sk_buff
*reply
;
361 struct dp_meter
*meter
;
363 if (!a
[OVS_METER_ATTR_ID
])
366 meter_id
= nla_get_u32(a
[OVS_METER_ATTR_ID
]);
368 reply
= ovs_meter_cmd_reply_start(info
, OVS_METER_CMD_GET
,
371 return PTR_ERR(reply
);
375 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
381 /* Locate meter, copy stats. */
382 meter
= lookup_meter(dp
, meter_id
);
388 spin_lock_bh(&meter
->lock
);
389 err
= ovs_meter_cmd_reply_stats(reply
, meter_id
, meter
);
390 spin_unlock_bh(&meter
->lock
);
396 genlmsg_end(reply
, ovs_reply_header
);
397 return genlmsg_reply(reply
, info
);
405 static int ovs_meter_cmd_del(struct sk_buff
*skb
, struct genl_info
*info
)
407 struct nlattr
**a
= info
->attrs
;
409 struct ovs_header
*ovs_header
= info
->userhdr
;
410 struct ovs_header
*ovs_reply_header
;
413 struct sk_buff
*reply
;
414 struct dp_meter
*old_meter
;
416 if (!a
[OVS_METER_ATTR_ID
])
418 meter_id
= nla_get_u32(a
[OVS_METER_ATTR_ID
]);
420 reply
= ovs_meter_cmd_reply_start(info
, OVS_METER_CMD_DEL
,
423 return PTR_ERR(reply
);
427 dp
= get_dp(sock_net(skb
->sk
), ovs_header
->dp_ifindex
);
433 old_meter
= lookup_meter(dp
, meter_id
);
435 spin_lock_bh(&old_meter
->lock
);
436 err
= ovs_meter_cmd_reply_stats(reply
, meter_id
, old_meter
);
438 spin_unlock_bh(&old_meter
->lock
);
439 detach_meter(old_meter
);
442 ovs_meter_free(old_meter
);
443 genlmsg_end(reply
, ovs_reply_header
);
444 return genlmsg_reply(reply
, info
);
452 /* Meter action execution.
454 * Return true 'meter_id' drop band is triggered. The 'skb' should be
455 * dropped by the caller'.
457 bool ovs_meter_execute(struct datapath
*dp
, struct sk_buff
*skb
,
458 struct sw_flow_key
*key
, u32 meter_id
)
460 struct dp_meter
*meter
;
461 struct dp_meter_band
*band
;
462 long long int now_ms
= div_u64(ktime_get_ns(), 1000 * 1000);
463 long long int long_delta_ms
;
466 int i
, band_exceeded_max
= -1;
467 u32 band_exceeded_rate
= 0;
469 meter
= lookup_meter(dp
, meter_id
);
470 /* Do not drop the packet when there is no meter. */
474 /* Lock the meter while using it. */
475 spin_lock(&meter
->lock
);
477 long_delta_ms
= (now_ms
- meter
->used
); /* ms */
479 /* Make sure delta_ms will not be too large, so that bucket will not
482 delta_ms
= (long_delta_ms
> (long long int)meter
->max_delta_t
)
483 ? meter
->max_delta_t
: (u32
)long_delta_ms
;
485 /* Update meter statistics.
487 meter
->used
= now_ms
;
488 meter
->stats
.n_packets
+= 1;
489 meter
->stats
.n_bytes
+= skb
->len
;
491 /* Bucket rate is either in kilobits per second, or in packets per
492 * second. We maintain the bucket in the units of either bits or
493 * 1/1000th of a packet, correspondingly.
494 * Then, when rate is multiplied with milliseconds, we get the
496 * msec * kbps = bits, and
497 * msec * packets/sec = 1/1000 packets.
499 * 'cost' is the number of bucket units in this packet.
501 cost
= (meter
->kbps
) ? skb
->len
* 8 : 1000;
503 /* Update all bands and find the one hit with the highest rate. */
504 for (i
= 0; i
< meter
->n_bands
; ++i
) {
505 long long int max_bucket_size
;
507 band
= &meter
->bands
[i
];
508 max_bucket_size
= (band
->burst_size
+ band
->rate
) * 1000LL;
510 band
->bucket
+= delta_ms
* band
->rate
;
511 if (band
->bucket
> max_bucket_size
)
512 band
->bucket
= max_bucket_size
;
514 if (band
->bucket
>= cost
) {
515 band
->bucket
-= cost
;
516 } else if (band
->rate
> band_exceeded_rate
) {
517 band_exceeded_rate
= band
->rate
;
518 band_exceeded_max
= i
;
522 if (band_exceeded_max
>= 0) {
523 /* Update band statistics. */
524 band
= &meter
->bands
[band_exceeded_max
];
525 band
->stats
.n_packets
+= 1;
526 band
->stats
.n_bytes
+= skb
->len
;
528 /* Drop band triggered, let the caller drop the 'skb'. */
529 if (band
->type
== OVS_METER_BAND_TYPE_DROP
) {
530 spin_unlock(&meter
->lock
);
535 spin_unlock(&meter
->lock
);
539 static struct genl_ops dp_meter_genl_ops
[] = {
540 { .cmd
= OVS_METER_CMD_FEATURES
,
541 #ifdef HAVE_GENL_VALIDATE_FLAGS
542 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
544 .flags
= 0, /* OK for unprivileged users. */
545 .policy
= meter_policy
,
546 .doit
= ovs_meter_cmd_features
548 { .cmd
= OVS_METER_CMD_SET
,
549 #ifdef HAVE_GENL_VALIDATE_FLAGS
550 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
552 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN
555 .policy
= meter_policy
,
556 .doit
= ovs_meter_cmd_set
,
558 { .cmd
= OVS_METER_CMD_GET
,
559 #ifdef HAVE_GENL_VALIDATE_FLAGS
560 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
562 .flags
= 0, /* OK for unprivileged users. */
563 .policy
= meter_policy
,
564 .doit
= ovs_meter_cmd_get
,
566 { .cmd
= OVS_METER_CMD_DEL
,
567 #ifdef HAVE_GENL_VALIDATE_FLAGS
568 .validate
= GENL_DONT_VALIDATE_STRICT
| GENL_DONT_VALIDATE_DUMP
,
570 .flags
= GENL_ADMIN_PERM
, /* Requires CAP_NET_ADMIN
573 .policy
= meter_policy
,
574 .doit
= ovs_meter_cmd_del
578 static const struct genl_multicast_group ovs_meter_multicast_group
= {
579 .name
= OVS_METER_MCGROUP
,
582 struct genl_family dp_meter_genl_family __ro_after_init
= {
583 .hdrsize
= sizeof(struct ovs_header
),
584 .name
= OVS_METER_FAMILY
,
585 .version
= OVS_METER_VERSION
,
586 .maxattr
= OVS_METER_ATTR_MAX
,
588 .parallel_ops
= true,
589 .ops
= dp_meter_genl_ops
,
590 .n_ops
= ARRAY_SIZE(dp_meter_genl_ops
),
591 .mcgrps
= &ovs_meter_multicast_group
,
593 .module
= THIS_MODULE
,
596 int ovs_meters_init(struct datapath
*dp
)
600 dp
->meters
= kmalloc_array(METER_HASH_BUCKETS
,
601 sizeof(struct hlist_head
), GFP_KERNEL
);
606 for (i
= 0; i
< METER_HASH_BUCKETS
; i
++)
607 INIT_HLIST_HEAD(&dp
->meters
[i
]);
612 void ovs_meters_exit(struct datapath
*dp
)
616 for (i
= 0; i
< METER_HASH_BUCKETS
; i
++) {
617 struct hlist_head
*head
= &dp
->meters
[i
];
618 struct dp_meter
*meter
;
619 struct hlist_node
*n
;
621 hlist_for_each_entry_safe(meter
, n
, head
, dp_hash_node
)