]>
Commit | Line | Data |
---|---|---|
25763b3c | 1 | // SPDX-License-Identifier: GPL-2.0-only |
96fbc13d AZ |
2 | /* |
3 | * Copyright (c) 2017 Nicira, Inc. | |
96fbc13d AZ |
4 | */ |
5 | ||
6 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
7 | ||
8 | #include <linux/if.h> | |
9 | #include <linux/skbuff.h> | |
10 | #include <linux/ip.h> | |
11 | #include <linux/kernel.h> | |
12 | #include <linux/openvswitch.h> | |
13 | #include <linux/netlink.h> | |
14 | #include <linux/rculist.h> | |
eb58eebc | 15 | #include <linux/swap.h> |
96fbc13d AZ |
16 | |
17 | #include <net/netlink.h> | |
18 | #include <net/genetlink.h> | |
19 | ||
20 | #include "datapath.h" | |
21 | #include "meter.h" | |
22 | ||
96fbc13d AZ |
23 | static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { |
24 | [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, | |
25 | [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, | |
26 | [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, | |
27 | [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, | |
28 | [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, | |
29 | [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, | |
30 | [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, | |
31 | [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, | |
32 | }; | |
33 | ||
34 | static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { | |
35 | [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, | |
36 | [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, | |
37 | [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, | |
38 | [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, | |
39 | }; | |
40 | ||
c7c4c44c TZ |
41 | static u32 meter_hash(struct dp_meter_instance *ti, u32 id) |
42 | { | |
43 | return id % ti->n_meters; | |
44 | } | |
45 | ||
96fbc13d AZ |
46 | static void ovs_meter_free(struct dp_meter *meter) |
47 | { | |
48 | if (!meter) | |
49 | return; | |
50 | ||
6dc14dc4 | 51 | kfree_rcu(meter, rcu); |
96fbc13d AZ |
52 | } |
53 | ||
96fbc13d | 54 | /* Call with ovs_mutex or RCU read lock. */ |
c7c4c44c | 55 | static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl, |
96fbc13d AZ |
56 | u32 meter_id) |
57 | { | |
c7c4c44c TZ |
58 | struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); |
59 | u32 hash = meter_hash(ti, meter_id); | |
96fbc13d | 60 | struct dp_meter *meter; |
96fbc13d | 61 | |
c7c4c44c TZ |
62 | meter = rcu_dereference_ovsl(ti->dp_meters[hash]); |
63 | if (meter && likely(meter->id == meter_id)) | |
64 | return meter; | |
65 | ||
96fbc13d AZ |
66 | return NULL; |
67 | } | |
68 | ||
c7c4c44c TZ |
69 | static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size) |
70 | { | |
71 | struct dp_meter_instance *ti; | |
72 | ||
73 | ti = kvzalloc(sizeof(*ti) + | |
74 | sizeof(struct dp_meter *) * size, | |
75 | GFP_KERNEL); | |
76 | if (!ti) | |
77 | return NULL; | |
78 | ||
79 | ti->n_meters = size; | |
80 | ||
81 | return ti; | |
82 | } | |
83 | ||
/* Immediately release a meter instance allocated by
 * dp_meter_instance_alloc().  The meters referenced from its slots are
 * not freed here.
 */
static void dp_meter_instance_free(struct dp_meter_instance *ti)
{
	kvfree(ti);
}
88 | ||
89 | static void dp_meter_instance_free_rcu(struct rcu_head *rcu) | |
90 | { | |
91 | struct dp_meter_instance *ti; | |
92 | ||
93 | ti = container_of(rcu, struct dp_meter_instance, rcu); | |
94 | kvfree(ti); | |
95 | } | |
96 | ||
97 | static int | |
98 | dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size) | |
99 | { | |
100 | struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); | |
101 | int n_meters = min(size, ti->n_meters); | |
102 | struct dp_meter_instance *new_ti; | |
103 | int i; | |
104 | ||
105 | new_ti = dp_meter_instance_alloc(size); | |
106 | if (!new_ti) | |
107 | return -ENOMEM; | |
108 | ||
109 | for (i = 0; i < n_meters; i++) | |
4b36a0df TZ |
110 | if (rcu_dereference_ovsl(ti->dp_meters[i])) |
111 | new_ti->dp_meters[i] = ti->dp_meters[i]; | |
c7c4c44c TZ |
112 | |
113 | rcu_assign_pointer(tbl->ti, new_ti); | |
114 | call_rcu(&ti->rcu, dp_meter_instance_free_rcu); | |
115 | ||
116 | return 0; | |
117 | } | |
118 | ||
119 | static void dp_meter_instance_insert(struct dp_meter_instance *ti, | |
120 | struct dp_meter *meter) | |
121 | { | |
122 | u32 hash; | |
123 | ||
124 | hash = meter_hash(ti, meter->id); | |
125 | rcu_assign_pointer(ti->dp_meters[hash], meter); | |
126 | } | |
127 | ||
128 | static void dp_meter_instance_remove(struct dp_meter_instance *ti, | |
129 | struct dp_meter *meter) | |
96fbc13d | 130 | { |
c7c4c44c | 131 | u32 hash; |
96fbc13d | 132 | |
c7c4c44c TZ |
133 | hash = meter_hash(ti, meter->id); |
134 | RCU_INIT_POINTER(ti->dp_meters[hash], NULL); | |
96fbc13d AZ |
135 | } |
136 | ||
c7c4c44c | 137 | static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) |
96fbc13d | 138 | { |
c7c4c44c TZ |
139 | struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti); |
140 | u32 hash = meter_hash(ti, meter->id); | |
eb58eebc | 141 | int err; |
c7c4c44c TZ |
142 | |
143 | /* In generally, slots selected should be empty, because | |
144 | * OvS uses id-pool to fetch a available id. | |
145 | */ | |
146 | if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash]))) | |
147 | return -EBUSY; | |
148 | ||
149 | dp_meter_instance_insert(ti, meter); | |
150 | ||
151 | /* That function is thread-safe. */ | |
eb58eebc TZ |
152 | tbl->count++; |
153 | if (tbl->count >= tbl->max_meters_allowed) { | |
154 | err = -EFBIG; | |
155 | goto attach_err; | |
156 | } | |
157 | ||
158 | if (tbl->count >= ti->n_meters && | |
159 | dp_meter_instance_realloc(tbl, ti->n_meters * 2)) { | |
160 | err = -ENOMEM; | |
161 | goto attach_err; | |
162 | } | |
c7c4c44c TZ |
163 | |
164 | return 0; | |
165 | ||
eb58eebc | 166 | attach_err: |
c7c4c44c TZ |
167 | dp_meter_instance_remove(ti, meter); |
168 | tbl->count--; | |
eb58eebc | 169 | return err; |
c7c4c44c TZ |
170 | } |
171 | ||
172 | static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter) | |
173 | { | |
174 | struct dp_meter_instance *ti; | |
175 | ||
96fbc13d | 176 | ASSERT_OVSL(); |
c7c4c44c TZ |
177 | if (!meter) |
178 | return 0; | |
179 | ||
180 | ti = rcu_dereference_ovsl(tbl->ti); | |
181 | dp_meter_instance_remove(ti, meter); | |
182 | ||
183 | tbl->count--; | |
184 | ||
185 | /* Shrink the meter array if necessary. */ | |
186 | if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN && | |
187 | tbl->count <= (ti->n_meters / 4)) { | |
188 | int half_size = ti->n_meters / 2; | |
189 | int i; | |
190 | ||
191 | /* Avoid hash collision, don't move slots to other place. | |
192 | * Make sure there are no references of meters in array | |
193 | * which will be released. | |
194 | */ | |
195 | for (i = half_size; i < ti->n_meters; i++) | |
196 | if (rcu_dereference_ovsl(ti->dp_meters[i])) | |
197 | goto out; | |
198 | ||
199 | if (dp_meter_instance_realloc(tbl, half_size)) | |
200 | goto shrink_err; | |
201 | } | |
202 | ||
203 | out: | |
204 | return 0; | |
205 | ||
206 | shrink_err: | |
207 | dp_meter_instance_insert(ti, meter); | |
208 | tbl->count++; | |
209 | return -ENOMEM; | |
96fbc13d AZ |
210 | } |
211 | ||
212 | static struct sk_buff * | |
213 | ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, | |
214 | struct ovs_header **ovs_reply_header) | |
215 | { | |
216 | struct sk_buff *skb; | |
217 | struct ovs_header *ovs_header = info->userhdr; | |
218 | ||
219 | skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); | |
220 | if (!skb) | |
221 | return ERR_PTR(-ENOMEM); | |
222 | ||
223 | *ovs_reply_header = genlmsg_put(skb, info->snd_portid, | |
224 | info->snd_seq, | |
225 | &dp_meter_genl_family, 0, cmd); | |
b74912a2 | 226 | if (!*ovs_reply_header) { |
96fbc13d AZ |
227 | nlmsg_free(skb); |
228 | return ERR_PTR(-EMSGSIZE); | |
229 | } | |
230 | (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; | |
231 | ||
232 | return skb; | |
233 | } | |
234 | ||
235 | static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, | |
236 | struct dp_meter *meter) | |
237 | { | |
238 | struct nlattr *nla; | |
239 | struct dp_meter_band *band; | |
240 | u16 i; | |
241 | ||
242 | if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) | |
243 | goto error; | |
244 | ||
96fbc13d | 245 | if (nla_put(reply, OVS_METER_ATTR_STATS, |
a8e38738 TZ |
246 | sizeof(struct ovs_flow_stats), &meter->stats)) |
247 | goto error; | |
248 | ||
249 | if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, | |
96fbc13d AZ |
250 | OVS_METER_ATTR_PAD)) |
251 | goto error; | |
252 | ||
ae0be8de | 253 | nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); |
96fbc13d AZ |
254 | if (!nla) |
255 | goto error; | |
256 | ||
257 | band = meter->bands; | |
258 | ||
259 | for (i = 0; i < meter->n_bands; ++i, ++band) { | |
260 | struct nlattr *band_nla; | |
261 | ||
ae0be8de | 262 | band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); |
96fbc13d AZ |
263 | if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, |
264 | sizeof(struct ovs_flow_stats), | |
265 | &band->stats)) | |
266 | goto error; | |
267 | nla_nest_end(reply, band_nla); | |
268 | } | |
269 | nla_nest_end(reply, nla); | |
270 | ||
271 | return 0; | |
272 | error: | |
273 | return -EMSGSIZE; | |
274 | } | |
275 | ||
276 | static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) | |
277 | { | |
eb58eebc | 278 | struct ovs_header *ovs_header = info->userhdr; |
96fbc13d AZ |
279 | struct ovs_header *ovs_reply_header; |
280 | struct nlattr *nla, *band_nla; | |
eb58eebc TZ |
281 | struct sk_buff *reply; |
282 | struct datapath *dp; | |
283 | int err = -EMSGSIZE; | |
96fbc13d AZ |
284 | |
285 | reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, | |
286 | &ovs_reply_header); | |
8a860c2b | 287 | if (IS_ERR(reply)) |
96fbc13d AZ |
288 | return PTR_ERR(reply); |
289 | ||
eb58eebc TZ |
290 | ovs_lock(); |
291 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | |
292 | if (!dp) { | |
293 | err = -ENODEV; | |
294 | goto exit_unlock; | |
295 | } | |
296 | ||
297 | if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, | |
298 | dp->meter_tbl.max_meters_allowed)) | |
299 | goto exit_unlock; | |
300 | ||
301 | ovs_unlock(); | |
302 | ||
303 | if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) | |
96fbc13d AZ |
304 | goto nla_put_failure; |
305 | ||
ae0be8de | 306 | nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS); |
96fbc13d AZ |
307 | if (!nla) |
308 | goto nla_put_failure; | |
309 | ||
ae0be8de | 310 | band_nla = nla_nest_start_noflag(reply, OVS_BAND_ATTR_UNSPEC); |
96fbc13d AZ |
311 | if (!band_nla) |
312 | goto nla_put_failure; | |
313 | /* Currently only DROP band type is supported. */ | |
314 | if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) | |
315 | goto nla_put_failure; | |
316 | nla_nest_end(reply, band_nla); | |
317 | nla_nest_end(reply, nla); | |
318 | ||
319 | genlmsg_end(reply, ovs_reply_header); | |
320 | return genlmsg_reply(reply, info); | |
321 | ||
eb58eebc TZ |
322 | exit_unlock: |
323 | ovs_unlock(); | |
96fbc13d AZ |
324 | nla_put_failure: |
325 | nlmsg_free(reply); | |
96fbc13d AZ |
326 | return err; |
327 | } | |
328 | ||
329 | static struct dp_meter *dp_meter_create(struct nlattr **a) | |
330 | { | |
331 | struct nlattr *nla; | |
332 | int rem; | |
333 | u16 n_bands = 0; | |
334 | struct dp_meter *meter; | |
335 | struct dp_meter_band *band; | |
336 | int err; | |
337 | ||
338 | /* Validate attributes, count the bands. */ | |
339 | if (!a[OVS_METER_ATTR_BANDS]) | |
340 | return ERR_PTR(-EINVAL); | |
341 | ||
342 | nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) | |
343 | if (++n_bands > DP_MAX_BANDS) | |
344 | return ERR_PTR(-EINVAL); | |
345 | ||
346 | /* Allocate and set up the meter before locking anything. */ | |
c5c3899d | 347 | meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); |
96fbc13d AZ |
348 | if (!meter) |
349 | return ERR_PTR(-ENOMEM); | |
350 | ||
25432eba | 351 | meter->id = nla_get_u32(a[OVS_METER_ATTR_ID]); |
96fbc13d AZ |
352 | meter->used = div_u64(ktime_get_ns(), 1000 * 1000); |
353 | meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; | |
354 | meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; | |
355 | spin_lock_init(&meter->lock); | |
356 | if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { | |
357 | meter->stats = *(struct ovs_flow_stats *) | |
358 | nla_data(a[OVS_METER_ATTR_STATS]); | |
359 | } | |
360 | meter->n_bands = n_bands; | |
361 | ||
362 | /* Set up meter bands. */ | |
363 | band = meter->bands; | |
364 | nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { | |
365 | struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; | |
366 | u32 band_max_delta_t; | |
367 | ||
8cb08174 JB |
368 | err = nla_parse_deprecated((struct nlattr **)&attr, |
369 | OVS_BAND_ATTR_MAX, nla_data(nla), | |
370 | nla_len(nla), band_policy, NULL); | |
96fbc13d AZ |
371 | if (err) |
372 | goto exit_free_meter; | |
373 | ||
374 | if (!attr[OVS_BAND_ATTR_TYPE] || | |
375 | !attr[OVS_BAND_ATTR_RATE] || | |
376 | !attr[OVS_BAND_ATTR_BURST]) { | |
377 | err = -EINVAL; | |
378 | goto exit_free_meter; | |
379 | } | |
380 | ||
381 | band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); | |
382 | band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); | |
ddc502df | 383 | if (band->rate == 0) { |
384 | err = -EINVAL; | |
385 | goto exit_free_meter; | |
386 | } | |
387 | ||
96fbc13d AZ |
388 | band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); |
389 | /* Figure out max delta_t that is enough to fill any bucket. | |
390 | * Keep max_delta_t size to the bucket units: | |
391 | * pkts => 1/1000 packets, kilobits => bits. | |
ddc502df | 392 | * |
393 | * Start with a full bucket. | |
96fbc13d | 394 | */ |
7d742b50 | 395 | band->bucket = band->burst_size * 1000ULL; |
659d4587 | 396 | band_max_delta_t = div_u64(band->bucket, band->rate); |
96fbc13d AZ |
397 | if (band_max_delta_t > meter->max_delta_t) |
398 | meter->max_delta_t = band_max_delta_t; | |
399 | band++; | |
400 | } | |
401 | ||
402 | return meter; | |
403 | ||
404 | exit_free_meter: | |
405 | kfree(meter); | |
406 | return ERR_PTR(err); | |
407 | } | |
408 | ||
409 | static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) | |
410 | { | |
411 | struct nlattr **a = info->attrs; | |
412 | struct dp_meter *meter, *old_meter; | |
413 | struct sk_buff *reply; | |
414 | struct ovs_header *ovs_reply_header; | |
415 | struct ovs_header *ovs_header = info->userhdr; | |
c7c4c44c | 416 | struct dp_meter_table *meter_tbl; |
96fbc13d AZ |
417 | struct datapath *dp; |
418 | int err; | |
419 | u32 meter_id; | |
420 | bool failed; | |
421 | ||
c7735008 TZ |
422 | if (!a[OVS_METER_ATTR_ID]) |
423 | return -EINVAL; | |
25432eba | 424 | |
96fbc13d | 425 | meter = dp_meter_create(a); |
92f9e238 | 426 | if (IS_ERR(meter)) |
96fbc13d AZ |
427 | return PTR_ERR(meter); |
428 | ||
429 | reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, | |
430 | &ovs_reply_header); | |
431 | if (IS_ERR(reply)) { | |
432 | err = PTR_ERR(reply); | |
433 | goto exit_free_meter; | |
434 | } | |
435 | ||
436 | ovs_lock(); | |
437 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | |
438 | if (!dp) { | |
439 | err = -ENODEV; | |
440 | goto exit_unlock; | |
441 | } | |
442 | ||
c7c4c44c | 443 | meter_tbl = &dp->meter_tbl; |
96fbc13d AZ |
444 | meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); |
445 | ||
c7c4c44c TZ |
446 | old_meter = lookup_meter(meter_tbl, meter_id); |
447 | err = detach_meter(meter_tbl, old_meter); | |
448 | if (err) | |
449 | goto exit_unlock; | |
450 | ||
451 | err = attach_meter(meter_tbl, meter); | |
452 | if (err) | |
453 | goto exit_unlock; | |
454 | ||
96fbc13d AZ |
455 | ovs_unlock(); |
456 | ||
457 | /* Build response with the meter_id and stats from | |
458 | * the old meter, if any. | |
459 | */ | |
460 | failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); | |
461 | WARN_ON(failed); | |
462 | if (old_meter) { | |
463 | spin_lock_bh(&old_meter->lock); | |
464 | if (old_meter->keep_stats) { | |
465 | err = ovs_meter_cmd_reply_stats(reply, meter_id, | |
466 | old_meter); | |
467 | WARN_ON(err); | |
468 | } | |
469 | spin_unlock_bh(&old_meter->lock); | |
470 | ovs_meter_free(old_meter); | |
471 | } | |
472 | ||
473 | genlmsg_end(reply, ovs_reply_header); | |
474 | return genlmsg_reply(reply, info); | |
475 | ||
476 | exit_unlock: | |
477 | ovs_unlock(); | |
478 | nlmsg_free(reply); | |
479 | exit_free_meter: | |
480 | kfree(meter); | |
481 | return err; | |
482 | } | |
483 | ||
484 | static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) | |
485 | { | |
96fbc13d AZ |
486 | struct ovs_header *ovs_header = info->userhdr; |
487 | struct ovs_header *ovs_reply_header; | |
c7c4c44c TZ |
488 | struct nlattr **a = info->attrs; |
489 | struct dp_meter *meter; | |
490 | struct sk_buff *reply; | |
96fbc13d | 491 | struct datapath *dp; |
c7c4c44c | 492 | u32 meter_id; |
96fbc13d | 493 | int err; |
96fbc13d AZ |
494 | |
495 | if (!a[OVS_METER_ATTR_ID]) | |
496 | return -EINVAL; | |
497 | ||
498 | meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); | |
499 | ||
500 | reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, | |
501 | &ovs_reply_header); | |
502 | if (IS_ERR(reply)) | |
503 | return PTR_ERR(reply); | |
504 | ||
505 | ovs_lock(); | |
506 | ||
507 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | |
508 | if (!dp) { | |
509 | err = -ENODEV; | |
510 | goto exit_unlock; | |
511 | } | |
512 | ||
513 | /* Locate meter, copy stats. */ | |
c7c4c44c | 514 | meter = lookup_meter(&dp->meter_tbl, meter_id); |
96fbc13d AZ |
515 | if (!meter) { |
516 | err = -ENOENT; | |
517 | goto exit_unlock; | |
518 | } | |
519 | ||
520 | spin_lock_bh(&meter->lock); | |
521 | err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); | |
522 | spin_unlock_bh(&meter->lock); | |
523 | if (err) | |
524 | goto exit_unlock; | |
525 | ||
526 | ovs_unlock(); | |
527 | ||
528 | genlmsg_end(reply, ovs_reply_header); | |
529 | return genlmsg_reply(reply, info); | |
530 | ||
531 | exit_unlock: | |
532 | ovs_unlock(); | |
533 | nlmsg_free(reply); | |
534 | return err; | |
535 | } | |
536 | ||
537 | static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) | |
538 | { | |
96fbc13d AZ |
539 | struct ovs_header *ovs_header = info->userhdr; |
540 | struct ovs_header *ovs_reply_header; | |
c7c4c44c TZ |
541 | struct nlattr **a = info->attrs; |
542 | struct dp_meter *old_meter; | |
543 | struct sk_buff *reply; | |
96fbc13d | 544 | struct datapath *dp; |
c7c4c44c | 545 | u32 meter_id; |
96fbc13d | 546 | int err; |
96fbc13d AZ |
547 | |
548 | if (!a[OVS_METER_ATTR_ID]) | |
549 | return -EINVAL; | |
96fbc13d AZ |
550 | |
551 | reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, | |
552 | &ovs_reply_header); | |
553 | if (IS_ERR(reply)) | |
554 | return PTR_ERR(reply); | |
555 | ||
556 | ovs_lock(); | |
557 | ||
558 | dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); | |
559 | if (!dp) { | |
560 | err = -ENODEV; | |
561 | goto exit_unlock; | |
562 | } | |
563 | ||
c7c4c44c TZ |
564 | meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); |
565 | old_meter = lookup_meter(&dp->meter_tbl, meter_id); | |
96fbc13d AZ |
566 | if (old_meter) { |
567 | spin_lock_bh(&old_meter->lock); | |
568 | err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); | |
569 | WARN_ON(err); | |
570 | spin_unlock_bh(&old_meter->lock); | |
c7c4c44c TZ |
571 | |
572 | err = detach_meter(&dp->meter_tbl, old_meter); | |
573 | if (err) | |
574 | goto exit_unlock; | |
96fbc13d | 575 | } |
c7c4c44c | 576 | |
96fbc13d AZ |
577 | ovs_unlock(); |
578 | ovs_meter_free(old_meter); | |
579 | genlmsg_end(reply, ovs_reply_header); | |
580 | return genlmsg_reply(reply, info); | |
581 | ||
582 | exit_unlock: | |
583 | ovs_unlock(); | |
584 | nlmsg_free(reply); | |
585 | return err; | |
586 | } | |
587 | ||
588 | /* Meter action execution. | |
589 | * | |
590 | * Return true 'meter_id' drop band is triggered. The 'skb' should be | |
591 | * dropped by the caller'. | |
592 | */ | |
593 | bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, | |
594 | struct sw_flow_key *key, u32 meter_id) | |
595 | { | |
96fbc13d AZ |
596 | long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); |
597 | long long int long_delta_ms; | |
c7c4c44c TZ |
598 | struct dp_meter_band *band; |
599 | struct dp_meter *meter; | |
96fbc13d AZ |
600 | int i, band_exceeded_max = -1; |
601 | u32 band_exceeded_rate = 0; | |
c7c4c44c TZ |
602 | u32 delta_ms; |
603 | u32 cost; | |
96fbc13d | 604 | |
c7c4c44c | 605 | meter = lookup_meter(&dp->meter_tbl, meter_id); |
96fbc13d AZ |
606 | /* Do not drop the packet when there is no meter. */ |
607 | if (!meter) | |
608 | return false; | |
609 | ||
610 | /* Lock the meter while using it. */ | |
611 | spin_lock(&meter->lock); | |
612 | ||
613 | long_delta_ms = (now_ms - meter->used); /* ms */ | |
e4df1b0c TL |
614 | if (long_delta_ms < 0) { |
615 | /* This condition means that we have several threads fighting | |
616 | * for a meter lock, and the one who received the packets a | |
617 | * bit later wins. Assuming that all racing threads received | |
618 | * packets at the same time to avoid overflow. | |
619 | */ | |
620 | long_delta_ms = 0; | |
621 | } | |
96fbc13d AZ |
622 | |
623 | /* Make sure delta_ms will not be too large, so that bucket will not | |
624 | * wrap around below. | |
625 | */ | |
626 | delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) | |
627 | ? meter->max_delta_t : (u32)long_delta_ms; | |
628 | ||
629 | /* Update meter statistics. | |
630 | */ | |
631 | meter->used = now_ms; | |
632 | meter->stats.n_packets += 1; | |
633 | meter->stats.n_bytes += skb->len; | |
634 | ||
635 | /* Bucket rate is either in kilobits per second, or in packets per | |
636 | * second. We maintain the bucket in the units of either bits or | |
637 | * 1/1000th of a packet, correspondingly. | |
638 | * Then, when rate is multiplied with milliseconds, we get the | |
639 | * bucket units: | |
640 | * msec * kbps = bits, and | |
641 | * msec * packets/sec = 1/1000 packets. | |
642 | * | |
643 | * 'cost' is the number of bucket units in this packet. | |
644 | */ | |
645 | cost = (meter->kbps) ? skb->len * 8 : 1000; | |
646 | ||
647 | /* Update all bands and find the one hit with the highest rate. */ | |
648 | for (i = 0; i < meter->n_bands; ++i) { | |
649 | long long int max_bucket_size; | |
650 | ||
651 | band = &meter->bands[i]; | |
7d742b50 | 652 | max_bucket_size = band->burst_size * 1000LL; |
96fbc13d AZ |
653 | |
654 | band->bucket += delta_ms * band->rate; | |
655 | if (band->bucket > max_bucket_size) | |
656 | band->bucket = max_bucket_size; | |
657 | ||
658 | if (band->bucket >= cost) { | |
659 | band->bucket -= cost; | |
660 | } else if (band->rate > band_exceeded_rate) { | |
661 | band_exceeded_rate = band->rate; | |
662 | band_exceeded_max = i; | |
663 | } | |
664 | } | |
665 | ||
666 | if (band_exceeded_max >= 0) { | |
667 | /* Update band statistics. */ | |
668 | band = &meter->bands[band_exceeded_max]; | |
669 | band->stats.n_packets += 1; | |
670 | band->stats.n_bytes += skb->len; | |
671 | ||
672 | /* Drop band triggered, let the caller drop the 'skb'. */ | |
673 | if (band->type == OVS_METER_BAND_TYPE_DROP) { | |
674 | spin_unlock(&meter->lock); | |
675 | return true; | |
676 | } | |
677 | } | |
678 | ||
679 | spin_unlock(&meter->lock); | |
680 | return false; | |
681 | } | |
682 | ||
b980b313 | 683 | static const struct genl_small_ops dp_meter_genl_ops[] = { |
96fbc13d | 684 | { .cmd = OVS_METER_CMD_FEATURES, |
ef6243ac | 685 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
96fbc13d | 686 | .flags = 0, /* OK for unprivileged users. */ |
96fbc13d AZ |
687 | .doit = ovs_meter_cmd_features |
688 | }, | |
689 | { .cmd = OVS_METER_CMD_SET, | |
ef6243ac | 690 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
96fbc13d AZ |
691 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN |
692 | * privilege. | |
693 | */ | |
96fbc13d AZ |
694 | .doit = ovs_meter_cmd_set, |
695 | }, | |
696 | { .cmd = OVS_METER_CMD_GET, | |
ef6243ac | 697 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
96fbc13d | 698 | .flags = 0, /* OK for unprivileged users. */ |
96fbc13d AZ |
699 | .doit = ovs_meter_cmd_get, |
700 | }, | |
701 | { .cmd = OVS_METER_CMD_DEL, | |
ef6243ac | 702 | .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, |
96fbc13d AZ |
703 | .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN |
704 | * privilege. | |
705 | */ | |
96fbc13d AZ |
706 | .doit = ovs_meter_cmd_del |
707 | }, | |
708 | }; | |
709 | ||
710 | static const struct genl_multicast_group ovs_meter_multicast_group = { | |
711 | .name = OVS_METER_MCGROUP, | |
712 | }; | |
713 | ||
714 | struct genl_family dp_meter_genl_family __ro_after_init = { | |
715 | .hdrsize = sizeof(struct ovs_header), | |
716 | .name = OVS_METER_FAMILY, | |
717 | .version = OVS_METER_VERSION, | |
718 | .maxattr = OVS_METER_ATTR_MAX, | |
3b0f31f2 | 719 | .policy = meter_policy, |
96fbc13d AZ |
720 | .netnsok = true, |
721 | .parallel_ops = true, | |
66a9b928 JK |
722 | .small_ops = dp_meter_genl_ops, |
723 | .n_small_ops = ARRAY_SIZE(dp_meter_genl_ops), | |
96fbc13d AZ |
724 | .mcgrps = &ovs_meter_multicast_group, |
725 | .n_mcgrps = 1, | |
726 | .module = THIS_MODULE, | |
727 | }; | |
728 | ||
729 | int ovs_meters_init(struct datapath *dp) | |
730 | { | |
c7c4c44c TZ |
731 | struct dp_meter_table *tbl = &dp->meter_tbl; |
732 | struct dp_meter_instance *ti; | |
eb58eebc | 733 | unsigned long free_mem_bytes; |
96fbc13d | 734 | |
c7c4c44c TZ |
735 | ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); |
736 | if (!ti) | |
96fbc13d AZ |
737 | return -ENOMEM; |
738 | ||
eb58eebc TZ |
739 | /* Allow meters in a datapath to use ~3.12% of physical memory. */ |
740 | free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5); | |
741 | tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter), | |
742 | DP_METER_NUM_MAX); | |
743 | if (!tbl->max_meters_allowed) | |
744 | goto out_err; | |
745 | ||
c7c4c44c TZ |
746 | rcu_assign_pointer(tbl->ti, ti); |
747 | tbl->count = 0; | |
96fbc13d AZ |
748 | |
749 | return 0; | |
eb58eebc TZ |
750 | |
751 | out_err: | |
752 | dp_meter_instance_free(ti); | |
753 | return -ENOMEM; | |
96fbc13d AZ |
754 | } |
755 | ||
756 | void ovs_meters_exit(struct datapath *dp) | |
757 | { | |
c7c4c44c TZ |
758 | struct dp_meter_table *tbl = &dp->meter_tbl; |
759 | struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti); | |
96fbc13d AZ |
760 | int i; |
761 | ||
c7c4c44c | 762 | for (i = 0; i < ti->n_meters; i++) |
4b36a0df | 763 | ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i])); |
96fbc13d | 764 | |
c7c4c44c | 765 | dp_meter_instance_free(ti); |
96fbc13d | 766 | } |