]> git.proxmox.com Git - mirror_ovs.git/blame - ofproto/pinsched.c
ofproto-dpif: Fix for recirc issue with mpls traffic with dp_hash
[mirror_ovs.git] / ofproto / pinsched.c
CommitLineData
064af421 1/*
64e3c4e5 2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
18#include "pinsched.h"
7f3adc00
BP
19#include <sys/types.h>
20#include <netinet/in.h>
064af421 21#include <arpa/inet.h>
54e05b5f 22#include <stdint.h>
064af421 23#include <stdlib.h>
4e022ec0 24#include "flow.h"
3021ea60 25#include "hash.h"
ee89ea7b 26#include "openvswitch/hmap.h"
64c96779 27#include "openvswitch/ofpbuf.h"
064af421 28#include "openflow/openflow.h"
fd016ae3 29#include "openvswitch/poll-loop.h"
064af421 30#include "random.h"
dc02e1eb 31#include "openvswitch/rconn.h"
648f4f1f 32#include "sat-math.h"
064af421 33#include "timeval.h"
d668c4a9 34#include "openvswitch/token-bucket.h"
4a1f523f 35#include "openvswitch/vconn.h"
064af421 36
b3907fbc 37struct pinqueue {
531edfbb 38 struct hmap_node node; /* In struct pinsched's 'queues' hmap. */
ca6ba700
TG
39 ofp_port_t port_no; /* Port number. */
40 struct ovs_list packets; /* Contains "struct ofpbuf"s. */
b3907fbc
BP
41 int n; /* Number of packets in 'packets'. */
42};
43
064af421 44struct pinsched {
648f4f1f 45 struct token_bucket token_bucket;
064af421
BP
46
47 /* One queue per physical port. */
531edfbb 48 struct hmap queues; /* Contains "struct pinqueue"s. */
c946befe 49 unsigned int n_queued; /* Sum over queues[*].n. */
531edfbb 50 struct pinqueue *next_txq; /* Next pinqueue check in round-robin. */
064af421 51
064af421
BP
52 /* Statistics reporting. */
53 unsigned long long n_normal; /* # txed w/o rate limit queuing. */
54 unsigned long long n_limited; /* # queued for rate limiting. */
55 unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */
064af421
BP
56};
57
531edfbb
BP
58static void
59advance_txq(struct pinsched *ps)
60{
61 struct hmap_node *next;
62
63 next = (ps->next_txq
64 ? hmap_next(&ps->queues, &ps->next_txq->node)
65 : hmap_first(&ps->queues));
66 ps->next_txq = next ? CONTAINER_OF(next, struct pinqueue, node) : NULL;
67}
68
064af421 69static struct ofpbuf *
531edfbb 70dequeue_packet(struct pinsched *ps, struct pinqueue *q)
064af421 71{
417e7e66 72 struct ofpbuf *packet = ofpbuf_from_list(ovs_list_pop_front(&q->packets));
531edfbb 73 q->n--;
064af421
BP
74 ps->n_queued--;
75 return packet;
76}
77
648f4f1f
BP
/* Replaces out-of-range limits by defaults, in place.
 *
 * A nonpositive '*rate_limit' becomes 1000.  A nonpositive '*burst_limit'
 * becomes one quarter of the (possibly just defaulted) rate, but never less
 * than 1. */
static void
adjust_limits(int *rate_limit, int *burst_limit)
{
    if (*rate_limit < 1) {
        *rate_limit = 1000;
    }

    if (*burst_limit < 1) {
        int quarter = *rate_limit / 4;

        *burst_limit = quarter > 0 ? quarter : 1;
    }
}
91
531edfbb
BP
92/* Destroys 'q' and removes it from 'ps''s set of queues.
93 * (The caller must ensure that 'q' is empty.) */
94static void
95pinqueue_destroy(struct pinsched *ps, struct pinqueue *q)
96{
2d2b0114
TP
97 if (ps->next_txq == q) {
98 advance_txq(ps);
99 if (ps->next_txq == q) {
100 ps->next_txq = NULL;
101 }
102 }
531edfbb
BP
103 hmap_remove(&ps->queues, &q->node);
104 free(q);
105}
106
107static struct pinqueue *
4e022ec0 108pinqueue_get(struct pinsched *ps, ofp_port_t port_no)
531edfbb 109{
f9c0c3ec 110 uint32_t hash = hash_ofp_port(port_no);
531edfbb
BP
111 struct pinqueue *q;
112
113 HMAP_FOR_EACH_IN_BUCKET (q, node, hash, &ps->queues) {
114 if (port_no == q->port_no) {
115 return q;
116 }
117 }
118
119 q = xmalloc(sizeof *q);
120 hmap_insert(&ps->queues, &q->node, hash);
121 q->port_no = port_no;
417e7e66 122 ovs_list_init(&q->packets);
531edfbb
BP
123 q->n = 0;
124 return q;
125}
126
064af421
BP
127/* Drop a packet from the longest queue in 'ps'. */
128static void
129drop_packet(struct pinsched *ps)
130{
b3907fbc 131 struct pinqueue *longest; /* Queue currently selected as longest. */
a2973b1a 132 int n_longest = 0; /* # of queues of same length as 'longest'. */
b3907fbc 133 struct pinqueue *q;
064af421
BP
134
135 ps->n_queue_dropped++;
136
531edfbb
BP
137 longest = NULL;
138 HMAP_FOR_EACH (q, node, &ps->queues) {
139 if (!longest || longest->n < q->n) {
064af421
BP
140 longest = q;
141 n_longest = 1;
142 } else if (longest->n == q->n) {
143 n_longest++;
144
145 /* Randomly select one of the longest queues, with a uniform
146 * distribution (Knuth algorithm 3.4.2R). */
147 if (!random_range(n_longest)) {
148 longest = q;
064af421
BP
149 }
150 }
151 }
152
153 /* FIXME: do we want to pop the tail instead? */
531edfbb
BP
154 ofpbuf_delete(dequeue_packet(ps, longest));
155 if (longest->n == 0) {
156 pinqueue_destroy(ps, longest);
157 }
064af421
BP
158}
159
160/* Remove and return the next packet to transmit (in round-robin order). */
161static struct ofpbuf *
162get_tx_packet(struct pinsched *ps)
163{
531edfbb
BP
164 struct ofpbuf *packet;
165 struct pinqueue *q;
166
167 if (!ps->next_txq) {
168 advance_txq(ps);
169 }
170
171 q = ps->next_txq;
172 packet = dequeue_packet(ps, q);
173 advance_txq(ps);
174 if (q->n == 0) {
175 pinqueue_destroy(ps, q);
064af421 176 }
531edfbb
BP
177
178 return packet;
064af421
BP
179}
180
064af421
BP
181/* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns
182 * true if successful, false otherwise. (In the latter case no tokens are
183 * removed.) */
184static bool
185get_token(struct pinsched *ps)
186{
648f4f1f 187 return token_bucket_withdraw(&ps->token_bucket, 1000);
064af421
BP
188}
189
190void
4e022ec0 191pinsched_send(struct pinsched *ps, ofp_port_t port_no,
ca6ba700 192 struct ofpbuf *packet, struct ovs_list *txq)
064af421 193{
417e7e66 194 ovs_list_init(txq);
064af421 195 if (!ps) {
417e7e66 196 ovs_list_push_back(txq, &packet->list_node);
064af421
BP
197 } else if (!ps->n_queued && get_token(ps)) {
198 /* In the common case where we are not constrained by the rate limit,
199 * let the packet take the normal path. */
200 ps->n_normal++;
417e7e66 201 ovs_list_push_back(txq, &packet->list_node);
064af421
BP
202 } else {
203 /* Otherwise queue it up for the periodic callback to drain out. */
648f4f1f 204 if (ps->n_queued * 1000 >= ps->token_bucket.burst) {
064af421
BP
205 drop_packet(ps);
206 }
64e3c4e5
BP
207
208 struct pinqueue *q = pinqueue_get(ps, port_no);
417e7e66 209 ovs_list_push_back(&q->packets, &packet->list_node);
b3907fbc 210 q->n++;
064af421
BP
211 ps->n_queued++;
212 ps->n_limited++;
213 }
214}
215
064af421 216void
ca6ba700 217pinsched_run(struct pinsched *ps, struct ovs_list *txq)
064af421 218{
417e7e66 219 ovs_list_init(txq);
064af421
BP
220 if (ps) {
221 int i;
222
223 /* Drain some packets out of the bucket if possible, but limit the
224 * number of iterations to allow other code to get work done too. */
064af421 225 for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) {
a6f75961 226 struct ofpbuf *packet = get_tx_packet(ps);
417e7e66 227 ovs_list_push_back(txq, &packet->list_node);
064af421
BP
228 }
229 }
230}
231
232void
233pinsched_wait(struct pinsched *ps)
234{
235 if (ps && ps->n_queued) {
648f4f1f 236 token_bucket_wait(&ps->token_bucket, 1000);
064af421
BP
237 }
238}
239
240/* Creates and returns a scheduler for sending packet-in messages. */
241struct pinsched *
9b45d7f5 242pinsched_create(int rate_limit, int burst_limit)
064af421
BP
243{
244 struct pinsched *ps;
245
ec6fde61 246 ps = xzalloc(sizeof *ps);
648f4f1f
BP
247
248 adjust_limits(&rate_limit, &burst_limit);
249 token_bucket_init(&ps->token_bucket,
250 rate_limit, sat_mul(burst_limit, 1000));
251
531edfbb 252 hmap_init(&ps->queues);
064af421 253 ps->n_queued = 0;
531edfbb 254 ps->next_txq = NULL;
064af421
BP
255 ps->n_normal = 0;
256 ps->n_limited = 0;
257 ps->n_queue_dropped = 0;
064af421 258
064af421
BP
259 return ps;
260}
261
262void
263pinsched_destroy(struct pinsched *ps)
264{
265 if (ps) {
4ec3d7c7 266 struct pinqueue *q;
064af421 267
4ec3d7c7 268 HMAP_FOR_EACH_POP (q, node, &ps->queues) {
531edfbb
BP
269 ofpbuf_list_delete(&q->packets);
270 free(q);
064af421 271 }
531edfbb 272 hmap_destroy(&ps->queues);
064af421
BP
273 free(ps);
274 }
275}
276
79c9f2ee
BP
277void
278pinsched_get_limits(const struct pinsched *ps,
279 int *rate_limit, int *burst_limit)
280{
648f4f1f
BP
281 *rate_limit = ps->token_bucket.rate;
282 *burst_limit = ps->token_bucket.burst / 1000;
79c9f2ee
BP
283}
284
064af421
BP
285void
286pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit)
287{
648f4f1f
BP
288 adjust_limits(&rate_limit, &burst_limit);
289 token_bucket_set(&ps->token_bucket,
290 rate_limit, sat_mul(burst_limit, 1000));
064af421
BP
291 while (ps->n_queued > burst_limit) {
292 drop_packet(ps);
293 }
294}
0d085684 295
a413195e
BP
296/* Retrieves statistics for 'ps'. The statistics will be all zero if 'ps' is
297 * null. */
298void
299pinsched_get_stats(const struct pinsched *ps, struct pinsched_stats *stats)
0d085684 300{
a413195e
BP
301 if (ps) {
302 stats->n_queued = ps->n_queued;
303 stats->n_normal = ps->n_normal;
304 stats->n_limited = ps->n_limited;
305 stats->n_queue_dropped = ps->n_queue_dropped;
306 } else {
307 memset(stats, 0, sizeof *stats);
308 }
0d085684 309}