]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
c946befe | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "pinsched.h" | |
7f3adc00 BP |
19 | #include <sys/types.h> |
20 | #include <netinet/in.h> | |
064af421 | 21 | #include <arpa/inet.h> |
54e05b5f | 22 | #include <stdint.h> |
064af421 | 23 | #include <stdlib.h> |
4e022ec0 | 24 | #include "flow.h" |
3021ea60 | 25 | #include "hash.h" |
531edfbb | 26 | #include "hmap.h" |
064af421 BP |
27 | #include "ofpbuf.h" |
28 | #include "openflow/openflow.h" | |
29 | #include "poll-loop.h" | |
064af421 BP |
30 | #include "random.h" |
31 | #include "rconn.h" | |
648f4f1f | 32 | #include "sat-math.h" |
064af421 | 33 | #include "timeval.h" |
648f4f1f | 34 | #include "token-bucket.h" |
064af421 BP |
35 | #include "vconn.h" |
36 | ||
b3907fbc | 37 | struct pinqueue { |
531edfbb | 38 | struct hmap_node node; /* In struct pinsched's 'queues' hmap. */ |
4e022ec0 | 39 | ofp_port_t port_no; /* Port number. */ |
b3907fbc BP |
40 | struct list packets; /* Contains "struct ofpbuf"s. */ |
41 | int n; /* Number of packets in 'packets'. */ | |
42 | }; | |
43 | ||
064af421 | 44 | struct pinsched { |
648f4f1f | 45 | struct token_bucket token_bucket; |
064af421 BP |
46 | |
47 | /* One queue per physical port. */ | |
531edfbb | 48 | struct hmap queues; /* Contains "struct pinqueue"s. */ |
c946befe | 49 | unsigned int n_queued; /* Sum over queues[*].n. */ |
531edfbb | 50 | struct pinqueue *next_txq; /* Next pinqueue check in round-robin. */ |
064af421 | 51 | |
064af421 BP |
52 | /* Statistics reporting. */ |
53 | unsigned long long n_normal; /* # txed w/o rate limit queuing. */ | |
54 | unsigned long long n_limited; /* # queued for rate limiting. */ | |
55 | unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ | |
064af421 BP |
56 | }; |
57 | ||
531edfbb BP |
58 | static void |
59 | advance_txq(struct pinsched *ps) | |
60 | { | |
61 | struct hmap_node *next; | |
62 | ||
63 | next = (ps->next_txq | |
64 | ? hmap_next(&ps->queues, &ps->next_txq->node) | |
65 | : hmap_first(&ps->queues)); | |
66 | ps->next_txq = next ? CONTAINER_OF(next, struct pinqueue, node) : NULL; | |
67 | } | |
68 | ||
064af421 | 69 | static struct ofpbuf * |
531edfbb | 70 | dequeue_packet(struct pinsched *ps, struct pinqueue *q) |
064af421 | 71 | { |
b3907fbc | 72 | struct ofpbuf *packet = ofpbuf_from_list(list_pop_front(&q->packets)); |
531edfbb | 73 | q->n--; |
064af421 BP |
74 | ps->n_queued--; |
75 | return packet; | |
76 | } | |
77 | ||
648f4f1f BP |
78 | static void |
79 | adjust_limits(int *rate_limit, int *burst_limit) | |
80 | { | |
81 | if (*rate_limit <= 0) { | |
82 | *rate_limit = 1000; | |
83 | } | |
84 | if (*burst_limit <= 0) { | |
85 | *burst_limit = *rate_limit / 4; | |
86 | } | |
87 | if (*burst_limit < 1) { | |
88 | *burst_limit = 1; | |
89 | } | |
90 | } | |
91 | ||
531edfbb BP |
92 | /* Destroys 'q' and removes it from 'ps''s set of queues. |
93 | * (The caller must ensure that 'q' is empty.) */ | |
94 | static void | |
95 | pinqueue_destroy(struct pinsched *ps, struct pinqueue *q) | |
96 | { | |
97 | hmap_remove(&ps->queues, &q->node); | |
98 | free(q); | |
99 | } | |
100 | ||
101 | static struct pinqueue * | |
4e022ec0 | 102 | pinqueue_get(struct pinsched *ps, ofp_port_t port_no) |
531edfbb | 103 | { |
f9c0c3ec | 104 | uint32_t hash = hash_ofp_port(port_no); |
531edfbb BP |
105 | struct pinqueue *q; |
106 | ||
107 | HMAP_FOR_EACH_IN_BUCKET (q, node, hash, &ps->queues) { | |
108 | if (port_no == q->port_no) { | |
109 | return q; | |
110 | } | |
111 | } | |
112 | ||
113 | q = xmalloc(sizeof *q); | |
114 | hmap_insert(&ps->queues, &q->node, hash); | |
115 | q->port_no = port_no; | |
116 | list_init(&q->packets); | |
117 | q->n = 0; | |
118 | return q; | |
119 | } | |
120 | ||
064af421 BP |
121 | /* Drop a packet from the longest queue in 'ps'. */ |
122 | static void | |
123 | drop_packet(struct pinsched *ps) | |
124 | { | |
b3907fbc | 125 | struct pinqueue *longest; /* Queue currently selected as longest. */ |
a2973b1a | 126 | int n_longest = 0; /* # of queues of same length as 'longest'. */ |
b3907fbc | 127 | struct pinqueue *q; |
064af421 BP |
128 | |
129 | ps->n_queue_dropped++; | |
130 | ||
531edfbb BP |
131 | longest = NULL; |
132 | HMAP_FOR_EACH (q, node, &ps->queues) { | |
133 | if (!longest || longest->n < q->n) { | |
064af421 BP |
134 | longest = q; |
135 | n_longest = 1; | |
136 | } else if (longest->n == q->n) { | |
137 | n_longest++; | |
138 | ||
139 | /* Randomly select one of the longest queues, with a uniform | |
140 | * distribution (Knuth algorithm 3.4.2R). */ | |
141 | if (!random_range(n_longest)) { | |
142 | longest = q; | |
064af421 BP |
143 | } |
144 | } | |
145 | } | |
146 | ||
147 | /* FIXME: do we want to pop the tail instead? */ | |
531edfbb BP |
148 | ofpbuf_delete(dequeue_packet(ps, longest)); |
149 | if (longest->n == 0) { | |
150 | pinqueue_destroy(ps, longest); | |
151 | } | |
064af421 BP |
152 | } |
153 | ||
154 | /* Remove and return the next packet to transmit (in round-robin order). */ | |
155 | static struct ofpbuf * | |
156 | get_tx_packet(struct pinsched *ps) | |
157 | { | |
531edfbb BP |
158 | struct ofpbuf *packet; |
159 | struct pinqueue *q; | |
160 | ||
161 | if (!ps->next_txq) { | |
162 | advance_txq(ps); | |
163 | } | |
164 | ||
165 | q = ps->next_txq; | |
166 | packet = dequeue_packet(ps, q); | |
167 | advance_txq(ps); | |
168 | if (q->n == 0) { | |
169 | pinqueue_destroy(ps, q); | |
064af421 | 170 | } |
531edfbb BP |
171 | |
172 | return packet; | |
064af421 BP |
173 | } |
174 | ||
064af421 BP |
175 | /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns |
176 | * true if successful, false otherwise. (In the latter case no tokens are | |
177 | * removed.) */ | |
178 | static bool | |
179 | get_token(struct pinsched *ps) | |
180 | { | |
648f4f1f | 181 | return token_bucket_withdraw(&ps->token_bucket, 1000); |
064af421 BP |
182 | } |
183 | ||
184 | void | |
4e022ec0 | 185 | pinsched_send(struct pinsched *ps, ofp_port_t port_no, |
a6f75961 | 186 | struct ofpbuf *packet, struct list *txq) |
064af421 | 187 | { |
a6f75961 | 188 | list_init(txq); |
064af421 | 189 | if (!ps) { |
a6f75961 | 190 | list_push_back(txq, &packet->list_node); |
064af421 BP |
191 | } else if (!ps->n_queued && get_token(ps)) { |
192 | /* In the common case where we are not constrained by the rate limit, | |
193 | * let the packet take the normal path. */ | |
194 | ps->n_normal++; | |
a6f75961 | 195 | list_push_back(txq, &packet->list_node); |
064af421 BP |
196 | } else { |
197 | /* Otherwise queue it up for the periodic callback to drain out. */ | |
b3907fbc | 198 | struct pinqueue *q; |
064af421 | 199 | |
f797957a BP |
200 | /* We might be called with a buffer obtained from dpif_recv() that has |
201 | * much more allocated space than actual content most of the time. | |
202 | * Since we're going to store the packet for some time, free up that | |
064af421 BP |
203 | * otherwise wasted space. */ |
204 | ofpbuf_trim(packet); | |
205 | ||
648f4f1f | 206 | if (ps->n_queued * 1000 >= ps->token_bucket.burst) { |
064af421 BP |
207 | drop_packet(ps); |
208 | } | |
531edfbb | 209 | q = pinqueue_get(ps, port_no); |
b3907fbc BP |
210 | list_push_back(&q->packets, &packet->list_node); |
211 | q->n++; | |
064af421 BP |
212 | ps->n_queued++; |
213 | ps->n_limited++; | |
214 | } | |
215 | } | |
216 | ||
064af421 | 217 | void |
a6f75961 | 218 | pinsched_run(struct pinsched *ps, struct list *txq) |
064af421 | 219 | { |
a6f75961 | 220 | list_init(txq); |
064af421 BP |
221 | if (ps) { |
222 | int i; | |
223 | ||
224 | /* Drain some packets out of the bucket if possible, but limit the | |
225 | * number of iterations to allow other code to get work done too. */ | |
064af421 | 226 | for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { |
a6f75961 BP |
227 | struct ofpbuf *packet = get_tx_packet(ps); |
228 | list_push_back(txq, &packet->list_node); | |
064af421 BP |
229 | } |
230 | } | |
231 | } | |
232 | ||
233 | void | |
234 | pinsched_wait(struct pinsched *ps) | |
235 | { | |
236 | if (ps && ps->n_queued) { | |
648f4f1f | 237 | token_bucket_wait(&ps->token_bucket, 1000); |
064af421 BP |
238 | } |
239 | } | |
240 | ||
241 | /* Creates and returns a scheduler for sending packet-in messages. */ | |
242 | struct pinsched * | |
9b45d7f5 | 243 | pinsched_create(int rate_limit, int burst_limit) |
064af421 BP |
244 | { |
245 | struct pinsched *ps; | |
246 | ||
ec6fde61 | 247 | ps = xzalloc(sizeof *ps); |
648f4f1f BP |
248 | |
249 | adjust_limits(&rate_limit, &burst_limit); | |
250 | token_bucket_init(&ps->token_bucket, | |
251 | rate_limit, sat_mul(burst_limit, 1000)); | |
252 | ||
531edfbb | 253 | hmap_init(&ps->queues); |
064af421 | 254 | ps->n_queued = 0; |
531edfbb | 255 | ps->next_txq = NULL; |
064af421 BP |
256 | ps->n_normal = 0; |
257 | ps->n_limited = 0; | |
258 | ps->n_queue_dropped = 0; | |
064af421 | 259 | |
064af421 BP |
260 | return ps; |
261 | } | |
262 | ||
263 | void | |
264 | pinsched_destroy(struct pinsched *ps) | |
265 | { | |
266 | if (ps) { | |
531edfbb | 267 | struct pinqueue *q, *next; |
064af421 | 268 | |
531edfbb BP |
269 | HMAP_FOR_EACH_SAFE (q, next, node, &ps->queues) { |
270 | hmap_remove(&ps->queues, &q->node); | |
271 | ofpbuf_list_delete(&q->packets); | |
272 | free(q); | |
064af421 | 273 | } |
531edfbb | 274 | hmap_destroy(&ps->queues); |
064af421 BP |
275 | free(ps); |
276 | } | |
277 | } | |
278 | ||
79c9f2ee BP |
279 | void |
280 | pinsched_get_limits(const struct pinsched *ps, | |
281 | int *rate_limit, int *burst_limit) | |
282 | { | |
648f4f1f BP |
283 | *rate_limit = ps->token_bucket.rate; |
284 | *burst_limit = ps->token_bucket.burst / 1000; | |
79c9f2ee BP |
285 | } |
286 | ||
064af421 BP |
287 | void |
288 | pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) | |
289 | { | |
648f4f1f BP |
290 | adjust_limits(&rate_limit, &burst_limit); |
291 | token_bucket_set(&ps->token_bucket, | |
292 | rate_limit, sat_mul(burst_limit, 1000)); | |
064af421 BP |
293 | while (ps->n_queued > burst_limit) { |
294 | drop_packet(ps); | |
295 | } | |
296 | } | |
0d085684 | 297 | |
a413195e BP |
298 | /* Retrieves statistics for 'ps'. The statistics will be all zero if 'ps' is |
299 | * null. */ | |
300 | void | |
301 | pinsched_get_stats(const struct pinsched *ps, struct pinsched_stats *stats) | |
0d085684 | 302 | { |
a413195e BP |
303 | if (ps) { |
304 | stats->n_queued = ps->n_queued; | |
305 | stats->n_normal = ps->n_normal; | |
306 | stats->n_limited = ps->n_limited; | |
307 | stats->n_queue_dropped = ps->n_queue_dropped; | |
308 | } else { | |
309 | memset(stats, 0, sizeof *stats); | |
310 | } | |
0d085684 | 311 | } |