]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
e0edde6f | 2 | * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "pinsched.h" | |
7f3adc00 BP |
19 | #include <sys/types.h> |
20 | #include <netinet/in.h> | |
064af421 | 21 | #include <arpa/inet.h> |
54e05b5f | 22 | #include <stdint.h> |
064af421 | 23 | #include <stdlib.h> |
3021ea60 | 24 | #include "hash.h" |
531edfbb | 25 | #include "hmap.h" |
064af421 BP |
26 | #include "ofpbuf.h" |
27 | #include "openflow/openflow.h" | |
28 | #include "poll-loop.h" | |
064af421 BP |
29 | #include "random.h" |
30 | #include "rconn.h" | |
064af421 BP |
31 | #include "timeval.h" |
32 | #include "vconn.h" | |
33 | ||
b3907fbc | 34 | struct pinqueue { |
531edfbb BP |
35 | struct hmap_node node; /* In struct pinsched's 'queues' hmap. */ |
36 | uint16_t port_no; /* Port number. */ | |
b3907fbc BP |
37 | struct list packets; /* Contains "struct ofpbuf"s. */ |
38 | int n; /* Number of packets in 'packets'. */ | |
39 | }; | |
40 | ||
064af421 BP |
41 | struct pinsched { |
42 | /* Client-supplied parameters. */ | |
43 | int rate_limit; /* Packets added to bucket per second. */ | |
44 | int burst_limit; /* Maximum token bucket size, in packets. */ | |
45 | ||
46 | /* One queue per physical port. */ | |
531edfbb | 47 | struct hmap queues; /* Contains "struct pinqueue"s. */ |
064af421 | 48 | int n_queued; /* Sum over queues[*].n. */ |
531edfbb | 49 | struct pinqueue *next_txq; /* Next pinqueue check in round-robin. */ |
064af421 BP |
50 | |
51 | /* Token bucket. | |
52 | * | |
53 | * It costs 1000 tokens to send a single packet_in message. A single token | |
54 | * per message would be more straightforward, but this choice lets us avoid | |
55 | * round-off error in refill_bucket()'s calculation of how many tokens to | |
56 | * add to the bucket, since no division step is needed. */ | |
57 | long long int last_fill; /* Time at which we last added tokens. */ | |
58 | int tokens; /* Current number of tokens. */ | |
59 | ||
60 | /* Transmission queue. */ | |
61 | int n_txq; /* No. of packets waiting in rconn for tx. */ | |
62 | ||
63 | /* Statistics reporting. */ | |
64 | unsigned long long n_normal; /* # txed w/o rate limit queuing. */ | |
65 | unsigned long long n_limited; /* # queued for rate limiting. */ | |
66 | unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ | |
064af421 BP |
67 | }; |
68 | ||
531edfbb BP |
69 | static void |
70 | advance_txq(struct pinsched *ps) | |
71 | { | |
72 | struct hmap_node *next; | |
73 | ||
74 | next = (ps->next_txq | |
75 | ? hmap_next(&ps->queues, &ps->next_txq->node) | |
76 | : hmap_first(&ps->queues)); | |
77 | ps->next_txq = next ? CONTAINER_OF(next, struct pinqueue, node) : NULL; | |
78 | } | |
79 | ||
064af421 | 80 | static struct ofpbuf * |
531edfbb | 81 | dequeue_packet(struct pinsched *ps, struct pinqueue *q) |
064af421 | 82 | { |
b3907fbc | 83 | struct ofpbuf *packet = ofpbuf_from_list(list_pop_front(&q->packets)); |
531edfbb | 84 | q->n--; |
064af421 BP |
85 | ps->n_queued--; |
86 | return packet; | |
87 | } | |
88 | ||
531edfbb BP |
89 | /* Destroys 'q' and removes it from 'ps''s set of queues. |
90 | * (The caller must ensure that 'q' is empty.) */ | |
91 | static void | |
92 | pinqueue_destroy(struct pinsched *ps, struct pinqueue *q) | |
93 | { | |
94 | hmap_remove(&ps->queues, &q->node); | |
95 | free(q); | |
96 | } | |
97 | ||
98 | static struct pinqueue * | |
99 | pinqueue_get(struct pinsched *ps, uint16_t port_no) | |
100 | { | |
101 | uint32_t hash = hash_int(port_no, 0); | |
102 | struct pinqueue *q; | |
103 | ||
104 | HMAP_FOR_EACH_IN_BUCKET (q, node, hash, &ps->queues) { | |
105 | if (port_no == q->port_no) { | |
106 | return q; | |
107 | } | |
108 | } | |
109 | ||
110 | q = xmalloc(sizeof *q); | |
111 | hmap_insert(&ps->queues, &q->node, hash); | |
112 | q->port_no = port_no; | |
113 | list_init(&q->packets); | |
114 | q->n = 0; | |
115 | return q; | |
116 | } | |
117 | ||
064af421 BP |
118 | /* Drop a packet from the longest queue in 'ps'. */ |
119 | static void | |
120 | drop_packet(struct pinsched *ps) | |
121 | { | |
b3907fbc | 122 | struct pinqueue *longest; /* Queue currently selected as longest. */ |
a2973b1a | 123 | int n_longest = 0; /* # of queues of same length as 'longest'. */ |
b3907fbc | 124 | struct pinqueue *q; |
064af421 BP |
125 | |
126 | ps->n_queue_dropped++; | |
127 | ||
531edfbb BP |
128 | longest = NULL; |
129 | HMAP_FOR_EACH (q, node, &ps->queues) { | |
130 | if (!longest || longest->n < q->n) { | |
064af421 BP |
131 | longest = q; |
132 | n_longest = 1; | |
133 | } else if (longest->n == q->n) { | |
134 | n_longest++; | |
135 | ||
136 | /* Randomly select one of the longest queues, with a uniform | |
137 | * distribution (Knuth algorithm 3.4.2R). */ | |
138 | if (!random_range(n_longest)) { | |
139 | longest = q; | |
064af421 BP |
140 | } |
141 | } | |
142 | } | |
143 | ||
144 | /* FIXME: do we want to pop the tail instead? */ | |
531edfbb BP |
145 | ofpbuf_delete(dequeue_packet(ps, longest)); |
146 | if (longest->n == 0) { | |
147 | pinqueue_destroy(ps, longest); | |
148 | } | |
064af421 BP |
149 | } |
150 | ||
151 | /* Remove and return the next packet to transmit (in round-robin order). */ | |
152 | static struct ofpbuf * | |
153 | get_tx_packet(struct pinsched *ps) | |
154 | { | |
531edfbb BP |
155 | struct ofpbuf *packet; |
156 | struct pinqueue *q; | |
157 | ||
158 | if (!ps->next_txq) { | |
159 | advance_txq(ps); | |
160 | } | |
161 | ||
162 | q = ps->next_txq; | |
163 | packet = dequeue_packet(ps, q); | |
164 | advance_txq(ps); | |
165 | if (q->n == 0) { | |
166 | pinqueue_destroy(ps, q); | |
064af421 | 167 | } |
531edfbb BP |
168 | |
169 | return packet; | |
064af421 BP |
170 | } |
171 | ||
172 | /* Add tokens to the bucket based on elapsed time. */ | |
173 | static void | |
174 | refill_bucket(struct pinsched *ps) | |
175 | { | |
176 | long long int now = time_msec(); | |
177 | long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; | |
178 | if (tokens >= 1000) { | |
179 | ps->last_fill = now; | |
180 | ps->tokens = MIN(tokens, ps->burst_limit * 1000); | |
181 | } | |
182 | } | |
183 | ||
184 | /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns | |
185 | * true if successful, false otherwise. (In the latter case no tokens are | |
186 | * removed.) */ | |
187 | static bool | |
188 | get_token(struct pinsched *ps) | |
189 | { | |
190 | if (ps->tokens >= 1000) { | |
191 | ps->tokens -= 1000; | |
192 | return true; | |
193 | } else { | |
194 | return false; | |
195 | } | |
196 | } | |
197 | ||
198 | void | |
199 | pinsched_send(struct pinsched *ps, uint16_t port_no, | |
200 | struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) | |
201 | { | |
202 | if (!ps) { | |
203 | cb(packet, aux); | |
204 | } else if (!ps->n_queued && get_token(ps)) { | |
205 | /* In the common case where we are not constrained by the rate limit, | |
206 | * let the packet take the normal path. */ | |
207 | ps->n_normal++; | |
208 | cb(packet, aux); | |
209 | } else { | |
210 | /* Otherwise queue it up for the periodic callback to drain out. */ | |
b3907fbc | 211 | struct pinqueue *q; |
064af421 | 212 | |
f797957a BP |
213 | /* We might be called with a buffer obtained from dpif_recv() that has |
214 | * much more allocated space than actual content most of the time. | |
215 | * Since we're going to store the packet for some time, free up that | |
064af421 BP |
216 | * otherwise wasted space. */ |
217 | ofpbuf_trim(packet); | |
218 | ||
219 | if (ps->n_queued >= ps->burst_limit) { | |
220 | drop_packet(ps); | |
221 | } | |
531edfbb | 222 | q = pinqueue_get(ps, port_no); |
b3907fbc BP |
223 | list_push_back(&q->packets, &packet->list_node); |
224 | q->n++; | |
064af421 BP |
225 | ps->n_queued++; |
226 | ps->n_limited++; | |
227 | } | |
228 | } | |
229 | ||
064af421 BP |
230 | void |
231 | pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) | |
232 | { | |
233 | if (ps) { | |
234 | int i; | |
235 | ||
236 | /* Drain some packets out of the bucket if possible, but limit the | |
237 | * number of iterations to allow other code to get work done too. */ | |
238 | refill_bucket(ps); | |
239 | for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { | |
240 | cb(get_tx_packet(ps), aux); | |
241 | } | |
242 | } | |
243 | } | |
244 | ||
245 | void | |
246 | pinsched_wait(struct pinsched *ps) | |
247 | { | |
248 | if (ps && ps->n_queued) { | |
249 | if (ps->tokens >= 1000) { | |
250 | /* We can transmit more packets as soon as we're called again. */ | |
251 | poll_immediate_wake(); | |
252 | } else { | |
253 | /* We have to wait for the bucket to re-fill. We could calculate | |
254 | * the exact amount of time here for increased smoothness. */ | |
255 | poll_timer_wait(TIME_UPDATE_INTERVAL / 2); | |
256 | } | |
257 | } | |
258 | } | |
259 | ||
260 | /* Creates and returns a scheduler for sending packet-in messages. */ | |
261 | struct pinsched * | |
9b45d7f5 | 262 | pinsched_create(int rate_limit, int burst_limit) |
064af421 BP |
263 | { |
264 | struct pinsched *ps; | |
265 | ||
ec6fde61 | 266 | ps = xzalloc(sizeof *ps); |
531edfbb | 267 | hmap_init(&ps->queues); |
064af421 | 268 | ps->n_queued = 0; |
531edfbb | 269 | ps->next_txq = NULL; |
064af421 BP |
270 | ps->last_fill = time_msec(); |
271 | ps->tokens = rate_limit * 100; | |
272 | ps->n_txq = 0; | |
273 | ps->n_normal = 0; | |
274 | ps->n_limited = 0; | |
275 | ps->n_queue_dropped = 0; | |
276 | pinsched_set_limits(ps, rate_limit, burst_limit); | |
277 | ||
064af421 BP |
278 | return ps; |
279 | } | |
280 | ||
281 | void | |
282 | pinsched_destroy(struct pinsched *ps) | |
283 | { | |
284 | if (ps) { | |
531edfbb | 285 | struct pinqueue *q, *next; |
064af421 | 286 | |
531edfbb BP |
287 | HMAP_FOR_EACH_SAFE (q, next, node, &ps->queues) { |
288 | hmap_remove(&ps->queues, &q->node); | |
289 | ofpbuf_list_delete(&q->packets); | |
290 | free(q); | |
064af421 | 291 | } |
531edfbb | 292 | hmap_destroy(&ps->queues); |
064af421 BP |
293 | free(ps); |
294 | } | |
295 | } | |
296 | ||
79c9f2ee BP |
297 | void |
298 | pinsched_get_limits(const struct pinsched *ps, | |
299 | int *rate_limit, int *burst_limit) | |
300 | { | |
301 | *rate_limit = ps->rate_limit; | |
302 | *burst_limit = ps->burst_limit; | |
303 | } | |
304 | ||
064af421 BP |
305 | void |
306 | pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) | |
307 | { | |
308 | if (rate_limit <= 0) { | |
309 | rate_limit = 1000; | |
310 | } | |
311 | if (burst_limit <= 0) { | |
312 | burst_limit = rate_limit / 4; | |
313 | } | |
314 | burst_limit = MAX(burst_limit, 1); | |
315 | burst_limit = MIN(burst_limit, INT_MAX / 1000); | |
316 | ||
317 | ps->rate_limit = rate_limit; | |
318 | ps->burst_limit = burst_limit; | |
319 | while (ps->n_queued > burst_limit) { | |
320 | drop_packet(ps); | |
321 | } | |
322 | } |