]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
54e05b5f | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "pinsched.h" | |
7f3adc00 BP |
19 | #include <sys/types.h> |
20 | #include <netinet/in.h> | |
064af421 | 21 | #include <arpa/inet.h> |
54e05b5f | 22 | #include <stdint.h> |
064af421 | 23 | #include <stdlib.h> |
531edfbb | 24 | #include "hmap.h" |
064af421 BP |
25 | #include "ofpbuf.h" |
26 | #include "openflow/openflow.h" | |
27 | #include "poll-loop.h" | |
064af421 BP |
28 | #include "random.h" |
29 | #include "rconn.h" | |
30 | #include "status.h" | |
31 | #include "timeval.h" | |
32 | #include "vconn.h" | |
33 | ||
b3907fbc | 34 | struct pinqueue { |
531edfbb BP |
35 | struct hmap_node node; /* In struct pinsched's 'queues' hmap. */ |
36 | uint16_t port_no; /* Port number. */ | |
b3907fbc BP |
37 | struct list packets; /* Contains "struct ofpbuf"s. */ |
38 | int n; /* Number of packets in 'packets'. */ | |
39 | }; | |
40 | ||
064af421 BP |
41 | struct pinsched { |
42 | /* Client-supplied parameters. */ | |
43 | int rate_limit; /* Packets added to bucket per second. */ | |
44 | int burst_limit; /* Maximum token bucket size, in packets. */ | |
45 | ||
46 | /* One queue per physical port. */ | |
531edfbb | 47 | struct hmap queues; /* Contains "struct pinqueue"s. */ |
064af421 | 48 | int n_queued; /* Sum over queues[*].n. */ |
531edfbb | 49 | struct pinqueue *next_txq; /* Next pinqueue check in round-robin. */ |
064af421 BP |
50 | |
51 | /* Token bucket. | |
52 | * | |
53 | * It costs 1000 tokens to send a single packet_in message. A single token | |
54 | * per message would be more straightforward, but this choice lets us avoid | |
55 | * round-off error in refill_bucket()'s calculation of how many tokens to | |
56 | * add to the bucket, since no division step is needed. */ | |
57 | long long int last_fill; /* Time at which we last added tokens. */ | |
58 | int tokens; /* Current number of tokens. */ | |
59 | ||
60 | /* Transmission queue. */ | |
61 | int n_txq; /* No. of packets waiting in rconn for tx. */ | |
62 | ||
63 | /* Statistics reporting. */ | |
64 | unsigned long long n_normal; /* # txed w/o rate limit queuing. */ | |
65 | unsigned long long n_limited; /* # queued for rate limiting. */ | |
66 | unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ | |
67 | ||
68 | /* Switch status. */ | |
69 | struct status_category *ss_cat; | |
70 | }; | |
71 | ||
531edfbb BP |
72 | static void |
73 | advance_txq(struct pinsched *ps) | |
74 | { | |
75 | struct hmap_node *next; | |
76 | ||
77 | next = (ps->next_txq | |
78 | ? hmap_next(&ps->queues, &ps->next_txq->node) | |
79 | : hmap_first(&ps->queues)); | |
80 | ps->next_txq = next ? CONTAINER_OF(next, struct pinqueue, node) : NULL; | |
81 | } | |
82 | ||
064af421 | 83 | static struct ofpbuf * |
531edfbb | 84 | dequeue_packet(struct pinsched *ps, struct pinqueue *q) |
064af421 | 85 | { |
b3907fbc | 86 | struct ofpbuf *packet = ofpbuf_from_list(list_pop_front(&q->packets)); |
531edfbb | 87 | q->n--; |
064af421 BP |
88 | ps->n_queued--; |
89 | return packet; | |
90 | } | |
91 | ||
531edfbb BP |
92 | /* Destroys 'q' and removes it from 'ps''s set of queues. |
93 | * (The caller must ensure that 'q' is empty.) */ | |
94 | static void | |
95 | pinqueue_destroy(struct pinsched *ps, struct pinqueue *q) | |
96 | { | |
97 | hmap_remove(&ps->queues, &q->node); | |
98 | free(q); | |
99 | } | |
100 | ||
101 | static struct pinqueue * | |
102 | pinqueue_get(struct pinsched *ps, uint16_t port_no) | |
103 | { | |
104 | uint32_t hash = hash_int(port_no, 0); | |
105 | struct pinqueue *q; | |
106 | ||
107 | HMAP_FOR_EACH_IN_BUCKET (q, node, hash, &ps->queues) { | |
108 | if (port_no == q->port_no) { | |
109 | return q; | |
110 | } | |
111 | } | |
112 | ||
113 | q = xmalloc(sizeof *q); | |
114 | hmap_insert(&ps->queues, &q->node, hash); | |
115 | q->port_no = port_no; | |
116 | list_init(&q->packets); | |
117 | q->n = 0; | |
118 | return q; | |
119 | } | |
120 | ||
064af421 BP |
121 | /* Drop a packet from the longest queue in 'ps'. */ |
122 | static void | |
123 | drop_packet(struct pinsched *ps) | |
124 | { | |
b3907fbc | 125 | struct pinqueue *longest; /* Queue currently selected as longest. */ |
a2973b1a | 126 | int n_longest = 0; /* # of queues of same length as 'longest'. */ |
b3907fbc | 127 | struct pinqueue *q; |
064af421 BP |
128 | |
129 | ps->n_queue_dropped++; | |
130 | ||
531edfbb BP |
131 | longest = NULL; |
132 | HMAP_FOR_EACH (q, node, &ps->queues) { | |
133 | if (!longest || longest->n < q->n) { | |
064af421 BP |
134 | longest = q; |
135 | n_longest = 1; | |
136 | } else if (longest->n == q->n) { | |
137 | n_longest++; | |
138 | ||
139 | /* Randomly select one of the longest queues, with a uniform | |
140 | * distribution (Knuth algorithm 3.4.2R). */ | |
141 | if (!random_range(n_longest)) { | |
142 | longest = q; | |
064af421 BP |
143 | } |
144 | } | |
145 | } | |
146 | ||
147 | /* FIXME: do we want to pop the tail instead? */ | |
531edfbb BP |
148 | ofpbuf_delete(dequeue_packet(ps, longest)); |
149 | if (longest->n == 0) { | |
150 | pinqueue_destroy(ps, longest); | |
151 | } | |
064af421 BP |
152 | } |
153 | ||
154 | /* Remove and return the next packet to transmit (in round-robin order). */ | |
155 | static struct ofpbuf * | |
156 | get_tx_packet(struct pinsched *ps) | |
157 | { | |
531edfbb BP |
158 | struct ofpbuf *packet; |
159 | struct pinqueue *q; | |
160 | ||
161 | if (!ps->next_txq) { | |
162 | advance_txq(ps); | |
163 | } | |
164 | ||
165 | q = ps->next_txq; | |
166 | packet = dequeue_packet(ps, q); | |
167 | advance_txq(ps); | |
168 | if (q->n == 0) { | |
169 | pinqueue_destroy(ps, q); | |
064af421 | 170 | } |
531edfbb BP |
171 | |
172 | return packet; | |
064af421 BP |
173 | } |
174 | ||
175 | /* Add tokens to the bucket based on elapsed time. */ | |
176 | static void | |
177 | refill_bucket(struct pinsched *ps) | |
178 | { | |
179 | long long int now = time_msec(); | |
180 | long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; | |
181 | if (tokens >= 1000) { | |
182 | ps->last_fill = now; | |
183 | ps->tokens = MIN(tokens, ps->burst_limit * 1000); | |
184 | } | |
185 | } | |
186 | ||
187 | /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns | |
188 | * true if successful, false otherwise. (In the latter case no tokens are | |
189 | * removed.) */ | |
190 | static bool | |
191 | get_token(struct pinsched *ps) | |
192 | { | |
193 | if (ps->tokens >= 1000) { | |
194 | ps->tokens -= 1000; | |
195 | return true; | |
196 | } else { | |
197 | return false; | |
198 | } | |
199 | } | |
200 | ||
201 | void | |
202 | pinsched_send(struct pinsched *ps, uint16_t port_no, | |
203 | struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) | |
204 | { | |
205 | if (!ps) { | |
206 | cb(packet, aux); | |
207 | } else if (!ps->n_queued && get_token(ps)) { | |
208 | /* In the common case where we are not constrained by the rate limit, | |
209 | * let the packet take the normal path. */ | |
210 | ps->n_normal++; | |
211 | cb(packet, aux); | |
212 | } else { | |
213 | /* Otherwise queue it up for the periodic callback to drain out. */ | |
b3907fbc | 214 | struct pinqueue *q; |
064af421 BP |
215 | |
216 | /* We are called with a buffer obtained from dpif_recv() that has much | |
217 | * more allocated space than actual content most of the time. Since | |
218 | * we're going to store the packet for some time, free up that | |
219 | * otherwise wasted space. */ | |
220 | ofpbuf_trim(packet); | |
221 | ||
222 | if (ps->n_queued >= ps->burst_limit) { | |
223 | drop_packet(ps); | |
224 | } | |
531edfbb | 225 | q = pinqueue_get(ps, port_no); |
b3907fbc BP |
226 | list_push_back(&q->packets, &packet->list_node); |
227 | q->n++; | |
064af421 BP |
228 | ps->n_queued++; |
229 | ps->n_limited++; | |
230 | } | |
231 | } | |
232 | ||
233 | static void | |
234 | pinsched_status_cb(struct status_reply *sr, void *ps_) | |
235 | { | |
236 | struct pinsched *ps = ps_; | |
237 | ||
238 | status_reply_put(sr, "normal=%llu", ps->n_normal); | |
239 | status_reply_put(sr, "limited=%llu", ps->n_limited); | |
240 | status_reply_put(sr, "queue-dropped=%llu", ps->n_queue_dropped); | |
241 | } | |
242 | ||
243 | void | |
244 | pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) | |
245 | { | |
246 | if (ps) { | |
247 | int i; | |
248 | ||
249 | /* Drain some packets out of the bucket if possible, but limit the | |
250 | * number of iterations to allow other code to get work done too. */ | |
251 | refill_bucket(ps); | |
252 | for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { | |
253 | cb(get_tx_packet(ps), aux); | |
254 | } | |
255 | } | |
256 | } | |
257 | ||
258 | void | |
259 | pinsched_wait(struct pinsched *ps) | |
260 | { | |
261 | if (ps && ps->n_queued) { | |
262 | if (ps->tokens >= 1000) { | |
263 | /* We can transmit more packets as soon as we're called again. */ | |
264 | poll_immediate_wake(); | |
265 | } else { | |
266 | /* We have to wait for the bucket to re-fill. We could calculate | |
267 | * the exact amount of time here for increased smoothness. */ | |
268 | poll_timer_wait(TIME_UPDATE_INTERVAL / 2); | |
269 | } | |
270 | } | |
271 | } | |
272 | ||
273 | /* Creates and returns a scheduler for sending packet-in messages. */ | |
274 | struct pinsched * | |
275 | pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss) | |
276 | { | |
277 | struct pinsched *ps; | |
278 | ||
ec6fde61 | 279 | ps = xzalloc(sizeof *ps); |
531edfbb | 280 | hmap_init(&ps->queues); |
064af421 | 281 | ps->n_queued = 0; |
531edfbb | 282 | ps->next_txq = NULL; |
064af421 BP |
283 | ps->last_fill = time_msec(); |
284 | ps->tokens = rate_limit * 100; | |
285 | ps->n_txq = 0; | |
286 | ps->n_normal = 0; | |
287 | ps->n_limited = 0; | |
288 | ps->n_queue_dropped = 0; | |
289 | pinsched_set_limits(ps, rate_limit, burst_limit); | |
290 | ||
291 | if (ss) { | |
292 | ps->ss_cat = switch_status_register(ss, "rate-limit", | |
293 | pinsched_status_cb, ps); | |
294 | } | |
295 | ||
296 | return ps; | |
297 | } | |
298 | ||
299 | void | |
300 | pinsched_destroy(struct pinsched *ps) | |
301 | { | |
302 | if (ps) { | |
531edfbb | 303 | struct pinqueue *q, *next; |
064af421 | 304 | |
531edfbb BP |
305 | HMAP_FOR_EACH_SAFE (q, next, node, &ps->queues) { |
306 | hmap_remove(&ps->queues, &q->node); | |
307 | ofpbuf_list_delete(&q->packets); | |
308 | free(q); | |
064af421 | 309 | } |
531edfbb | 310 | hmap_destroy(&ps->queues); |
064af421 BP |
311 | switch_status_unregister(ps->ss_cat); |
312 | free(ps); | |
313 | } | |
314 | } | |
315 | ||
79c9f2ee BP |
316 | void |
317 | pinsched_get_limits(const struct pinsched *ps, | |
318 | int *rate_limit, int *burst_limit) | |
319 | { | |
320 | *rate_limit = ps->rate_limit; | |
321 | *burst_limit = ps->burst_limit; | |
322 | } | |
323 | ||
064af421 BP |
324 | void |
325 | pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) | |
326 | { | |
327 | if (rate_limit <= 0) { | |
328 | rate_limit = 1000; | |
329 | } | |
330 | if (burst_limit <= 0) { | |
331 | burst_limit = rate_limit / 4; | |
332 | } | |
333 | burst_limit = MAX(burst_limit, 1); | |
334 | burst_limit = MIN(burst_limit, INT_MAX / 1000); | |
335 | ||
336 | ps->rate_limit = rate_limit; | |
337 | ps->burst_limit = burst_limit; | |
338 | while (ps->n_queued > burst_limit) { | |
339 | drop_packet(ps); | |
340 | } | |
341 | } |