]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
54e05b5f | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "pinsched.h" | |
19 | #include <arpa/inet.h> | |
54e05b5f | 20 | #include <stdint.h> |
064af421 BP |
21 | #include <stdlib.h> |
22 | #include "ofpbuf.h" | |
23 | #include "openflow/openflow.h" | |
24 | #include "poll-loop.h" | |
25 | #include "port-array.h" | |
26 | #include "queue.h" | |
27 | #include "random.h" | |
28 | #include "rconn.h" | |
29 | #include "status.h" | |
30 | #include "timeval.h" | |
31 | #include "vconn.h" | |
32 | ||
33 | struct pinsched { | |
34 | /* Client-supplied parameters. */ | |
35 | int rate_limit; /* Packets added to bucket per second. */ | |
36 | int burst_limit; /* Maximum token bucket size, in packets. */ | |
37 | ||
38 | /* One queue per physical port. */ | |
39 | struct port_array queues; /* Array of "struct ovs_queue *". */ | |
40 | int n_queued; /* Sum over queues[*].n. */ | |
41 | unsigned int last_tx_port; /* Last port checked in round-robin. */ | |
42 | ||
43 | /* Token bucket. | |
44 | * | |
45 | * It costs 1000 tokens to send a single packet_in message. A single token | |
46 | * per message would be more straightforward, but this choice lets us avoid | |
47 | * round-off error in refill_bucket()'s calculation of how many tokens to | |
48 | * add to the bucket, since no division step is needed. */ | |
49 | long long int last_fill; /* Time at which we last added tokens. */ | |
50 | int tokens; /* Current number of tokens. */ | |
51 | ||
52 | /* Transmission queue. */ | |
53 | int n_txq; /* No. of packets waiting in rconn for tx. */ | |
54 | ||
55 | /* Statistics reporting. */ | |
56 | unsigned long long n_normal; /* # txed w/o rate limit queuing. */ | |
57 | unsigned long long n_limited; /* # queued for rate limiting. */ | |
58 | unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ | |
59 | ||
60 | /* Switch status. */ | |
61 | struct status_category *ss_cat; | |
62 | }; | |
63 | ||
64 | static struct ofpbuf * | |
65 | dequeue_packet(struct pinsched *ps, struct ovs_queue *q, | |
66 | unsigned int port_no) | |
67 | { | |
68 | struct ofpbuf *packet = queue_pop_head(q); | |
69 | if (!q->n) { | |
70 | free(q); | |
71 | port_array_set(&ps->queues, port_no, NULL); | |
72 | } | |
73 | ps->n_queued--; | |
74 | return packet; | |
75 | } | |
76 | ||
77 | /* Drop a packet from the longest queue in 'ps'. */ | |
78 | static void | |
79 | drop_packet(struct pinsched *ps) | |
80 | { | |
81 | struct ovs_queue *longest; /* Queue currently selected as longest. */ | |
82 | int n_longest; /* # of queues of same length as 'longest'. */ | |
83 | unsigned int longest_port_no; | |
84 | unsigned int port_no; | |
85 | struct ovs_queue *q; | |
86 | ||
87 | ps->n_queue_dropped++; | |
88 | ||
89 | longest = port_array_first(&ps->queues, &port_no); | |
90 | longest_port_no = port_no; | |
91 | n_longest = 1; | |
92 | while ((q = port_array_next(&ps->queues, &port_no)) != NULL) { | |
93 | if (longest->n < q->n) { | |
94 | longest = q; | |
95 | n_longest = 1; | |
96 | } else if (longest->n == q->n) { | |
97 | n_longest++; | |
98 | ||
99 | /* Randomly select one of the longest queues, with a uniform | |
100 | * distribution (Knuth algorithm 3.4.2R). */ | |
101 | if (!random_range(n_longest)) { | |
102 | longest = q; | |
103 | longest_port_no = port_no; | |
104 | } | |
105 | } | |
106 | } | |
107 | ||
108 | /* FIXME: do we want to pop the tail instead? */ | |
109 | ofpbuf_delete(dequeue_packet(ps, longest, longest_port_no)); | |
110 | } | |
111 | ||
112 | /* Remove and return the next packet to transmit (in round-robin order). */ | |
113 | static struct ofpbuf * | |
114 | get_tx_packet(struct pinsched *ps) | |
115 | { | |
116 | struct ovs_queue *q = port_array_next(&ps->queues, &ps->last_tx_port); | |
117 | if (!q) { | |
118 | q = port_array_first(&ps->queues, &ps->last_tx_port); | |
119 | } | |
120 | return dequeue_packet(ps, q, ps->last_tx_port); | |
121 | } | |
122 | ||
123 | /* Add tokens to the bucket based on elapsed time. */ | |
124 | static void | |
125 | refill_bucket(struct pinsched *ps) | |
126 | { | |
127 | long long int now = time_msec(); | |
128 | long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; | |
129 | if (tokens >= 1000) { | |
130 | ps->last_fill = now; | |
131 | ps->tokens = MIN(tokens, ps->burst_limit * 1000); | |
132 | } | |
133 | } | |
134 | ||
135 | /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns | |
136 | * true if successful, false otherwise. (In the latter case no tokens are | |
137 | * removed.) */ | |
138 | static bool | |
139 | get_token(struct pinsched *ps) | |
140 | { | |
141 | if (ps->tokens >= 1000) { | |
142 | ps->tokens -= 1000; | |
143 | return true; | |
144 | } else { | |
145 | return false; | |
146 | } | |
147 | } | |
148 | ||
149 | void | |
150 | pinsched_send(struct pinsched *ps, uint16_t port_no, | |
151 | struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) | |
152 | { | |
153 | if (!ps) { | |
154 | cb(packet, aux); | |
155 | } else if (!ps->n_queued && get_token(ps)) { | |
156 | /* In the common case where we are not constrained by the rate limit, | |
157 | * let the packet take the normal path. */ | |
158 | ps->n_normal++; | |
159 | cb(packet, aux); | |
160 | } else { | |
161 | /* Otherwise queue it up for the periodic callback to drain out. */ | |
162 | struct ovs_queue *q; | |
163 | ||
164 | /* We are called with a buffer obtained from dpif_recv() that has much | |
165 | * more allocated space than actual content most of the time. Since | |
166 | * we're going to store the packet for some time, free up that | |
167 | * otherwise wasted space. */ | |
168 | ofpbuf_trim(packet); | |
169 | ||
170 | if (ps->n_queued >= ps->burst_limit) { | |
171 | drop_packet(ps); | |
172 | } | |
173 | q = port_array_get(&ps->queues, port_no); | |
174 | if (!q) { | |
175 | q = xmalloc(sizeof *q); | |
176 | queue_init(q); | |
177 | port_array_set(&ps->queues, port_no, q); | |
178 | } | |
179 | queue_push_tail(q, packet); | |
180 | ps->n_queued++; | |
181 | ps->n_limited++; | |
182 | } | |
183 | } | |
184 | ||
185 | static void | |
186 | pinsched_status_cb(struct status_reply *sr, void *ps_) | |
187 | { | |
188 | struct pinsched *ps = ps_; | |
189 | ||
190 | status_reply_put(sr, "normal=%llu", ps->n_normal); | |
191 | status_reply_put(sr, "limited=%llu", ps->n_limited); | |
192 | status_reply_put(sr, "queue-dropped=%llu", ps->n_queue_dropped); | |
193 | } | |
194 | ||
195 | void | |
196 | pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) | |
197 | { | |
198 | if (ps) { | |
199 | int i; | |
200 | ||
201 | /* Drain some packets out of the bucket if possible, but limit the | |
202 | * number of iterations to allow other code to get work done too. */ | |
203 | refill_bucket(ps); | |
204 | for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { | |
205 | cb(get_tx_packet(ps), aux); | |
206 | } | |
207 | } | |
208 | } | |
209 | ||
210 | void | |
211 | pinsched_wait(struct pinsched *ps) | |
212 | { | |
213 | if (ps && ps->n_queued) { | |
214 | if (ps->tokens >= 1000) { | |
215 | /* We can transmit more packets as soon as we're called again. */ | |
216 | poll_immediate_wake(); | |
217 | } else { | |
218 | /* We have to wait for the bucket to re-fill. We could calculate | |
219 | * the exact amount of time here for increased smoothness. */ | |
220 | poll_timer_wait(TIME_UPDATE_INTERVAL / 2); | |
221 | } | |
222 | } | |
223 | } | |
224 | ||
225 | /* Creates and returns a scheduler for sending packet-in messages. */ | |
226 | struct pinsched * | |
227 | pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss) | |
228 | { | |
229 | struct pinsched *ps; | |
230 | ||
231 | ps = xcalloc(1, sizeof *ps); | |
232 | port_array_init(&ps->queues); | |
233 | ps->n_queued = 0; | |
234 | ps->last_tx_port = PORT_ARRAY_SIZE; | |
235 | ps->last_fill = time_msec(); | |
236 | ps->tokens = rate_limit * 100; | |
237 | ps->n_txq = 0; | |
238 | ps->n_normal = 0; | |
239 | ps->n_limited = 0; | |
240 | ps->n_queue_dropped = 0; | |
241 | pinsched_set_limits(ps, rate_limit, burst_limit); | |
242 | ||
243 | if (ss) { | |
244 | ps->ss_cat = switch_status_register(ss, "rate-limit", | |
245 | pinsched_status_cb, ps); | |
246 | } | |
247 | ||
248 | return ps; | |
249 | } | |
250 | ||
251 | void | |
252 | pinsched_destroy(struct pinsched *ps) | |
253 | { | |
254 | if (ps) { | |
255 | struct ovs_queue *queue; | |
256 | unsigned int port_no; | |
257 | ||
258 | PORT_ARRAY_FOR_EACH (queue, &ps->queues, port_no) { | |
259 | queue_destroy(queue); | |
260 | free(queue); | |
261 | } | |
262 | port_array_destroy(&ps->queues); | |
263 | switch_status_unregister(ps->ss_cat); | |
264 | free(ps); | |
265 | } | |
266 | } | |
267 | ||
268 | void | |
269 | pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) | |
270 | { | |
271 | if (rate_limit <= 0) { | |
272 | rate_limit = 1000; | |
273 | } | |
274 | if (burst_limit <= 0) { | |
275 | burst_limit = rate_limit / 4; | |
276 | } | |
277 | burst_limit = MAX(burst_limit, 1); | |
278 | burst_limit = MIN(burst_limit, INT_MAX / 1000); | |
279 | ||
280 | ps->rate_limit = rate_limit; | |
281 | ps->burst_limit = burst_limit; | |
282 | while (ps->n_queued > burst_limit) { | |
283 | drop_packet(ps); | |
284 | } | |
285 | } |