]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
54e05b5f | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "pinsched.h" | |
7f3adc00 BP |
19 | #include <sys/types.h> |
20 | #include <netinet/in.h> | |
064af421 | 21 | #include <arpa/inet.h> |
54e05b5f | 22 | #include <stdint.h> |
064af421 BP |
23 | #include <stdlib.h> |
24 | #include "ofpbuf.h" | |
25 | #include "openflow/openflow.h" | |
26 | #include "poll-loop.h" | |
27 | #include "port-array.h" | |
28 | #include "queue.h" | |
29 | #include "random.h" | |
30 | #include "rconn.h" | |
31 | #include "status.h" | |
32 | #include "timeval.h" | |
33 | #include "vconn.h" | |
34 | ||
35 | struct pinsched { | |
36 | /* Client-supplied parameters. */ | |
37 | int rate_limit; /* Packets added to bucket per second. */ | |
38 | int burst_limit; /* Maximum token bucket size, in packets. */ | |
39 | ||
40 | /* One queue per physical port. */ | |
41 | struct port_array queues; /* Array of "struct ovs_queue *". */ | |
42 | int n_queued; /* Sum over queues[*].n. */ | |
43 | unsigned int last_tx_port; /* Last port checked in round-robin. */ | |
44 | ||
45 | /* Token bucket. | |
46 | * | |
47 | * It costs 1000 tokens to send a single packet_in message. A single token | |
48 | * per message would be more straightforward, but this choice lets us avoid | |
49 | * round-off error in refill_bucket()'s calculation of how many tokens to | |
50 | * add to the bucket, since no division step is needed. */ | |
51 | long long int last_fill; /* Time at which we last added tokens. */ | |
52 | int tokens; /* Current number of tokens. */ | |
53 | ||
54 | /* Transmission queue. */ | |
55 | int n_txq; /* No. of packets waiting in rconn for tx. */ | |
56 | ||
57 | /* Statistics reporting. */ | |
58 | unsigned long long n_normal; /* # txed w/o rate limit queuing. */ | |
59 | unsigned long long n_limited; /* # queued for rate limiting. */ | |
60 | unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ | |
61 | ||
62 | /* Switch status. */ | |
63 | struct status_category *ss_cat; | |
64 | }; | |
65 | ||
66 | static struct ofpbuf * | |
67 | dequeue_packet(struct pinsched *ps, struct ovs_queue *q, | |
68 | unsigned int port_no) | |
69 | { | |
70 | struct ofpbuf *packet = queue_pop_head(q); | |
71 | if (!q->n) { | |
72 | free(q); | |
73 | port_array_set(&ps->queues, port_no, NULL); | |
74 | } | |
75 | ps->n_queued--; | |
76 | return packet; | |
77 | } | |
78 | ||
79 | /* Drop a packet from the longest queue in 'ps'. */ | |
80 | static void | |
81 | drop_packet(struct pinsched *ps) | |
82 | { | |
83 | struct ovs_queue *longest; /* Queue currently selected as longest. */ | |
84 | int n_longest; /* # of queues of same length as 'longest'. */ | |
85 | unsigned int longest_port_no; | |
86 | unsigned int port_no; | |
87 | struct ovs_queue *q; | |
88 | ||
89 | ps->n_queue_dropped++; | |
90 | ||
91 | longest = port_array_first(&ps->queues, &port_no); | |
92 | longest_port_no = port_no; | |
93 | n_longest = 1; | |
94 | while ((q = port_array_next(&ps->queues, &port_no)) != NULL) { | |
95 | if (longest->n < q->n) { | |
96 | longest = q; | |
97 | n_longest = 1; | |
98 | } else if (longest->n == q->n) { | |
99 | n_longest++; | |
100 | ||
101 | /* Randomly select one of the longest queues, with a uniform | |
102 | * distribution (Knuth algorithm 3.4.2R). */ | |
103 | if (!random_range(n_longest)) { | |
104 | longest = q; | |
105 | longest_port_no = port_no; | |
106 | } | |
107 | } | |
108 | } | |
109 | ||
110 | /* FIXME: do we want to pop the tail instead? */ | |
111 | ofpbuf_delete(dequeue_packet(ps, longest, longest_port_no)); | |
112 | } | |
113 | ||
114 | /* Remove and return the next packet to transmit (in round-robin order). */ | |
115 | static struct ofpbuf * | |
116 | get_tx_packet(struct pinsched *ps) | |
117 | { | |
118 | struct ovs_queue *q = port_array_next(&ps->queues, &ps->last_tx_port); | |
119 | if (!q) { | |
120 | q = port_array_first(&ps->queues, &ps->last_tx_port); | |
121 | } | |
122 | return dequeue_packet(ps, q, ps->last_tx_port); | |
123 | } | |
124 | ||
125 | /* Add tokens to the bucket based on elapsed time. */ | |
126 | static void | |
127 | refill_bucket(struct pinsched *ps) | |
128 | { | |
129 | long long int now = time_msec(); | |
130 | long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; | |
131 | if (tokens >= 1000) { | |
132 | ps->last_fill = now; | |
133 | ps->tokens = MIN(tokens, ps->burst_limit * 1000); | |
134 | } | |
135 | } | |
136 | ||
137 | /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns | |
138 | * true if successful, false otherwise. (In the latter case no tokens are | |
139 | * removed.) */ | |
140 | static bool | |
141 | get_token(struct pinsched *ps) | |
142 | { | |
143 | if (ps->tokens >= 1000) { | |
144 | ps->tokens -= 1000; | |
145 | return true; | |
146 | } else { | |
147 | return false; | |
148 | } | |
149 | } | |
150 | ||
151 | void | |
152 | pinsched_send(struct pinsched *ps, uint16_t port_no, | |
153 | struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) | |
154 | { | |
155 | if (!ps) { | |
156 | cb(packet, aux); | |
157 | } else if (!ps->n_queued && get_token(ps)) { | |
158 | /* In the common case where we are not constrained by the rate limit, | |
159 | * let the packet take the normal path. */ | |
160 | ps->n_normal++; | |
161 | cb(packet, aux); | |
162 | } else { | |
163 | /* Otherwise queue it up for the periodic callback to drain out. */ | |
164 | struct ovs_queue *q; | |
165 | ||
166 | /* We are called with a buffer obtained from dpif_recv() that has much | |
167 | * more allocated space than actual content most of the time. Since | |
168 | * we're going to store the packet for some time, free up that | |
169 | * otherwise wasted space. */ | |
170 | ofpbuf_trim(packet); | |
171 | ||
172 | if (ps->n_queued >= ps->burst_limit) { | |
173 | drop_packet(ps); | |
174 | } | |
175 | q = port_array_get(&ps->queues, port_no); | |
176 | if (!q) { | |
177 | q = xmalloc(sizeof *q); | |
178 | queue_init(q); | |
179 | port_array_set(&ps->queues, port_no, q); | |
180 | } | |
181 | queue_push_tail(q, packet); | |
182 | ps->n_queued++; | |
183 | ps->n_limited++; | |
184 | } | |
185 | } | |
186 | ||
187 | static void | |
188 | pinsched_status_cb(struct status_reply *sr, void *ps_) | |
189 | { | |
190 | struct pinsched *ps = ps_; | |
191 | ||
192 | status_reply_put(sr, "normal=%llu", ps->n_normal); | |
193 | status_reply_put(sr, "limited=%llu", ps->n_limited); | |
194 | status_reply_put(sr, "queue-dropped=%llu", ps->n_queue_dropped); | |
195 | } | |
196 | ||
197 | void | |
198 | pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) | |
199 | { | |
200 | if (ps) { | |
201 | int i; | |
202 | ||
203 | /* Drain some packets out of the bucket if possible, but limit the | |
204 | * number of iterations to allow other code to get work done too. */ | |
205 | refill_bucket(ps); | |
206 | for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { | |
207 | cb(get_tx_packet(ps), aux); | |
208 | } | |
209 | } | |
210 | } | |
211 | ||
212 | void | |
213 | pinsched_wait(struct pinsched *ps) | |
214 | { | |
215 | if (ps && ps->n_queued) { | |
216 | if (ps->tokens >= 1000) { | |
217 | /* We can transmit more packets as soon as we're called again. */ | |
218 | poll_immediate_wake(); | |
219 | } else { | |
220 | /* We have to wait for the bucket to re-fill. We could calculate | |
221 | * the exact amount of time here for increased smoothness. */ | |
222 | poll_timer_wait(TIME_UPDATE_INTERVAL / 2); | |
223 | } | |
224 | } | |
225 | } | |
226 | ||
227 | /* Creates and returns a scheduler for sending packet-in messages. */ | |
228 | struct pinsched * | |
229 | pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss) | |
230 | { | |
231 | struct pinsched *ps; | |
232 | ||
ec6fde61 | 233 | ps = xzalloc(sizeof *ps); |
064af421 BP |
234 | port_array_init(&ps->queues); |
235 | ps->n_queued = 0; | |
236 | ps->last_tx_port = PORT_ARRAY_SIZE; | |
237 | ps->last_fill = time_msec(); | |
238 | ps->tokens = rate_limit * 100; | |
239 | ps->n_txq = 0; | |
240 | ps->n_normal = 0; | |
241 | ps->n_limited = 0; | |
242 | ps->n_queue_dropped = 0; | |
243 | pinsched_set_limits(ps, rate_limit, burst_limit); | |
244 | ||
245 | if (ss) { | |
246 | ps->ss_cat = switch_status_register(ss, "rate-limit", | |
247 | pinsched_status_cb, ps); | |
248 | } | |
249 | ||
250 | return ps; | |
251 | } | |
252 | ||
253 | void | |
254 | pinsched_destroy(struct pinsched *ps) | |
255 | { | |
256 | if (ps) { | |
257 | struct ovs_queue *queue; | |
258 | unsigned int port_no; | |
259 | ||
260 | PORT_ARRAY_FOR_EACH (queue, &ps->queues, port_no) { | |
261 | queue_destroy(queue); | |
262 | free(queue); | |
263 | } | |
264 | port_array_destroy(&ps->queues); | |
265 | switch_status_unregister(ps->ss_cat); | |
266 | free(ps); | |
267 | } | |
268 | } | |
269 | ||
270 | void | |
271 | pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) | |
272 | { | |
273 | if (rate_limit <= 0) { | |
274 | rate_limit = 1000; | |
275 | } | |
276 | if (burst_limit <= 0) { | |
277 | burst_limit = rate_limit / 4; | |
278 | } | |
279 | burst_limit = MAX(burst_limit, 1); | |
280 | burst_limit = MIN(burst_limit, INT_MAX / 1000); | |
281 | ||
282 | ps->rate_limit = rate_limit; | |
283 | ps->burst_limit = burst_limit; | |
284 | while (ps->n_queued > burst_limit) { | |
285 | drop_packet(ps); | |
286 | } | |
287 | } |