]>
Commit | Line | Data |
---|---|---|
064af421 BP |
1 | /* |
2 | * Copyright (c) 2008, 2009 Nicira Networks. | |
3 | * | |
4 | * Permission to use, copy, modify, and/or distribute this software for any | |
5 | * purpose with or without fee is hereby granted, provided that the above | |
6 | * copyright notice and this permission notice appear in all copies. | |
7 | * | |
8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "pinsched.h" | |
19 | #include <arpa/inet.h> | |
20 | #include <stdlib.h> | |
21 | #include "ofpbuf.h" | |
22 | #include "openflow/openflow.h" | |
23 | #include "poll-loop.h" | |
24 | #include "port-array.h" | |
25 | #include "queue.h" | |
26 | #include "random.h" | |
27 | #include "rconn.h" | |
28 | #include "status.h" | |
29 | #include "timeval.h" | |
30 | #include "vconn.h" | |
31 | ||
32 | struct pinsched { | |
33 | /* Client-supplied parameters. */ | |
34 | int rate_limit; /* Packets added to bucket per second. */ | |
35 | int burst_limit; /* Maximum token bucket size, in packets. */ | |
36 | ||
37 | /* One queue per physical port. */ | |
38 | struct port_array queues; /* Array of "struct ovs_queue *". */ | |
39 | int n_queued; /* Sum over queues[*].n. */ | |
40 | unsigned int last_tx_port; /* Last port checked in round-robin. */ | |
41 | ||
42 | /* Token bucket. | |
43 | * | |
44 | * It costs 1000 tokens to send a single packet_in message. A single token | |
45 | * per message would be more straightforward, but this choice lets us avoid | |
46 | * round-off error in refill_bucket()'s calculation of how many tokens to | |
47 | * add to the bucket, since no division step is needed. */ | |
48 | long long int last_fill; /* Time at which we last added tokens. */ | |
49 | int tokens; /* Current number of tokens. */ | |
50 | ||
51 | /* Transmission queue. */ | |
52 | int n_txq; /* No. of packets waiting in rconn for tx. */ | |
53 | ||
54 | /* Statistics reporting. */ | |
55 | unsigned long long n_normal; /* # txed w/o rate limit queuing. */ | |
56 | unsigned long long n_limited; /* # queued for rate limiting. */ | |
57 | unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ | |
58 | ||
59 | /* Switch status. */ | |
60 | struct status_category *ss_cat; | |
61 | }; | |
62 | ||
63 | static struct ofpbuf * | |
64 | dequeue_packet(struct pinsched *ps, struct ovs_queue *q, | |
65 | unsigned int port_no) | |
66 | { | |
67 | struct ofpbuf *packet = queue_pop_head(q); | |
68 | if (!q->n) { | |
69 | free(q); | |
70 | port_array_set(&ps->queues, port_no, NULL); | |
71 | } | |
72 | ps->n_queued--; | |
73 | return packet; | |
74 | } | |
75 | ||
76 | /* Drop a packet from the longest queue in 'ps'. */ | |
77 | static void | |
78 | drop_packet(struct pinsched *ps) | |
79 | { | |
80 | struct ovs_queue *longest; /* Queue currently selected as longest. */ | |
81 | int n_longest; /* # of queues of same length as 'longest'. */ | |
82 | unsigned int longest_port_no; | |
83 | unsigned int port_no; | |
84 | struct ovs_queue *q; | |
85 | ||
86 | ps->n_queue_dropped++; | |
87 | ||
88 | longest = port_array_first(&ps->queues, &port_no); | |
89 | longest_port_no = port_no; | |
90 | n_longest = 1; | |
91 | while ((q = port_array_next(&ps->queues, &port_no)) != NULL) { | |
92 | if (longest->n < q->n) { | |
93 | longest = q; | |
94 | n_longest = 1; | |
95 | } else if (longest->n == q->n) { | |
96 | n_longest++; | |
97 | ||
98 | /* Randomly select one of the longest queues, with a uniform | |
99 | * distribution (Knuth algorithm 3.4.2R). */ | |
100 | if (!random_range(n_longest)) { | |
101 | longest = q; | |
102 | longest_port_no = port_no; | |
103 | } | |
104 | } | |
105 | } | |
106 | ||
107 | /* FIXME: do we want to pop the tail instead? */ | |
108 | ofpbuf_delete(dequeue_packet(ps, longest, longest_port_no)); | |
109 | } | |
110 | ||
111 | /* Remove and return the next packet to transmit (in round-robin order). */ | |
112 | static struct ofpbuf * | |
113 | get_tx_packet(struct pinsched *ps) | |
114 | { | |
115 | struct ovs_queue *q = port_array_next(&ps->queues, &ps->last_tx_port); | |
116 | if (!q) { | |
117 | q = port_array_first(&ps->queues, &ps->last_tx_port); | |
118 | } | |
119 | return dequeue_packet(ps, q, ps->last_tx_port); | |
120 | } | |
121 | ||
122 | /* Add tokens to the bucket based on elapsed time. */ | |
123 | static void | |
124 | refill_bucket(struct pinsched *ps) | |
125 | { | |
126 | long long int now = time_msec(); | |
127 | long long int tokens = (now - ps->last_fill) * ps->rate_limit + ps->tokens; | |
128 | if (tokens >= 1000) { | |
129 | ps->last_fill = now; | |
130 | ps->tokens = MIN(tokens, ps->burst_limit * 1000); | |
131 | } | |
132 | } | |
133 | ||
134 | /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns | |
135 | * true if successful, false otherwise. (In the latter case no tokens are | |
136 | * removed.) */ | |
137 | static bool | |
138 | get_token(struct pinsched *ps) | |
139 | { | |
140 | if (ps->tokens >= 1000) { | |
141 | ps->tokens -= 1000; | |
142 | return true; | |
143 | } else { | |
144 | return false; | |
145 | } | |
146 | } | |
147 | ||
148 | void | |
149 | pinsched_send(struct pinsched *ps, uint16_t port_no, | |
150 | struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) | |
151 | { | |
152 | if (!ps) { | |
153 | cb(packet, aux); | |
154 | } else if (!ps->n_queued && get_token(ps)) { | |
155 | /* In the common case where we are not constrained by the rate limit, | |
156 | * let the packet take the normal path. */ | |
157 | ps->n_normal++; | |
158 | cb(packet, aux); | |
159 | } else { | |
160 | /* Otherwise queue it up for the periodic callback to drain out. */ | |
161 | struct ovs_queue *q; | |
162 | ||
163 | /* We are called with a buffer obtained from dpif_recv() that has much | |
164 | * more allocated space than actual content most of the time. Since | |
165 | * we're going to store the packet for some time, free up that | |
166 | * otherwise wasted space. */ | |
167 | ofpbuf_trim(packet); | |
168 | ||
169 | if (ps->n_queued >= ps->burst_limit) { | |
170 | drop_packet(ps); | |
171 | } | |
172 | q = port_array_get(&ps->queues, port_no); | |
173 | if (!q) { | |
174 | q = xmalloc(sizeof *q); | |
175 | queue_init(q); | |
176 | port_array_set(&ps->queues, port_no, q); | |
177 | } | |
178 | queue_push_tail(q, packet); | |
179 | ps->n_queued++; | |
180 | ps->n_limited++; | |
181 | } | |
182 | } | |
183 | ||
184 | static void | |
185 | pinsched_status_cb(struct status_reply *sr, void *ps_) | |
186 | { | |
187 | struct pinsched *ps = ps_; | |
188 | ||
189 | status_reply_put(sr, "normal=%llu", ps->n_normal); | |
190 | status_reply_put(sr, "limited=%llu", ps->n_limited); | |
191 | status_reply_put(sr, "queue-dropped=%llu", ps->n_queue_dropped); | |
192 | } | |
193 | ||
194 | void | |
195 | pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) | |
196 | { | |
197 | if (ps) { | |
198 | int i; | |
199 | ||
200 | /* Drain some packets out of the bucket if possible, but limit the | |
201 | * number of iterations to allow other code to get work done too. */ | |
202 | refill_bucket(ps); | |
203 | for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { | |
204 | cb(get_tx_packet(ps), aux); | |
205 | } | |
206 | } | |
207 | } | |
208 | ||
209 | void | |
210 | pinsched_wait(struct pinsched *ps) | |
211 | { | |
212 | if (ps && ps->n_queued) { | |
213 | if (ps->tokens >= 1000) { | |
214 | /* We can transmit more packets as soon as we're called again. */ | |
215 | poll_immediate_wake(); | |
216 | } else { | |
217 | /* We have to wait for the bucket to re-fill. We could calculate | |
218 | * the exact amount of time here for increased smoothness. */ | |
219 | poll_timer_wait(TIME_UPDATE_INTERVAL / 2); | |
220 | } | |
221 | } | |
222 | } | |
223 | ||
224 | /* Creates and returns a scheduler for sending packet-in messages. */ | |
225 | struct pinsched * | |
226 | pinsched_create(int rate_limit, int burst_limit, struct switch_status *ss) | |
227 | { | |
228 | struct pinsched *ps; | |
229 | ||
230 | ps = xcalloc(1, sizeof *ps); | |
231 | port_array_init(&ps->queues); | |
232 | ps->n_queued = 0; | |
233 | ps->last_tx_port = PORT_ARRAY_SIZE; | |
234 | ps->last_fill = time_msec(); | |
235 | ps->tokens = rate_limit * 100; | |
236 | ps->n_txq = 0; | |
237 | ps->n_normal = 0; | |
238 | ps->n_limited = 0; | |
239 | ps->n_queue_dropped = 0; | |
240 | pinsched_set_limits(ps, rate_limit, burst_limit); | |
241 | ||
242 | if (ss) { | |
243 | ps->ss_cat = switch_status_register(ss, "rate-limit", | |
244 | pinsched_status_cb, ps); | |
245 | } | |
246 | ||
247 | return ps; | |
248 | } | |
249 | ||
250 | void | |
251 | pinsched_destroy(struct pinsched *ps) | |
252 | { | |
253 | if (ps) { | |
254 | struct ovs_queue *queue; | |
255 | unsigned int port_no; | |
256 | ||
257 | PORT_ARRAY_FOR_EACH (queue, &ps->queues, port_no) { | |
258 | queue_destroy(queue); | |
259 | free(queue); | |
260 | } | |
261 | port_array_destroy(&ps->queues); | |
262 | switch_status_unregister(ps->ss_cat); | |
263 | free(ps); | |
264 | } | |
265 | } | |
266 | ||
267 | void | |
268 | pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) | |
269 | { | |
270 | if (rate_limit <= 0) { | |
271 | rate_limit = 1000; | |
272 | } | |
273 | if (burst_limit <= 0) { | |
274 | burst_limit = rate_limit / 4; | |
275 | } | |
276 | burst_limit = MAX(burst_limit, 1); | |
277 | burst_limit = MIN(burst_limit, INT_MAX / 1000); | |
278 | ||
279 | ps->rate_limit = rate_limit; | |
280 | ps->burst_limit = burst_limit; | |
281 | while (ps->n_queued > burst_limit) { | |
282 | drop_packet(ps); | |
283 | } | |
284 | } |