]>
Commit | Line | Data |
---|---|---|
1e214a5c SL |
1 | /* |
2 | * Virtio balloon implementation, inspired by Dor Laor and Marcelo | |
6b35e407 RR |
3 | * Tosatti's implementations. |
4 | * | |
5 | * Copyright 2008 Rusty Russell IBM Corporation | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2 of the License, or | |
10 | * (at your option) any later version. | |
11 | * | |
12 | * This program is distributed in the hope that it will be useful, | |
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | * GNU General Public License for more details. | |
16 | * | |
17 | * You should have received a copy of the GNU General Public License | |
18 | * along with this program; if not, write to the Free Software | |
19 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
20 | */ | |
1e214a5c | 21 | |
6b35e407 RR |
22 | #include <linux/virtio.h> |
23 | #include <linux/virtio_balloon.h> | |
24 | #include <linux/swap.h> | |
25 | #include <linux/kthread.h> | |
26 | #include <linux/freezer.h> | |
6659a0f0 | 27 | #include <linux/delay.h> |
5a0e3ad6 | 28 | #include <linux/slab.h> |
b5a2c4f1 | 29 | #include <linux/module.h> |
6b35e407 | 30 | |
3ccc9372 MT |
/*
 * Balloon device works in 4K page units.  So each page is pointed to by
 * multiple balloon pages.  All memory counters in this driver are in balloon
 * page units.
 *
 * VIRTIO_BALLOON_PAGES_PER_PAGE is the number of balloon pages that make up
 * one Linux page (PAGE_SIZE scaled down by the balloon pfn shift).
 */
#define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
37 | ||
6b35e407 RR |
38 | struct virtio_balloon |
39 | { | |
40 | struct virtio_device *vdev; | |
9564e138 | 41 | struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; |
6b35e407 RR |
42 | |
43 | /* Where the ballooning thread waits for config to change. */ | |
44 | wait_queue_head_t config_change; | |
45 | ||
46 | /* The thread servicing the balloon. */ | |
47 | struct task_struct *thread; | |
48 | ||
49 | /* Waiting for host to ack the pages we released. */ | |
9c378abc | 50 | wait_queue_head_t acked; |
6b35e407 | 51 | |
3ccc9372 | 52 | /* Number of balloon pages we've told the Host we're not using. */ |
6b35e407 | 53 | unsigned int num_pages; |
3ccc9372 MT |
54 | /* |
55 | * The pages we've told the Host we're not using. | |
56 | * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE | |
57 | * to num_pages above. | |
58 | */ | |
6b35e407 RR |
59 | struct list_head pages; |
60 | ||
61 | /* The array of pfns we tell the Host about. */ | |
62 | unsigned int num_pfns; | |
63 | u32 pfns[256]; | |
9564e138 AL |
64 | |
65 | /* Memory statistics */ | |
1f34c71a | 66 | int need_stats_update; |
9564e138 | 67 | struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; |
6b35e407 RR |
68 | }; |
69 | ||
/*
 * Device ids this driver binds to: the balloon device id paired with
 * VIRTIO_DEV_ANY_ID.  The zeroed entry terminates the table.
 */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
	{ 0 },
};
74 | ||
1b4aa2fa HB |
75 | static u32 page_to_balloon_pfn(struct page *page) |
76 | { | |
77 | unsigned long pfn = page_to_pfn(page); | |
78 | ||
79 | BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT); | |
80 | /* Convert pfn from Linux page size to balloon page size. */ | |
3ccc9372 MT |
81 | return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE; |
82 | } | |
83 | ||
84 | static struct page *balloon_pfn_to_page(u32 pfn) | |
85 | { | |
86 | BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE); | |
87 | return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE); | |
1b4aa2fa HB |
88 | } |
89 | ||
6b35e407 RR |
90 | static void balloon_ack(struct virtqueue *vq) |
91 | { | |
9c378abc | 92 | struct virtio_balloon *vb = vq->vdev->priv; |
6b35e407 | 93 | |
9c378abc | 94 | wake_up(&vb->acked); |
6b35e407 RR |
95 | } |
96 | ||
97 | static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) | |
98 | { | |
99 | struct scatterlist sg; | |
9c378abc | 100 | unsigned int len; |
6b35e407 RR |
101 | |
102 | sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); | |
103 | ||
6b35e407 | 104 | /* We should always be able to add one buffer to an empty queue. */ |
f96fde41 | 105 | if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0) |
6b35e407 | 106 | BUG(); |
946cfe0e | 107 | virtqueue_kick(vq); |
6b35e407 RR |
108 | |
109 | /* When host has read buffer, this completes via balloon_ack */ | |
9c378abc | 110 | wait_event(vb->acked, virtqueue_get_buf(vq, &len)); |
6b35e407 RR |
111 | } |
112 | ||
3ccc9372 MT |
113 | static void set_page_pfns(u32 pfns[], struct page *page) |
114 | { | |
115 | unsigned int i; | |
116 | ||
117 | /* Set balloon pfns pointing at this page. | |
118 | * Note that the first pfn points at start of the page. */ | |
119 | for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++) | |
120 | pfns[i] = page_to_balloon_pfn(page) + i; | |
121 | } | |
122 | ||
6b35e407 RR |
123 | static void fill_balloon(struct virtio_balloon *vb, size_t num) |
124 | { | |
125 | /* We can only do one array worth at a time. */ | |
126 | num = min(num, ARRAY_SIZE(vb->pfns)); | |
127 | ||
3ccc9372 MT |
128 | for (vb->num_pfns = 0; vb->num_pfns < num; |
129 | vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { | |
61fb06cc BS |
130 | struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY | |
131 | __GFP_NOMEMALLOC | __GFP_NOWARN); | |
6b35e407 RR |
132 | if (!page) { |
133 | if (printk_ratelimit()) | |
134 | dev_printk(KERN_INFO, &vb->vdev->dev, | |
135 | "Out of puff! Can't get %zu pages\n", | |
136 | num); | |
137 | /* Sleep for at least 1/5 of a second before retry. */ | |
138 | msleep(200); | |
139 | break; | |
140 | } | |
3ccc9372 MT |
141 | set_page_pfns(vb->pfns + vb->num_pfns, page); |
142 | vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE; | |
6b35e407 | 143 | totalram_pages--; |
6b35e407 RR |
144 | list_add(&page->lru, &vb->pages); |
145 | } | |
146 | ||
147 | /* Didn't get any? Oh well. */ | |
148 | if (vb->num_pfns == 0) | |
149 | return; | |
150 | ||
151 | tell_host(vb, vb->inflate_vq); | |
152 | } | |
153 | ||
154 | static void release_pages_by_pfn(const u32 pfns[], unsigned int num) | |
155 | { | |
156 | unsigned int i; | |
157 | ||
3ccc9372 MT |
158 | /* Find pfns pointing at start of each page, get pages and free them. */ |
159 | for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) { | |
160 | __free_page(balloon_pfn_to_page(pfns[i])); | |
6b35e407 RR |
161 | totalram_pages++; |
162 | } | |
163 | } | |
164 | ||
165 | static void leak_balloon(struct virtio_balloon *vb, size_t num) | |
166 | { | |
167 | struct page *page; | |
168 | ||
169 | /* We can only do one array worth at a time. */ | |
170 | num = min(num, ARRAY_SIZE(vb->pfns)); | |
171 | ||
3ccc9372 MT |
172 | for (vb->num_pfns = 0; vb->num_pfns < num; |
173 | vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { | |
6b35e407 RR |
174 | page = list_first_entry(&vb->pages, struct page, lru); |
175 | list_del(&page->lru); | |
3ccc9372 MT |
176 | set_page_pfns(vb->pfns + vb->num_pfns, page); |
177 | vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE; | |
6b35e407 RR |
178 | } |
179 | ||
bf50e69f DH |
180 | /* |
181 | * Note that if | |
182 | * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); | |
183 | * is true, we *have* to do it in this order | |
184 | */ | |
185 | tell_host(vb, vb->deflate_vq); | |
186 | release_pages_by_pfn(vb->pfns, vb->num_pfns); | |
6b35e407 RR |
187 | } |
188 | ||
9564e138 AL |
189 | static inline void update_stat(struct virtio_balloon *vb, int idx, |
190 | u16 tag, u64 val) | |
191 | { | |
192 | BUG_ON(idx >= VIRTIO_BALLOON_S_NR); | |
193 | vb->stats[idx].tag = tag; | |
194 | vb->stats[idx].val = val; | |
195 | } | |
196 | ||
197 | #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT) | |
198 | ||
199 | static void update_balloon_stats(struct virtio_balloon *vb) | |
200 | { | |
201 | unsigned long events[NR_VM_EVENT_ITEMS]; | |
202 | struct sysinfo i; | |
203 | int idx = 0; | |
204 | ||
205 | all_vm_events(events); | |
206 | si_meminfo(&i); | |
207 | ||
208 | update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, | |
209 | pages_to_bytes(events[PSWPIN])); | |
210 | update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, | |
211 | pages_to_bytes(events[PSWPOUT])); | |
212 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]); | |
213 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]); | |
214 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, | |
215 | pages_to_bytes(i.freeram)); | |
216 | update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, | |
217 | pages_to_bytes(i.totalram)); | |
218 | } | |
219 | ||
220 | /* | |
221 | * While most virtqueues communicate guest-initiated requests to the hypervisor, | |
222 | * the stats queue operates in reverse. The driver initializes the virtqueue | |
223 | * with a single buffer. From that point forward, all conversations consist of | |
224 | * a hypervisor request (a call to this function) which directs us to refill | |
1f34c71a AL |
225 | * the virtqueue with a fresh stats buffer. Since stats collection can sleep, |
226 | * we notify our kthread which does the actual work via stats_handle_request(). | |
9564e138 | 227 | */ |
1f34c71a | 228 | static void stats_request(struct virtqueue *vq) |
9564e138 | 229 | { |
9c378abc | 230 | struct virtio_balloon *vb = vq->vdev->priv; |
9564e138 | 231 | |
1f34c71a AL |
232 | vb->need_stats_update = 1; |
233 | wake_up(&vb->config_change); | |
234 | } | |
235 | ||
236 | static void stats_handle_request(struct virtio_balloon *vb) | |
237 | { | |
238 | struct virtqueue *vq; | |
239 | struct scatterlist sg; | |
9c378abc | 240 | unsigned int len; |
9564e138 | 241 | |
1f34c71a | 242 | vb->need_stats_update = 0; |
9564e138 AL |
243 | update_balloon_stats(vb); |
244 | ||
1f34c71a | 245 | vq = vb->stats_vq; |
9c378abc MT |
246 | if (!virtqueue_get_buf(vq, &len)) |
247 | return; | |
9564e138 | 248 | sg_init_one(&sg, vb->stats, sizeof(vb->stats)); |
f96fde41 | 249 | if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0) |
9564e138 | 250 | BUG(); |
946cfe0e | 251 | virtqueue_kick(vq); |
9564e138 AL |
252 | } |
253 | ||
6b35e407 RR |
254 | static void virtballoon_changed(struct virtio_device *vdev) |
255 | { | |
256 | struct virtio_balloon *vb = vdev->priv; | |
257 | ||
258 | wake_up(&vb->config_change); | |
259 | } | |
260 | ||
bdc1681c | 261 | static inline s64 towards_target(struct virtio_balloon *vb) |
6b35e407 | 262 | { |
1a87228f DG |
263 | __le32 v; |
264 | s64 target; | |
265 | ||
72e61eb4 RR |
266 | vb->vdev->config->get(vb->vdev, |
267 | offsetof(struct virtio_balloon_config, num_pages), | |
268 | &v, sizeof(v)); | |
1a87228f DG |
269 | target = le32_to_cpu(v); |
270 | return target - vb->num_pages; | |
6b35e407 RR |
271 | } |
272 | ||
273 | static void update_balloon_size(struct virtio_balloon *vb) | |
274 | { | |
275 | __le32 actual = cpu_to_le32(vb->num_pages); | |
276 | ||
277 | vb->vdev->config->set(vb->vdev, | |
278 | offsetof(struct virtio_balloon_config, actual), | |
279 | &actual, sizeof(actual)); | |
280 | } | |
281 | ||
282 | static int balloon(void *_vballoon) | |
283 | { | |
284 | struct virtio_balloon *vb = _vballoon; | |
285 | ||
286 | set_freezable(); | |
287 | while (!kthread_should_stop()) { | |
bdc1681c | 288 | s64 diff; |
6b35e407 RR |
289 | |
290 | try_to_freeze(); | |
291 | wait_event_interruptible(vb->config_change, | |
292 | (diff = towards_target(vb)) != 0 | |
1f34c71a | 293 | || vb->need_stats_update |
84a139a9 MT |
294 | || kthread_should_stop() |
295 | || freezing(current)); | |
1f34c71a AL |
296 | if (vb->need_stats_update) |
297 | stats_handle_request(vb); | |
6b35e407 RR |
298 | if (diff > 0) |
299 | fill_balloon(vb, diff); | |
300 | else if (diff < 0) | |
301 | leak_balloon(vb, -diff); | |
302 | update_balloon_size(vb); | |
303 | } | |
304 | return 0; | |
305 | } | |
306 | ||
be91c33d | 307 | static int init_vqs(struct virtio_balloon *vb) |
6b35e407 | 308 | { |
9564e138 | 309 | struct virtqueue *vqs[3]; |
1f34c71a | 310 | vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request }; |
9564e138 AL |
311 | const char *names[] = { "inflate", "deflate", "stats" }; |
312 | int err, nvqs; | |
6b35e407 | 313 | |
be91c33d AS |
314 | /* |
315 | * We expect two virtqueues: inflate and deflate, and | |
316 | * optionally stat. | |
317 | */ | |
9564e138 | 318 | nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2; |
be91c33d | 319 | err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names); |
d2a7ddda | 320 | if (err) |
be91c33d | 321 | return err; |
6b35e407 | 322 | |
d2a7ddda MT |
323 | vb->inflate_vq = vqs[0]; |
324 | vb->deflate_vq = vqs[1]; | |
9564e138 AL |
325 | if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) { |
326 | struct scatterlist sg; | |
327 | vb->stats_vq = vqs[2]; | |
328 | ||
329 | /* | |
330 | * Prime this virtqueue with one buffer so the hypervisor can | |
331 | * use it to signal us later. | |
332 | */ | |
333 | sg_init_one(&sg, vb->stats, sizeof vb->stats); | |
f96fde41 RR |
334 | if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL) |
335 | < 0) | |
9564e138 | 336 | BUG(); |
946cfe0e | 337 | virtqueue_kick(vb->stats_vq); |
9564e138 | 338 | } |
be91c33d AS |
339 | return 0; |
340 | } | |
341 | ||
342 | static int virtballoon_probe(struct virtio_device *vdev) | |
343 | { | |
344 | struct virtio_balloon *vb; | |
345 | int err; | |
346 | ||
347 | vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); | |
348 | if (!vb) { | |
349 | err = -ENOMEM; | |
350 | goto out; | |
351 | } | |
352 | ||
353 | INIT_LIST_HEAD(&vb->pages); | |
354 | vb->num_pages = 0; | |
355 | init_waitqueue_head(&vb->config_change); | |
9c378abc | 356 | init_waitqueue_head(&vb->acked); |
be91c33d AS |
357 | vb->vdev = vdev; |
358 | vb->need_stats_update = 0; | |
359 | ||
360 | err = init_vqs(vb); | |
361 | if (err) | |
362 | goto out_free_vb; | |
6b35e407 RR |
363 | |
364 | vb->thread = kthread_run(balloon, vb, "vballoon"); | |
365 | if (IS_ERR(vb->thread)) { | |
366 | err = PTR_ERR(vb->thread); | |
d2a7ddda | 367 | goto out_del_vqs; |
6b35e407 RR |
368 | } |
369 | ||
6b35e407 RR |
370 | return 0; |
371 | ||
d2a7ddda MT |
372 | out_del_vqs: |
373 | vdev->config->del_vqs(vdev); | |
6b35e407 RR |
374 | out_free_vb: |
375 | kfree(vb); | |
376 | out: | |
377 | return err; | |
378 | } | |
379 | ||
c877bab5 | 380 | static void remove_common(struct virtio_balloon *vb) |
6b35e407 | 381 | { |
6b35e407 RR |
382 | /* There might be pages left in the balloon: free them. */ |
383 | while (vb->num_pages) | |
384 | leak_balloon(vb, vb->num_pages); | |
b8ae0eb3 | 385 | update_balloon_size(vb); |
6b35e407 RR |
386 | |
387 | /* Now we reset the device so we can clean up the queues. */ | |
c877bab5 | 388 | vb->vdev->config->reset(vb->vdev); |
6b35e407 | 389 | |
c877bab5 AS |
390 | vb->vdev->config->del_vqs(vb->vdev); |
391 | } | |
392 | ||
393 | static void __devexit virtballoon_remove(struct virtio_device *vdev) | |
394 | { | |
395 | struct virtio_balloon *vb = vdev->priv; | |
396 | ||
397 | kthread_stop(vb->thread); | |
398 | remove_common(vb); | |
6b35e407 RR |
399 | kfree(vb); |
400 | } | |
401 | ||
e562966d AS |
402 | #ifdef CONFIG_PM |
403 | static int virtballoon_freeze(struct virtio_device *vdev) | |
404 | { | |
4eb05d56 AS |
405 | struct virtio_balloon *vb = vdev->priv; |
406 | ||
e562966d AS |
407 | /* |
408 | * The kthread is already frozen by the PM core before this | |
409 | * function is called. | |
410 | */ | |
411 | ||
c877bab5 | 412 | remove_common(vb); |
e562966d AS |
413 | return 0; |
414 | } | |
415 | ||
c45b4166 | 416 | static int virtballoon_restore(struct virtio_device *vdev) |
4eb05d56 AS |
417 | { |
418 | struct virtio_balloon *vb = vdev->priv; | |
419 | int ret; | |
420 | ||
421 | ret = init_vqs(vdev->priv); | |
422 | if (ret) | |
423 | return ret; | |
424 | ||
425 | fill_balloon(vb, towards_target(vb)); | |
426 | update_balloon_size(vb); | |
427 | return 0; | |
428 | } | |
e562966d AS |
429 | #endif |
430 | ||
9564e138 AL |
/* Feature bits listed in the driver's feature table. */
static unsigned int features[] = {
	VIRTIO_BALLOON_F_MUST_TELL_HOST,
	VIRTIO_BALLOON_F_STATS_VQ,
};
c45a6816 | 435 | |
/*
 * Driver descriptor registered with the virtio core in init(): feature
 * table, device id table and the probe/remove/config-change callbacks.
 * The freeze/restore hooks are only built with CONFIG_PM.
 */
static struct virtio_driver virtio_balloon_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name = KBUILD_MODNAME,
	.driver.owner = THIS_MODULE,
	.id_table = id_table,
	.probe = virtballoon_probe,
	.remove = __devexit_p(virtballoon_remove),
	.config_changed = virtballoon_changed,
#ifdef CONFIG_PM
	.freeze = virtballoon_freeze,
	.restore = virtballoon_restore,
#endif
};
450 | ||
451 | static int __init init(void) | |
452 | { | |
d817cd52 | 453 | return register_virtio_driver(&virtio_balloon_driver); |
6b35e407 RR |
454 | } |
455 | ||
456 | static void __exit fini(void) | |
457 | { | |
d817cd52 | 458 | unregister_virtio_driver(&virtio_balloon_driver); |
6b35e407 RR |
459 | } |
/* Hook init()/fini() up as the module's load and unload handlers. */
module_init(init);
module_exit(fini);

/* Module metadata: virtio device id table, description and license. */
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio balloon driver");
MODULE_LICENSE("GPL");