// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN to suppress page
 * allocation failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_HUGE_PAGE_ALLOC_FLAGS       (__GFP_HIGHMEM|__GFP_NOWARN|    \
                                         __GFP_NOMEMALLOC)

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN to suppress page allocation
 * failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_PAGE_ALLOC_FLAGS            (__GFP_HIGHMEM|__GFP_NOWARN|    \
                                         __GFP_NOMEMALLOC|__GFP_NORETRY)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED         16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT             0x5670
#define VMW_BALLOON_HV_MAGIC            0x456c6d6f
#define VMW_BALLOON_GUEST_ID            1       /* Linux */

enum vmwballoon_capabilities {
        /*
         * Bit 0 is reserved and not associated to any capability.
         */
        VMW_BALLOON_BASIC_CMDS                  = (1 << 1),
        VMW_BALLOON_BATCHED_CMDS                = (1 << 2),
        VMW_BALLOON_BATCHED_2M_CMDS             = (1 << 3),
        VMW_BALLOON_SIGNALLED_WAKEUP_CMD        = (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES        (VMW_BALLOON_BASIC_CMDS \
                                        | VMW_BALLOON_BATCHED_CMDS \
                                        | VMW_BALLOON_BATCHED_2M_CMDS \
                                        | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_ORDER            (PMD_SHIFT - PAGE_SHIFT)
#define VMW_BALLOON_NUM_PAGE_SIZES      (2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available.
 *
 * VMW_BALLOON_BASIC_CMDS:
 *      LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *      BATCHED_LOCK and BATCHED_UNLOCK commands,
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *      BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *      VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START                   0
#define VMW_BALLOON_CMD_GET_TARGET              1
#define VMW_BALLOON_CMD_LOCK                    2
#define VMW_BALLOON_CMD_UNLOCK                  3
#define VMW_BALLOON_CMD_GUEST_ID                4
#define VMW_BALLOON_CMD_BATCHED_LOCK            6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK          7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK         8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK       9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET       10

#define VMW_BALLOON_CMD_NUM                     11

/* error codes */
#define VMW_BALLOON_SUCCESS                     0
#define VMW_BALLOON_FAILURE                     -1
#define VMW_BALLOON_ERROR_CMD_INVALID           1
#define VMW_BALLOON_ERROR_PPN_INVALID           2
#define VMW_BALLOON_ERROR_PPN_LOCKED            3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED          4
#define VMW_BALLOON_ERROR_PPN_PINNED            5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED         6
#define VMW_BALLOON_ERROR_RESET                 7
#define VMW_BALLOON_ERROR_BUSY                  8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES   (0x03000000)

#define VMW_BALLOON_CMD_WITH_TARGET_MASK                        \
        ((1UL << VMW_BALLOON_CMD_GET_TARGET)            |       \
         (1UL << VMW_BALLOON_CMD_LOCK)                  |       \
         (1UL << VMW_BALLOON_CMD_UNLOCK)                |       \
         (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)          |       \
         (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)        |       \
         (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)       |       \
         (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
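
/* Command names, indexed by command number; used for statistics and logs. */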
static const char * const vmballoon_cmd_names[] = {
        [VMW_BALLOON_CMD_START]                 = "start",
        [VMW_BALLOON_CMD_GET_TARGET]            = "target",
        [VMW_BALLOON_CMD_LOCK]                  = "lock",
        [VMW_BALLOON_CMD_UNLOCK]                = "unlock",
        [VMW_BALLOON_CMD_GUEST_ID]              = "guestType",
        [VMW_BALLOON_CMD_BATCHED_LOCK]          = "batchLock",
        [VMW_BALLOON_CMD_BATCHED_UNLOCK]        = "batchUnlock",
        [VMW_BALLOON_CMD_BATCHED_2M_LOCK]       = "2m-lock",
        [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]     = "2m-unlock",
        [VMW_BALLOON_CMD_VMCI_DOORBELL_SET]     = "doorbellSet"
};

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
        unsigned int timer;
        unsigned int doorbell;

        /* allocation statistics */
        unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
        unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

        /* Monitor operations. */
        unsigned long ops[VMW_BALLOON_CMD_NUM];
        unsigned long ops_fail[VMW_BALLOON_CMD_NUM];
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif
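
/*
 * Static key controlling whether lock/unlock commands are batched. It is
 * enabled by default and gets disabled during reset if the hypervisor turns
 * out to support only the basic, single-page commands.
 */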
static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);

struct vmballoon_page_size {
        /* list of reserved physical pages */
        struct list_head pages;

        /* transient list of non-balloonable pages */
        struct list_head refused_pages;
        unsigned int n_refused_pages;
};

/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry {
        u64 status : 5;
        u64 reserved : PAGE_SHIFT - 5;
        u64 pfn : 52;
} __packed;

struct vmballoon {
        struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

        /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
        unsigned supported_page_sizes;

        /* balloon size in pages */
        unsigned int size;
        unsigned int target;

        /* reset flag */
        bool reset_required;

        unsigned long capabilities;

        /**
         * @batch_page: pointer to communication batch page.
         *
         * When batching is used, batch_page points to a page, which holds up to
         * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
         */
        struct vmballoon_batch_entry *batch_page;

        unsigned int batch_max_pages;
        struct page *page;

#ifdef CONFIG_DEBUG_FS
        /* statistics */
        struct vmballoon_stats stats;

        /* debugfs file exporting statistics */
        struct dentry *dbg_entry;
#endif

        struct sysinfo sysinfo;

        struct delayed_work dwork;

        struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;
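
/**
 * __vmballoon_cmd - send a command to the hypervisor through the backdoor.
 *
 * @b: pointer to the balloon.
 * @cmd: command number, one of the VMW_BALLOON_CMD_* values.
 * @arg1: first command argument (e.g., a PFN or the memory limit).
 * @arg2: second command argument (e.g., the number of batched pages).
 * @result: pointer for the value returned by the hypervisor, can be NULL.
 *
 * Issues an "inl" on the pseudo I/O port %VMW_BALLOON_HV_PORT with the magic
 * %VMW_BALLOON_HV_MAGIC, which the hypervisor intercepts. For commands in
 * %VMW_BALLOON_CMD_WITH_TARGET_MASK, a successful result is the new balloon
 * target. A %VMW_BALLOON_ERROR_RESET status marks the balloon as requiring
 * a reset.
 *
 * Return: the status code provided by the hypervisor.
 */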
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
                unsigned long arg2, unsigned long *result)
{
        unsigned long status, dummy1, dummy2, dummy3, local_result;

        STATS_INC(b->stats.ops[cmd]);

        asm volatile ("inl %%dx" :
                "=a"(status),
                "=c"(dummy1),
                "=d"(dummy2),
                "=b"(local_result),
                "=S"(dummy3) :
                "0"(VMW_BALLOON_HV_MAGIC),
                "1"(cmd),
                "2"(VMW_BALLOON_HV_PORT),
                "3"(arg1),
                "4"(arg2) :
                "memory");

        /* update the result if needed */
        if (result)
                *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
                                                           local_result;

        /* update target when applicable */
        if (status == VMW_BALLOON_SUCCESS &&
            ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
                b->target = local_result;

        if (status != VMW_BALLOON_SUCCESS &&
            status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
                STATS_INC(b->stats.ops_fail[cmd]);
                pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
                         __func__, vmballoon_cmd_names[cmd], arg1, arg2,
                         status);
        }

        /* mark reset required accordingly */
        if (status == VMW_BALLOON_ERROR_RESET)
                b->reset_required = true;

        return status;
}
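
/*
 * Wrapper around __vmballoon_cmd() for callers that only care about the
 * status and not about the returned result value.
 */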
static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
              unsigned long arg2)
{
        unsigned long dummy;

        return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
        unsigned long status, capabilities;
        bool success;

        status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
                                 &capabilities);

        switch (status) {
        case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
                b->capabilities = capabilities;
                success = true;
                break;
        case VMW_BALLOON_SUCCESS:
                b->capabilities = VMW_BALLOON_BASIC_CMDS;
                success = true;
                break;
        default:
                success = false;
        }

        /*
         * 2MB pages are only supported with batching. If batching is for some
         * reason disabled, do not use 2MB pages, since otherwise the legacy
         * mechanism is used with 2MB pages, causing a failure.
         */
        if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
            (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
                b->supported_page_sizes = 2;
        else
                b->supported_page_sizes = 1;

        return success;
}

/*
 * Communicate guest type to the host so that it can adjust the ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending the "start" command and is part of
 * the standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
        unsigned long status;

        status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
                               VMW_BALLOON_GUEST_ID, 0);

        if (status == VMW_BALLOON_SUCCESS)
                return true;

        return false;
}
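
/*
 * Return the number of 4k pages that one balloon page of the given size
 * covers: 1 << VMW_BALLOON_2M_ORDER for a 2MB page, 1 otherwise.
 */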
static u16 vmballoon_page_size(bool is_2m_page)
{
        if (is_2m_page)
                return 1 << VMW_BALLOON_2M_ORDER;

        return 1;
}

/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b)
{
        unsigned long status;
        unsigned long limit;
        u32 limit32;

        /*
         * si_meminfo() is cheap. Moreover, we want to provide dynamic
         * max balloon size later. So let us call si_meminfo() every
         * iteration.
         */
        si_meminfo(&b->sysinfo);
        limit = b->sysinfo.totalram;

        /* Ensure limit fits in 32-bits */
        limit32 = (u32)limit;
        if (limit != limit32)
                return false;

        status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

        if (status == VMW_BALLOON_SUCCESS)
                return true;

        return false;
}
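
/*
 * Allocate a page of the requested size for the balloon. 2MB pages are
 * allocated without triggering reclaim, while 4k pages allow lightweight
 * reclamation, per the GFP flags defined above.
 */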
static struct page *vmballoon_alloc_page(bool is_2m_page)
{
        if (is_2m_page)
                return alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
                                   VMW_BALLOON_2M_ORDER);

        return alloc_page(VMW_PAGE_ALLOC_FLAGS);
}

static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
        if (is_2m_page)
                __free_pages(page, VMW_BALLOON_2M_ORDER);
        else
                __free_page(page);
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when the host decides to "reset" the balloon for one reason or
 * another. Unlike normal "deflate" we do not (shall not) notify the host
 * of the pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
        struct page *page, *next;
        unsigned is_2m_pages;

        for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
                        is_2m_pages++) {
                struct vmballoon_page_size *page_size =
                                &b->page_sizes[is_2m_pages];
                u16 size_per_page = vmballoon_page_size(is_2m_pages);

                list_for_each_entry_safe(page, next, &page_size->pages, lru) {
                        list_del(&page->lru);
                        vmballoon_free_page(page, is_2m_pages);
                        STATS_INC(b->stats.free[is_2m_pages]);
                        b->size -= size_per_page;
                        cond_resched();
                }
        }

        /* Clearing the batch_page unconditionally has no adverse effect */
        free_page((unsigned long)b->batch_page);
        b->batch_page = NULL;
}

/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation
 * for an individual page. Provides the page that the operation was performed
 * on via the @p argument.
 *
 * Return: the status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
                                           struct page **p)
{
        if (static_branch_likely(&vmw_balloon_batching)) {
                /* batching mode */
                *p = pfn_to_page(b->batch_page[idx].pfn);
                return b->batch_page[idx].status;
        }

        /* non-batching mode */
        *p = b->page;

        /*
         * If a failure occurs, the indication will be provided in the status
         * of the entire operation, which is considered before the individual
         * page status. So for non-batching mode, the indication is always of
         * success.
         */
        return VMW_BALLOON_SUCCESS;
}

/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @is_2m_pages: whether the page(s) are 2M (or 4k).
 * @lock: whether the operation is lock (or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that the host can use them without fear that the guest will
 * need them (or, on unlock, stops using them, since the VM does). The host
 * may reject some pages; we need to check the return value and maybe submit
 * a different page. The pages that are inflated/deflated are pointed to by
 * @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
                                       unsigned int num_pages,
                                       bool is_2m_pages, bool lock)
{
        unsigned long cmd, pfn;

        if (static_branch_likely(&vmw_balloon_batching)) {
                if (lock)
                        cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
                                            VMW_BALLOON_CMD_BATCHED_LOCK;
                else
                        cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
                                            VMW_BALLOON_CMD_BATCHED_UNLOCK;

                pfn = PHYS_PFN(virt_to_phys(b->batch_page));
        } else {
                cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK;
                pfn = page_to_pfn(b->page);

                /* In non-batching mode, PFNs must fit in 32-bit */
                if (unlikely(pfn != (u32)pfn))
                        return VMW_BALLOON_ERROR_PPN_INVALID;
        }

        return vmballoon_cmd(b, cmd, pfn, num_pages);
}
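
/*
 * Notify the host about a batch of freshly allocated pages. Successfully
 * locked pages are added to the balloon list and accounted in the balloon
 * size; refused pages are queued on the refused_pages list for later release.
 * Returns 0 on success, or -EIO if the batch command itself failed.
 */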
static int vmballoon_lock(struct vmballoon *b, unsigned int num_pages,
                          bool is_2m_pages)
{
        unsigned long batch_status;
        int i;
        u16 size_per_page = vmballoon_page_size(is_2m_pages);

        batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, true);

        for (i = 0; i < num_pages; i++) {
                unsigned long status;
                struct page *p;
                struct vmballoon_page_size *page_size =
                                &b->page_sizes[is_2m_pages];

                status = vmballoon_status_page(b, i, &p);

                /*
                 * Failure of the whole batch overrides the result of a
                 * single operation.
                 */
                if (batch_status != VMW_BALLOON_SUCCESS)
                        status = batch_status;

                if (status == VMW_BALLOON_SUCCESS) {
                        /* track allocated page */
                        list_add(&p->lru, &page_size->pages);

                        /* update balloon size */
                        b->size += size_per_page;
                        continue;
                }

                /* Error occurred */
                STATS_INC(b->stats.refused_alloc[is_2m_pages]);

                /*
                 * Place page on the list of non-balloonable pages
                 * and retry allocation, unless we already accumulated
                 * too many of them, in which case take a breather.
                 */
                list_add(&p->lru, &page_size->refused_pages);
                page_size->n_refused_pages++;
        }

        return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock(struct vmballoon *b, unsigned int num_pages,
                            bool is_2m_pages)
{
        int i;
        unsigned long batch_status;
        u16 size_per_page = vmballoon_page_size(is_2m_pages);

        batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, false);

        for (i = 0; i < num_pages; i++) {
                struct vmballoon_page_size *page_size;
                unsigned long status;
                struct page *p;

                status = vmballoon_status_page(b, i, &p);
                page_size = &b->page_sizes[is_2m_pages];

                /*
                 * Failure of the whole batch overrides the result of a
                 * single operation.
                 */
                if (batch_status != VMW_BALLOON_SUCCESS)
                        status = batch_status;

                if (status != VMW_BALLOON_SUCCESS) {
                        /*
                         * That page wasn't successfully unlocked by the
                         * hypervisor, re-add it to the list of pages owned by
                         * the balloon driver.
                         */
                        list_add(&p->lru, &page_size->pages);
                } else {
                        /* deallocate page */
                        vmballoon_free_page(p, is_2m_pages);
                        STATS_INC(b->stats.free[is_2m_pages]);

                        /* update balloon size */
                        b->size -= size_per_page;
                }
        }

        return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
                                            bool is_2m_pages)
{
        struct page *page, *next;
        struct vmballoon_page_size *page_size =
                        &b->page_sizes[is_2m_pages];

        list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
                list_del(&page->lru);
                vmballoon_free_page(page, is_2m_pages);
                STATS_INC(b->stats.refused_free[is_2m_pages]);
        }

        page_size->n_refused_pages = 0;
}
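
/*
 * Stage a page for the next lock/unlock operation: record its PFN in the
 * batch page when batching is enabled, or remember it in @b->page otherwise.
 */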
static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
        if (static_branch_likely(&vmw_balloon_batching))
                b->batch_page[idx] = (struct vmballoon_batch_entry)
                                        { .pfn = page_to_pfn(p) };
        else
                b->page = p;
}

/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer to the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates a deflation.
 */
static int64_t vmballoon_change(struct vmballoon *b)
{
        int64_t size, target;

        size = b->size;
        target = b->target;

        /*
         * We must cast size and target to signed 64-bit first; otherwise
         * subtracting the unsigned values might yield huge positives
         * instead of negatives.
         */

        if (b->reset_required)
                return 0;

        /* consider a 2MB slack on deflate, unless the balloon is emptied */
        if (target < size && size - target < vmballoon_page_size(true) &&
            target != 0)
                return 0;

        return target - size;
}

/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
        unsigned int num_pages = 0;
        int error = 0;
        bool is_2m_pages;

        /*
         * First try NOSLEEP page allocations to inflate the balloon.
         *
         * If we do not throttle nosleep allocations, we can drain all
         * free pages in the guest quickly (if the balloon target is high).
         * As a side-effect, draining free pages helps to inform (force)
         * the guest to start swapping if the balloon target is not met yet,
         * which is a desired behavior. However, the balloon driver can
         * consume all available CPU cycles if too many pages are allocated
         * in a second. Therefore, we throttle nosleep allocations even when
         * the guest is not under memory pressure. OTOH, if we have already
         * predicted that the guest is under memory pressure, then we
         * slow down page allocations considerably.
         */

        /*
         * Start with the no-sleep allocation rate, which may be higher
         * than the sleeping allocation rate.
         */
        is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;

        while ((int64_t)(num_pages * vmballoon_page_size(is_2m_pages)) <
               vmballoon_change(b)) {
                struct page *page;

                STATS_INC(b->stats.alloc[is_2m_pages]);
                page = vmballoon_alloc_page(is_2m_pages);
                if (!page) {
                        STATS_INC(b->stats.alloc_fail[is_2m_pages]);
                        if (is_2m_pages) {
                                vmballoon_lock(b, num_pages, true);

                                /*
                                 * ignore errors from locking as we now switch
                                 * to 4k pages and we might get different
                                 * errors.
                                 */

                                num_pages = 0;
                                is_2m_pages = false;
                                continue;
                        }
                        break;
                }

                vmballoon_add_page(b, num_pages++, page);
                if (num_pages == b->batch_max_pages) {
                        struct vmballoon_page_size *page_size =
                                        &b->page_sizes[is_2m_pages];

                        error = vmballoon_lock(b, num_pages, is_2m_pages);

                        num_pages = 0;

                        /*
                         * Stop allocating this page size if we already
                         * accumulated too many pages that the hypervisor
                         * refused.
                         */
                        if (page_size->n_refused_pages >=
                            VMW_BALLOON_MAX_REFUSED) {
                                if (!is_2m_pages)
                                        break;

                                /*
                                 * Release the refused pages as we move to 4k
                                 * pages.
                                 */
                                vmballoon_release_refused_pages(b, true);
                                is_2m_pages = false;
                        }

                        if (error)
                                break;
                }

                cond_resched();
        }

        if (num_pages > 0)
                vmballoon_lock(b, num_pages, is_2m_pages);

        vmballoon_release_refused_pages(b, true);
        vmballoon_release_refused_pages(b, false);
}

/*
 * Decrease the size of the balloon, allowing the guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
        unsigned is_2m_pages;

        /* free pages to reach target */
        for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
                        is_2m_pages++) {
                struct page *page, *next;
                unsigned int num_pages = 0;
                struct vmballoon_page_size *page_size =
                                &b->page_sizes[is_2m_pages];

                list_for_each_entry_safe(page, next, &page_size->pages, lru) {
                        if ((int64_t)(num_pages *
                                      vmballoon_page_size(is_2m_pages)) >=
                                        -vmballoon_change(b))
                                break;

                        list_del(&page->lru);
                        vmballoon_add_page(b, num_pages++, page);

                        if (num_pages == b->batch_max_pages) {
                                int error;

                                error = vmballoon_unlock(b, num_pages,
                                                         is_2m_pages);
                                num_pages = 0;
                                if (error)
                                        return;
                        }

                        cond_resched();
                }

                if (num_pages > 0)
                        vmballoon_unlock(b, num_pages, is_2m_pages);
        }
}

/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
        free_page((unsigned long)b->batch_page);
        b->batch_page = NULL;
        static_branch_disable(&vmw_balloon_batching);
        b->batch_max_pages = 1;
}

/**
 * vmballoon_init_batching - enable batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Enables batching, by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error-code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{
        struct page *page;

        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return -ENOMEM;

        b->batch_page = page_address(page);
        b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);

        static_branch_enable(&vmw_balloon_batching);

        return 0;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
        struct vmballoon *b = client_data;

        STATS_INC(b->stats.doorbell);

        mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
        vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
                      VMCI_INVALID_ID, VMCI_INVALID_ID);

        if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
                vmci_doorbell_destroy(b->vmci_doorbell);
                b->vmci_doorbell = VMCI_INVALID_HANDLE;
        }
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
        unsigned long error;

        if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
                return 0;

        error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
                                     VMCI_PRIVILEGE_FLAG_RESTRICTED,
                                     vmballoon_doorbell, b);

        if (error != VMCI_SUCCESS)
                goto fail;

        error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
                                b->vmci_doorbell.context,
                                b->vmci_doorbell.resource, NULL);

        if (error != VMW_BALLOON_SUCCESS)
                goto fail;

        return 0;
fail:
        vmballoon_vmci_cleanup(b);
        return -EIO;
}

/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting the protocol. This operation normally
 * happens when the host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
        int error;

        vmballoon_vmci_cleanup(b);

        /* free all pages, skipping monitor unlock */
        vmballoon_pop(b);

        if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
                return;

        if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
                if (vmballoon_init_batching(b)) {
                        /*
                         * We failed to initialize batching, inform the monitor
                         * about it by sending a null capability.
                         *
                         * The guest will retry in one second.
                         */
                        vmballoon_send_start(b, 0);
                        return;
                }
        } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
                vmballoon_deinit_batching(b);
        }

        b->reset_required = false;

        error = vmballoon_vmci_init(b);
        if (error)
                pr_err("failed to initialize vmci doorbell\n");

        if (!vmballoon_send_guest_id(b))
                pr_err("failed to send guest ID to the host\n");
}

/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, retrieves the new balloon target and adjusts
 * the balloon size accordingly. Repeats every second.
 */
static void vmballoon_work(struct work_struct *work)
{
        struct delayed_work *dwork = to_delayed_work(work);
        struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
        int64_t change = 0;

        STATS_INC(b->stats.timer);

        if (b->reset_required)
                vmballoon_reset(b);

        if (vmballoon_send_get_target(b))
                change = vmballoon_change(b);

        if (change != 0) {
                pr_debug("%s - size: %u, target %u", __func__,
                         b->size, b->target);

                if (change > 0)
                        vmballoon_inflate(b);
                else  /* (change < 0) */
                        vmballoon_deflate(b);
        }

        /*
         * We are using a freezable workqueue so that balloon operations are
         * stopped while the system transitions to/from sleep/hibernation.
         */
        queue_delayed_work(system_freezable_wq,
                           dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
        struct vmballoon *b = f->private;
        struct vmballoon_stats *stats = &b->stats;
        int i;

        /* format capabilities info */
        seq_printf(f,
                   "balloon capabilities: %#4x\n"
                   "used capabilities: %#4lx\n"
                   "is resetting: %c\n",
                   VMW_BALLOON_CAPABILITIES, b->capabilities,
                   b->reset_required ? 'y' : 'n');

        /* format size info */
        seq_printf(f,
                   "target: %8d pages\n"
                   "current: %8d pages\n",
                   b->target, b->size);

        for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
                if (vmballoon_cmd_names[i] == NULL)
                        continue;

                seq_printf(f, "%-22s: %16lu (%lu failed)\n",
                           vmballoon_cmd_names[i], stats->ops[i],
                           stats->ops_fail[i]);
        }

        seq_printf(f,
                   "\n"
                   "timer: %8u\n"
                   "doorbell: %8u\n"
                   "prim2mAlloc: %8u (%4u failed)\n"
                   "prim4kAlloc: %8u (%4u failed)\n"
                   "prim2mFree: %8u\n"
                   "primFree: %8u\n"
                   "err2mAlloc: %8u\n"
                   "errAlloc: %8u\n"
                   "err2mFree: %8u\n"
                   "errFree: %8u\n",
                   stats->timer,
                   stats->doorbell,
                   stats->alloc[true], stats->alloc_fail[true],
                   stats->alloc[false], stats->alloc_fail[false],
                   stats->free[true],
                   stats->free[false],
                   stats->refused_alloc[true], stats->refused_alloc[false],
                   stats->refused_free[true], stats->refused_free[false]);

        return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
        return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
        .owner          = THIS_MODULE,
        .open           = vmballoon_debug_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
        int error;

        b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
                                           &vmballoon_debug_fops);
        if (IS_ERR(b->dbg_entry)) {
                error = PTR_ERR(b->dbg_entry);
                pr_err("failed to create debugfs entry, error: %d\n", error);
                return error;
        }

        return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
        debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
        return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif  /* CONFIG_DEBUG_FS */
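
/*
 * Module initialization: bail out if we are not running on VMware's
 * hypervisor, otherwise set up the page lists and the worker, and kick off
 * the first iteration, which performs the initial reset and handshake.
 */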
static int __init vmballoon_init(void)
{
        int error;
        unsigned is_2m_pages;

        /*
         * Check if we are running on VMware's hypervisor and bail out
         * if we are not.
         */
        if (x86_hyper_type != X86_HYPER_VMWARE)
                return -ENODEV;

        for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
                        is_2m_pages++) {
                INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
                INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
        }

        INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

        error = vmballoon_debugfs_init(&balloon);
        if (error)
                return error;

        balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
        balloon.batch_page = NULL;
        balloon.page = NULL;
        balloon.reset_required = true;

        queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

        return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
        vmballoon_vmci_cleanup(&balloon);
        cancel_delayed_work_sync(&balloon.dwork);

        vmballoon_debugfs_exit(&balloon);

        /*
         * Deallocate all reserved memory, and reset connection with monitor.
         * Reset connection before deallocating memory to avoid potential for
         * additional spurious resets from guest touching deallocated pages.
         */
        vmballoon_send_start(&balloon, 0);
        vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);