]>
Commit | Line | Data |
---|---|---|
8b4770ec | 1 | // SPDX-License-Identifier: GPL-2.0 |
453dc659 DT |
2 | /* |
3 | * VMware Balloon driver. | |
4 | * | |
8b4770ec | 5 | * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved. |
453dc659 | 6 | * |
453dc659 DT |
7 | * This is VMware physical memory management driver for Linux. The driver |
8 | * acts like a "balloon" that can be inflated to reclaim physical pages by | |
9 | * reserving them in the guest and invalidating them in the monitor, | |
10 | * freeing up the underlying machine pages so they can be allocated to | |
11 | * other guests. The balloon can also be deflated to allow the guest to | |
12 | * use more physical memory. Higher level policies can control the sizes | |
13 | * of balloons in VMs in order to manage physical memory resources. | |
14 | */ | |
15 | ||
16 | //#define DEBUG | |
17 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
18 | ||
19 | #include <linux/types.h> | |
20 | #include <linux/kernel.h> | |
21 | #include <linux/mm.h> | |
f220a80f | 22 | #include <linux/vmalloc.h> |
453dc659 DT |
23 | #include <linux/sched.h> |
24 | #include <linux/module.h> | |
25 | #include <linux/workqueue.h> | |
26 | #include <linux/debugfs.h> | |
27 | #include <linux/seq_file.h> | |
c7b3690f NA |
28 | #include <linux/rwsem.h> |
29 | #include <linux/slab.h> | |
48e3d668 PM |
30 | #include <linux/vmw_vmci_defs.h> |
31 | #include <linux/vmw_vmci_api.h> | |
a10a5698 | 32 | #include <asm/hypervisor.h> |
453dc659 DT |
33 | |
34 | MODULE_AUTHOR("VMware, Inc."); | |
35 | MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); | |
48e3d668 | 36 | MODULE_VERSION("1.5.0.0-k"); |
453dc659 DT |
37 | MODULE_ALIAS("dmi:*:svnVMware*:*"); |
38 | MODULE_ALIAS("vmware_vmmemctl"); | |
39 | MODULE_LICENSE("GPL"); | |
40 | ||
453dc659 | 41 | /* |
622074a9 NA |
42 | * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait |
43 | * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page | |
44 | * allocation failure warnings. Disallow access to emergency low-memory pools. | |
453dc659 | 45 | */ |
622074a9 NA |
46 | #define VMW_HUGE_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \ |
47 | __GFP_NOMEMALLOC) | |
453dc659 DT |
48 | |
49 | /* | |
622074a9 NA |
50 | * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight |
51 | * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation | |
52 | * failure warnings. Disallow access to emergency low-memory pools. | |
453dc659 | 53 | */ |
622074a9 NA |
54 | #define VMW_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \ |
55 | __GFP_NOMEMALLOC|__GFP_NORETRY) | |
453dc659 | 56 | |
55adaa49 DT |
57 | /* Maximum number of refused pages we accumulate during inflation cycle */ |
58 | #define VMW_BALLOON_MAX_REFUSED 16 | |
453dc659 DT |
59 | |
60 | /* | |
61 | * Hypervisor communication port definitions. | |
62 | */ | |
63 | #define VMW_BALLOON_HV_PORT 0x5670 | |
64 | #define VMW_BALLOON_HV_MAGIC 0x456c6d6f | |
453dc659 DT |
65 | #define VMW_BALLOON_GUEST_ID 1 /* Linux */ |
66 | ||
eb79100f XD |
67 | enum vmwballoon_capabilities { |
68 | /* | |
69 | * Bit 0 is reserved and not associated to any capability. | |
70 | */ | |
48e3d668 PM |
71 | VMW_BALLOON_BASIC_CMDS = (1 << 1), |
72 | VMW_BALLOON_BATCHED_CMDS = (1 << 2), | |
73 | VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), | |
74 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), | |
eb79100f XD |
75 | }; |
76 | ||
f220a80f | 77 | #define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ |
365bd7ef | 78 | | VMW_BALLOON_BATCHED_CMDS \ |
48e3d668 PM |
79 | | VMW_BALLOON_BATCHED_2M_CMDS \ |
80 | | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) | |
365bd7ef | 81 | |
25acbdd7 | 82 | #define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT) |
eb79100f | 83 | |
c7b3690f NA |
84 | enum vmballoon_page_size_type { |
85 | VMW_BALLOON_4K_PAGE, | |
86 | VMW_BALLOON_2M_PAGE, | |
87 | VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE | |
88 | }; | |
89 | ||
90 | #define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1) | |
91 | ||
92 | enum vmballoon_op_stat_type { | |
93 | VMW_BALLOON_OP_STAT, | |
94 | VMW_BALLOON_OP_FAIL_STAT | |
95 | }; | |
96 | ||
97 | #define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1) | |
98 | ||
99 | /** | |
100 | * enum vmballoon_cmd_type - backdoor commands. | |
101 | * | |
102 | * Availability of the commands is as followed: | |
103 | * | |
104 | * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and | |
105 | * %VMW_BALLOON_CMD_GUEST_ID are always available. | |
106 | * | |
107 | * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then | |
108 | * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available. | |
f220a80f | 109 | * |
c7b3690f NA |
110 | * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then |
111 | * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands | |
112 | * are available. | |
f220a80f | 113 | * |
c7b3690f NA |
114 | * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then |
115 | * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK | |
116 | * are supported. | |
117 | * | |
118 | * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then | |
119 | * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported. | |
120 | * | |
121 | * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor. | |
122 | * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size. | |
123 | * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page. | |
124 | * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about | |
125 | * to be deflated from the balloon. | |
126 | * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that | |
127 | * runs in the VM. | |
128 | * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of | |
129 | * ballooned pages (up to 512). | |
130 | * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of | |
131 | * pages that are about to be deflated from the | |
132 | * balloon (up to 512). | |
133 | * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK | |
134 | * for 2MB pages. | |
135 | * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to | |
136 | * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB | |
137 | * pages. | |
138 | * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification | |
139 | * that would be invoked when the balloon | |
140 | * size changes. | |
141 | * @VMW_BALLOON_CMD_LAST: Value of the last command. | |
f220a80f | 142 | */ |
c7b3690f NA |
143 | enum vmballoon_cmd_type { |
144 | VMW_BALLOON_CMD_START, | |
145 | VMW_BALLOON_CMD_GET_TARGET, | |
146 | VMW_BALLOON_CMD_LOCK, | |
147 | VMW_BALLOON_CMD_UNLOCK, | |
148 | VMW_BALLOON_CMD_GUEST_ID, | |
149 | /* No command 5 */ | |
150 | VMW_BALLOON_CMD_BATCHED_LOCK = 6, | |
151 | VMW_BALLOON_CMD_BATCHED_UNLOCK, | |
152 | VMW_BALLOON_CMD_BATCHED_2M_LOCK, | |
153 | VMW_BALLOON_CMD_BATCHED_2M_UNLOCK, | |
154 | VMW_BALLOON_CMD_VMCI_DOORBELL_SET, | |
155 | VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET, | |
156 | }; | |
157 | ||
158 | #define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1) | |
159 | ||
160 | enum vmballoon_error_codes { | |
161 | VMW_BALLOON_SUCCESS, | |
162 | VMW_BALLOON_ERROR_CMD_INVALID, | |
163 | VMW_BALLOON_ERROR_PPN_INVALID, | |
164 | VMW_BALLOON_ERROR_PPN_LOCKED, | |
165 | VMW_BALLOON_ERROR_PPN_UNLOCKED, | |
166 | VMW_BALLOON_ERROR_PPN_PINNED, | |
167 | VMW_BALLOON_ERROR_PPN_NOTNEEDED, | |
168 | VMW_BALLOON_ERROR_RESET, | |
169 | VMW_BALLOON_ERROR_BUSY | |
170 | }; | |
eb79100f XD |
171 | |
172 | #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) | |
173 | ||
10a95d5d NA |
174 | #define VMW_BALLOON_CMD_WITH_TARGET_MASK \ |
175 | ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \ | |
176 | (1UL << VMW_BALLOON_CMD_LOCK) | \ | |
177 | (1UL << VMW_BALLOON_CMD_UNLOCK) | \ | |
178 | (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \ | |
179 | (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \ | |
180 | (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \ | |
181 | (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK)) | |
182 | ||
68131184 NA |
183 | static const char * const vmballoon_cmd_names[] = { |
184 | [VMW_BALLOON_CMD_START] = "start", | |
185 | [VMW_BALLOON_CMD_GET_TARGET] = "target", | |
186 | [VMW_BALLOON_CMD_LOCK] = "lock", | |
187 | [VMW_BALLOON_CMD_UNLOCK] = "unlock", | |
188 | [VMW_BALLOON_CMD_GUEST_ID] = "guestType", | |
189 | [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock", | |
190 | [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock", | |
191 | [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock", | |
192 | [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock", | |
193 | [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet" | |
194 | }; | |
195 | ||
c7b3690f NA |
196 | enum vmballoon_stat_page { |
197 | VMW_BALLOON_PAGE_STAT_ALLOC, | |
198 | VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, | |
199 | VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, | |
200 | VMW_BALLOON_PAGE_STAT_REFUSED_FREE, | |
201 | VMW_BALLOON_PAGE_STAT_FREE, | |
202 | VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE | |
453dc659 DT |
203 | }; |
204 | ||
c7b3690f NA |
205 | #define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1) |
206 | ||
207 | enum vmballoon_stat_general { | |
208 | VMW_BALLOON_STAT_TIMER, | |
209 | VMW_BALLOON_STAT_DOORBELL, | |
210 | VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_DOORBELL | |
211 | }; | |
212 | ||
213 | #define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1) | |
214 | ||
453dc659 | 215 | |
df8d0d42 | 216 | static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching); |
c7b3690f | 217 | static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled); |
f220a80f | 218 | |
365bd7ef | 219 | struct vmballoon_page_size { |
453dc659 DT |
220 | /* list of reserved physical pages */ |
221 | struct list_head pages; | |
222 | ||
223 | /* transient list of non-balloonable pages */ | |
224 | struct list_head refused_pages; | |
55adaa49 | 225 | unsigned int n_refused_pages; |
365bd7ef PM |
226 | }; |
227 | ||
6c948757 NA |
228 | /** |
229 | * struct vmballoon_batch_entry - a batch entry for lock or unlock. | |
230 | * | |
231 | * @status: the status of the operation, which is written by the hypervisor. | |
232 | * @reserved: reserved for future use. Must be set to zero. | |
233 | * @pfn: the physical frame number of the page to be locked or unlocked. | |
234 | */ | |
235 | struct vmballoon_batch_entry { | |
236 | u64 status : 5; | |
237 | u64 reserved : PAGE_SHIFT - 5; | |
238 | u64 pfn : 52; | |
239 | } __packed; | |
240 | ||
365bd7ef PM |
241 | struct vmballoon { |
242 | struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; | |
243 | ||
244 | /* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */ | |
245 | unsigned supported_page_sizes; | |
453dc659 DT |
246 | |
247 | /* balloon size in pages */ | |
248 | unsigned int size; | |
249 | unsigned int target; | |
250 | ||
251 | /* reset flag */ | |
252 | bool reset_required; | |
253 | ||
f220a80f XD |
254 | unsigned long capabilities; |
255 | ||
6c948757 NA |
256 | /** |
257 | * @batch_page: pointer to communication batch page. | |
258 | * | |
259 | * When batching is used, batch_page points to a page, which holds up to | |
260 | * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking. | |
261 | */ | |
262 | struct vmballoon_batch_entry *batch_page; | |
263 | ||
f220a80f XD |
264 | unsigned int batch_max_pages; |
265 | struct page *page; | |
266 | ||
453dc659 | 267 | /* statistics */ |
c7b3690f | 268 | struct vmballoon_stats *stats; |
453dc659 | 269 | |
c7b3690f | 270 | #ifdef CONFIG_DEBUG_FS |
453dc659 DT |
271 | /* debugfs file exporting statistics */ |
272 | struct dentry *dbg_entry; | |
273 | #endif | |
274 | ||
453dc659 | 275 | struct delayed_work dwork; |
48e3d668 PM |
276 | |
277 | struct vmci_handle vmci_doorbell; | |
c7b3690f NA |
278 | |
279 | /** | |
280 | * @conf_sem: semaphore to protect the configuration and the statistics. | |
281 | */ | |
282 | struct rw_semaphore conf_sem; | |
453dc659 DT |
283 | }; |
284 | ||
285 | static struct vmballoon balloon; | |
453dc659 | 286 | |
c7b3690f NA |
287 | struct vmballoon_stats { |
288 | /* timer / doorbell operations */ | |
289 | atomic64_t general_stat[VMW_BALLOON_STAT_NUM]; | |
290 | ||
291 | /* allocation statistics for huge and small pages */ | |
292 | atomic64_t | |
293 | page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES]; | |
294 | ||
295 | /* Monitor operations: total operations, and failures */ | |
296 | atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES]; | |
297 | }; | |
298 | ||
299 | static inline bool is_vmballoon_stats_on(void) | |
300 | { | |
301 | return IS_ENABLED(CONFIG_DEBUG_FS) && | |
302 | static_branch_unlikely(&balloon_stat_enabled); | |
303 | } | |
304 | ||
305 | static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op, | |
306 | enum vmballoon_op_stat_type type) | |
307 | { | |
308 | if (is_vmballoon_stats_on()) | |
309 | atomic64_inc(&b->stats->ops[op][type]); | |
310 | } | |
311 | ||
312 | static inline void vmballoon_stats_gen_inc(struct vmballoon *b, | |
313 | enum vmballoon_stat_general stat) | |
314 | { | |
315 | if (is_vmballoon_stats_on()) | |
316 | atomic64_inc(&b->stats->general_stat[stat]); | |
317 | } | |
318 | ||
319 | static inline void vmballoon_stats_gen_add(struct vmballoon *b, | |
320 | enum vmballoon_stat_general stat, | |
321 | unsigned int val) | |
322 | { | |
323 | if (is_vmballoon_stats_on()) | |
324 | atomic64_add(val, &b->stats->general_stat[stat]); | |
325 | } | |
326 | ||
327 | static inline void vmballoon_stats_page_inc(struct vmballoon *b, | |
328 | enum vmballoon_stat_page stat, | |
329 | bool is_2m_page) | |
330 | { | |
331 | if (is_vmballoon_stats_on()) | |
332 | atomic64_inc(&b->stats->page_stat[stat][is_2m_page]); | |
333 | } | |
334 | ||
10a95d5d NA |
/**
 * __vmballoon_cmd - issue a balloon command through the backdoor port.
 * @b: pointer to the balloon.
 * @cmd: command to issue, one of &enum vmballoon_cmd_type.
 * @arg1: first command argument (loaded into %ebx).
 * @arg2: second command argument (loaded into %esi).
 * @result: output for the command result; may be NULL if unused.
 *
 * Communicates with the hypervisor via "inl" on the backdoor I/O port
 * (%VMW_BALLOON_HV_PORT) with the magic value in %eax. The status comes
 * back in %eax and the command result in %ebx — except for
 * %VMW_BALLOON_CMD_START, whose result (the capabilities) is returned
 * in %ecx instead.
 *
 * Side effects: updates @b->target for commands in
 * %VMW_BALLOON_CMD_WITH_TARGET_MASK, and sets @b->reset_required when the
 * hypervisor returns %VMW_BALLOON_ERROR_RESET.
 *
 * Return: the status code reported by the hypervisor.
 */
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);

	/*
	 * Register protocol: %eax = magic (in) / status (out),
	 * %ecx = cmd (in), %edx = port (in), %ebx = arg1 (in) / result (out),
	 * %esi = arg2 (in). "memory" clobber keeps the batch page in sync.
	 */
	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed; START reports its result in %ecx */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							   local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		b->target = local_result;

	if (status != VMW_BALLOON_SUCCESS &&
	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
		vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
			 status);
	}

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}
380 | ||
381 | static __always_inline unsigned long | |
382 | vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, | |
383 | unsigned long arg2) | |
384 | { | |
385 | unsigned long dummy; | |
386 | ||
387 | return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy); | |
388 | } | |
389 | ||
453dc659 DT |
390 | /* |
391 | * Send "start" command to the host, communicating supported version | |
392 | * of the protocol. | |
393 | */ | |
f220a80f | 394 | static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) |
453dc659 | 395 | { |
10a95d5d | 396 | unsigned long status, capabilities; |
365bd7ef | 397 | bool success; |
453dc659 | 398 | |
10a95d5d NA |
399 | status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0, |
400 | &capabilities); | |
f220a80f XD |
401 | |
402 | switch (status) { | |
403 | case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: | |
404 | b->capabilities = capabilities; | |
365bd7ef PM |
405 | success = true; |
406 | break; | |
f220a80f XD |
407 | case VMW_BALLOON_SUCCESS: |
408 | b->capabilities = VMW_BALLOON_BASIC_CMDS; | |
365bd7ef PM |
409 | success = true; |
410 | break; | |
411 | default: | |
412 | success = false; | |
f220a80f | 413 | } |
453dc659 | 414 | |
5081efd1 NA |
415 | /* |
416 | * 2MB pages are only supported with batching. If batching is for some | |
417 | * reason disabled, do not use 2MB pages, since otherwise the legacy | |
418 | * mechanism is used with 2MB pages, causing a failure. | |
419 | */ | |
420 | if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && | |
421 | (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) | |
365bd7ef PM |
422 | b->supported_page_sizes = 2; |
423 | else | |
424 | b->supported_page_sizes = 1; | |
425 | ||
365bd7ef | 426 | return success; |
453dc659 DT |
427 | } |
428 | ||
453dc659 DT |
429 | /* |
430 | * Communicate guest type to the host so that it can adjust ballooning | |
431 | * algorithm to the one most appropriate for the guest. This command | |
432 | * is normally issued after sending "start" command and is part of | |
433 | * standard reset sequence. | |
434 | */ | |
435 | static bool vmballoon_send_guest_id(struct vmballoon *b) | |
436 | { | |
10a95d5d | 437 | unsigned long status; |
453dc659 | 438 | |
10a95d5d NA |
439 | status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID, |
440 | VMW_BALLOON_GUEST_ID, 0); | |
453dc659 | 441 | |
10a95d5d | 442 | if (status == VMW_BALLOON_SUCCESS) |
453dc659 DT |
443 | return true; |
444 | ||
453dc659 DT |
445 | return false; |
446 | } | |
447 | ||
365bd7ef PM |
448 | static u16 vmballoon_page_size(bool is_2m_page) |
449 | { | |
450 | if (is_2m_page) | |
25acbdd7 | 451 | return 1 << VMW_BALLOON_2M_ORDER; |
365bd7ef PM |
452 | |
453 | return 1; | |
454 | } | |
455 | ||
0395be3e NA |
456 | /** |
457 | * vmballoon_send_get_target() - Retrieve desired balloon size from the host. | |
458 | * | |
459 | * @b: pointer to the balloon. | |
460 | * | |
461 | * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required | |
462 | * by the host-guest protocol and EIO if an error occurred in communicating with | |
463 | * the host. | |
453dc659 | 464 | */ |
0395be3e | 465 | static int vmballoon_send_get_target(struct vmballoon *b) |
453dc659 DT |
466 | { |
467 | unsigned long status; | |
453dc659 | 468 | unsigned long limit; |
453dc659 | 469 | |
0395be3e | 470 | limit = totalram_pages; |
453dc659 DT |
471 | |
472 | /* Ensure limit fits in 32-bits */ | |
0395be3e NA |
473 | if (limit != (u32)limit) |
474 | return -EINVAL; | |
453dc659 | 475 | |
10a95d5d NA |
476 | status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0); |
477 | ||
0395be3e | 478 | return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; |
453dc659 DT |
479 | } |
480 | ||
622074a9 | 481 | static struct page *vmballoon_alloc_page(bool is_2m_page) |
365bd7ef PM |
482 | { |
483 | if (is_2m_page) | |
622074a9 | 484 | return alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS, |
25acbdd7 | 485 | VMW_BALLOON_2M_ORDER); |
365bd7ef | 486 | |
622074a9 | 487 | return alloc_page(VMW_PAGE_ALLOC_FLAGS); |
365bd7ef PM |
488 | } |
489 | ||
490 | static void vmballoon_free_page(struct page *page, bool is_2m_page) | |
491 | { | |
492 | if (is_2m_page) | |
25acbdd7 | 493 | __free_pages(page, VMW_BALLOON_2M_ORDER); |
365bd7ef PM |
494 | else |
495 | __free_page(page); | |
496 | } | |
497 | ||
453dc659 DT |
/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	/* Walk both page-size lists (index 0 = 4k, 1 = 2m) and free all. */
	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		/* number of 4k frames a single entry of this list covers */
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_FREE,
						 is_2m_pages);
			/* shrink the accounted balloon size accordingly */
			b->size -= size_per_page;
			/* may iterate over many pages; don't hog the CPU */
			cond_resched();
		}
	}

	/* Clearing the batch_page unconditionally has no adverse effect */
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
}
529 | ||
df8d0d42 NA |
/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on
 * via the @p argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{
	if (static_branch_likely(&vmw_balloon_batching)) {
		/* batching mode: the hypervisor wrote a per-entry status */
		*p = pfn_to_page(b->batch_page[idx].pfn);
		return b->batch_page[idx].status;
	}

	/* non-batching mode: only a single page is tracked at a time */
	*p = b->page;

	/*
	 * If a failure occurs, the indication will be provided in the status
	 * of the entire operation, which is considered before the individual
	 * page status. So for non-batching mode, the indication is always of
	 * success.
	 */
	return VMW_BALLOON_SUCCESS;
}
453dc659 | 563 | |
df8d0d42 NA |
564 | /** |
565 | * vmballoon_lock_op - notifies the host about inflated/deflated pages. | |
566 | * @b: pointer to the balloon. | |
567 | * @num_pages: number of inflated/deflated pages. | |
568 | * @is_2m_pages: whether the page(s) are 2M (or 4k). | |
569 | * @lock: whether the operation is lock (or unlock). | |
570 | * | |
571 | * Notify the host about page(s) that were ballooned (or removed from the | |
572 | * balloon) so that host can use it without fear that guest will need it (or | |
573 | * stop using them since the VM does). Host may reject some pages, we need to | |
574 | * check the return value and maybe submit a different page. The pages that are | |
575 | * inflated/deflated are pointed by @b->page. | |
576 | * | |
577 | * Return: result as provided by the hypervisor. | |
578 | */ | |
579 | static unsigned long vmballoon_lock_op(struct vmballoon *b, | |
580 | unsigned int num_pages, | |
581 | bool is_2m_pages, bool lock) | |
582 | { | |
583 | unsigned long cmd, pfn; | |
584 | ||
585 | if (static_branch_likely(&vmw_balloon_batching)) { | |
586 | if (lock) | |
587 | cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK : | |
588 | VMW_BALLOON_CMD_BATCHED_LOCK; | |
589 | else | |
590 | cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK : | |
591 | VMW_BALLOON_CMD_BATCHED_UNLOCK; | |
592 | ||
593 | pfn = PHYS_PFN(virt_to_phys(b->batch_page)); | |
594 | } else { | |
595 | cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK; | |
596 | pfn = page_to_pfn(b->page); | |
597 | ||
598 | /* In non-batching mode, PFNs must fit in 32-bit */ | |
599 | if (unlikely(pfn != (u32)pfn)) | |
600 | return VMW_BALLOON_ERROR_PPN_INVALID; | |
ef0f8f11 | 601 | } |
453dc659 | 602 | |
df8d0d42 | 603 | return vmballoon_cmd(b, cmd, pfn, num_pages); |
453dc659 DT |
604 | } |
605 | ||
df8d0d42 NA |
/**
 * vmballoon_lock - inform the host about pages staged for inflation.
 * @b: pointer to the balloon.
 * @num_pages: number of pages staged via vmballoon_add_page().
 * @is_2m_pages: whether the staged pages are 2M (or 4k).
 *
 * Sends the lock operation to the hypervisor and then walks the per-page
 * statuses. Pages the host accepted are moved onto the balloon's page list
 * and counted into the balloon size; refused pages are parked on the
 * refused_pages list for the caller to throttle on and release later.
 *
 * Return: zero when the batch command succeeded, -EIO otherwise. Refusal of
 * individual pages does not affect the return value.
 */
static int vmballoon_lock(struct vmballoon *b, unsigned int num_pages,
			  bool is_2m_pages)
{
	unsigned long batch_status;
	int i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, true);

	for (i = 0; i < num_pages; i++) {
		unsigned long status;
		struct page *p;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		status = vmballoon_status_page(b, i, &p);

		/*
		 * Failure of the whole batch overrides a single operation
		 * results.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		if (status == VMW_BALLOON_SUCCESS) {
			/* track allocated page */
			list_add(&p->lru, &page_size->pages);

			/* update balloon size */
			b->size += size_per_page;
			continue;
		}

		/* Error occurred */
		vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
					 is_2m_pages);

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		list_add(&p->lru, &page_size->refused_pages);
		page_size->n_refused_pages++;
	}

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
654 | ||
453dc659 DT |
/**
 * vmballoon_unlock - notify the host about pages leaving the balloon.
 * @b: pointer to the balloon.
 * @num_pages: number of pages staged for the unlock operation.
 * @is_2m_pages: whether the staged pages are 2M (or 4k).
 *
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed. Pages the host failed to unlock are put back on the
 * balloon's page list; successfully unlocked pages are freed and the
 * balloon size is reduced.
 *
 * Return: zero when the batch command succeeded, -EIO otherwise.
 */
static int vmballoon_unlock(struct vmballoon *b, unsigned int num_pages,
			    bool is_2m_pages)
{
	int i;
	unsigned long batch_status;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, false);

	for (i = 0; i < num_pages; i++) {
		struct vmballoon_page_size *page_size;
		unsigned long status;
		struct page *p;

		status = vmballoon_status_page(b, i, &p);
		page_size = &b->page_sizes[is_2m_pages];

		/*
		 * Failure of the whole batch overrides a single operation
		 * results.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		if (status != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_FREE,
						 is_2m_pages);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
704 | ||
453dc659 DT |
705 | /* |
706 | * Release pages that were allocated while attempting to inflate the | |
707 | * balloon but were refused by the host for one reason or another. | |
708 | */ | |
365bd7ef PM |
709 | static void vmballoon_release_refused_pages(struct vmballoon *b, |
710 | bool is_2m_pages) | |
453dc659 DT |
711 | { |
712 | struct page *page, *next; | |
365bd7ef PM |
713 | struct vmballoon_page_size *page_size = |
714 | &b->page_sizes[is_2m_pages]; | |
453dc659 | 715 | |
365bd7ef | 716 | list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) { |
453dc659 | 717 | list_del(&page->lru); |
365bd7ef | 718 | vmballoon_free_page(page, is_2m_pages); |
c7b3690f NA |
719 | vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE, |
720 | is_2m_pages); | |
453dc659 | 721 | } |
55adaa49 | 722 | |
365bd7ef | 723 | page_size->n_refused_pages = 0; |
453dc659 DT |
724 | } |
725 | ||
f220a80f XD |
726 | static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) |
727 | { | |
df8d0d42 NA |
728 | if (static_branch_likely(&vmw_balloon_batching)) |
729 | b->batch_page[idx] = (struct vmballoon_batch_entry) | |
6c948757 | 730 | { .pfn = page_to_pfn(p) }; |
df8d0d42 NA |
731 | else |
732 | b->page = p; | |
f220a80f XD |
733 | } |
734 | ||
8b079cd0 NA |
735 | /** |
736 | * vmballoon_change - retrieve the required balloon change | |
737 | * | |
738 | * @b: pointer for the balloon. | |
739 | * | |
740 | * Return: the required change for the balloon size. A positive number | |
741 | * indicates inflation, a negative number indicates a deflation. | |
742 | */ | |
743 | static int64_t vmballoon_change(struct vmballoon *b) | |
744 | { | |
745 | int64_t size, target; | |
746 | ||
747 | size = b->size; | |
748 | target = b->target; | |
749 | ||
750 | /* | |
751 | * We must cast first because of int sizes | |
752 | * Otherwise we might get huge positives instead of negatives | |
753 | */ | |
754 | ||
755 | if (b->reset_required) | |
756 | return 0; | |
757 | ||
758 | /* consider a 2MB slack on deflate, unless the balloon is emptied */ | |
759 | if (target < size && size - target < vmballoon_page_size(true) && | |
760 | target != 0) | |
761 | return 0; | |
762 | ||
763 | return target - size; | |
764 | } | |
765 | ||
453dc659 DT |
766 | /* |
767 | * Inflate the balloon towards its target size. Note that we try to limit | |
768 | * the rate of allocation to make sure we are not choking the rest of the | |
769 | * system. | |
770 | */ | |
771 | static void vmballoon_inflate(struct vmballoon *b) | |
772 | { | |
f220a80f | 773 | unsigned int num_pages = 0; |
453dc659 | 774 | int error = 0; |
365bd7ef | 775 | bool is_2m_pages; |
453dc659 | 776 | |
453dc659 DT |
777 | /* |
778 | * First try NOSLEEP page allocations to inflate balloon. | |
779 | * | |
780 | * If we do not throttle nosleep allocations, we can drain all | |
781 | * free pages in the guest quickly (if the balloon target is high). | |
782 | * As a side-effect, draining free pages helps to inform (force) | |
783 | * the guest to start swapping if balloon target is not met yet, | |
784 | * which is a desired behavior. However, balloon driver can consume | |
785 | * all available CPU cycles if too many pages are allocated in a | |
786 | * second. Therefore, we throttle nosleep allocations even when | |
787 | * the guest is not under memory pressure. OTOH, if we have already | |
788 | * predicted that the guest is under memory pressure, then we | |
789 | * slowdown page allocations considerably. | |
790 | */ | |
791 | ||
453dc659 DT |
792 | /* |
793 | * Start with no sleep allocation rate which may be higher | |
794 | * than sleeping allocation rate. | |
795 | */ | |
ec992cc7 | 796 | is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES; |
453dc659 | 797 | |
8b079cd0 NA |
798 | while ((int64_t)(num_pages * vmballoon_page_size(is_2m_pages)) < |
799 | vmballoon_change(b)) { | |
4670de4d | 800 | struct page *page; |
453dc659 | 801 | |
c7b3690f NA |
802 | vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC, |
803 | is_2m_pages); | |
804 | ||
622074a9 | 805 | page = vmballoon_alloc_page(is_2m_pages); |
ef0f8f11 | 806 | if (!page) { |
c7b3690f NA |
807 | vmballoon_stats_page_inc(b, |
808 | VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, is_2m_pages); | |
809 | ||
365bd7ef | 810 | if (is_2m_pages) { |
df8d0d42 | 811 | vmballoon_lock(b, num_pages, true); |
365bd7ef PM |
812 | |
813 | /* | |
814 | * ignore errors from locking as we now switch | |
815 | * to 4k pages and we might get different | |
816 | * errors. | |
817 | */ | |
818 | ||
819 | num_pages = 0; | |
820 | is_2m_pages = false; | |
821 | continue; | |
822 | } | |
622074a9 | 823 | break; |
453dc659 DT |
824 | } |
825 | ||
df8d0d42 | 826 | vmballoon_add_page(b, num_pages++, page); |
f220a80f | 827 | if (num_pages == b->batch_max_pages) { |
8fa3c61a NA |
828 | struct vmballoon_page_size *page_size = |
829 | &b->page_sizes[is_2m_pages]; | |
830 | ||
df8d0d42 | 831 | error = vmballoon_lock(b, num_pages, is_2m_pages); |
10a95d5d | 832 | |
f220a80f | 833 | num_pages = 0; |
8fa3c61a NA |
834 | |
835 | /* | |
836 | * Stop allocating this page size if we already | |
837 | * accumulated too many pages that the hypervisor | |
838 | * refused. | |
839 | */ | |
840 | if (page_size->n_refused_pages >= | |
841 | VMW_BALLOON_MAX_REFUSED) { | |
842 | if (!is_2m_pages) | |
843 | break; | |
844 | ||
845 | /* | |
846 | * Release the refused pages as we move to 4k | |
847 | * pages. | |
848 | */ | |
849 | vmballoon_release_refused_pages(b, true); | |
850 | is_2m_pages = true; | |
851 | } | |
852 | ||
f220a80f XD |
853 | if (error) |
854 | break; | |
855 | } | |
ef0f8f11 | 856 | |
33d268ed | 857 | cond_resched(); |
453dc659 DT |
858 | } |
859 | ||
f220a80f | 860 | if (num_pages > 0) |
df8d0d42 | 861 | vmballoon_lock(b, num_pages, is_2m_pages); |
f220a80f | 862 | |
365bd7ef PM |
863 | vmballoon_release_refused_pages(b, true); |
864 | vmballoon_release_refused_pages(b, false); | |
453dc659 DT |
865 | } |
866 | ||
867 | /* | |
868 | * Decrease the size of the balloon allowing guest to use more memory. | |
869 | */ | |
870 | static void vmballoon_deflate(struct vmballoon *b) | |
871 | { | |
365bd7ef | 872 | unsigned is_2m_pages; |
453dc659 | 873 | |
453dc659 | 874 | /* free pages to reach target */ |
365bd7ef PM |
875 | for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes; |
876 | is_2m_pages++) { | |
877 | struct page *page, *next; | |
878 | unsigned int num_pages = 0; | |
879 | struct vmballoon_page_size *page_size = | |
880 | &b->page_sizes[is_2m_pages]; | |
881 | ||
882 | list_for_each_entry_safe(page, next, &page_size->pages, lru) { | |
8b079cd0 NA |
883 | if ((int64_t)(num_pages * |
884 | vmballoon_page_size(is_2m_pages)) >= | |
885 | -vmballoon_change(b)) | |
365bd7ef | 886 | break; |
f220a80f | 887 | |
365bd7ef | 888 | list_del(&page->lru); |
df8d0d42 | 889 | vmballoon_add_page(b, num_pages++, page); |
33d268ed | 890 | |
365bd7ef PM |
891 | if (num_pages == b->batch_max_pages) { |
892 | int error; | |
453dc659 | 893 | |
df8d0d42 | 894 | error = vmballoon_unlock(b, num_pages, |
10a95d5d | 895 | is_2m_pages); |
365bd7ef PM |
896 | num_pages = 0; |
897 | if (error) | |
898 | return; | |
899 | } | |
33d268ed | 900 | |
365bd7ef PM |
901 | cond_resched(); |
902 | } | |
453dc659 | 903 | |
365bd7ef | 904 | if (num_pages > 0) |
df8d0d42 | 905 | vmballoon_unlock(b, num_pages, is_2m_pages); |
365bd7ef | 906 | } |
f220a80f XD |
907 | } |
908 | ||
df8d0d42 NA |
909 | /** |
910 | * vmballoon_deinit_batching - disables batching mode. | |
911 | * | |
912 | * @b: pointer to &struct vmballoon. | |
913 | * | |
914 | * Disables batching, by deallocating the page for communication with the | |
915 | * hypervisor and disabling the static key to indicate that batching is off. | |
916 | */ | |
917 | static void vmballoon_deinit_batching(struct vmballoon *b) | |
918 | { | |
919 | free_page((unsigned long)b->batch_page); | |
920 | b->batch_page = NULL; | |
921 | static_branch_disable(&vmw_balloon_batching); | |
922 | b->batch_max_pages = 1; | |
923 | } | |
f220a80f | 924 | |
df8d0d42 NA |
925 | /** |
926 | * vmballoon_init_batching - enable batching mode. | |
927 | * | |
928 | * @b: pointer to &struct vmballoon. | |
929 | * | |
930 | * Enables batching, by allocating a page for communication with the hypervisor | |
931 | * and enabling the static_key to use batching. | |
932 | * | |
933 | * Return: zero on success or an appropriate error-code. | |
934 | */ | |
935 | static int vmballoon_init_batching(struct vmballoon *b) | |
f220a80f | 936 | { |
b23220fe | 937 | struct page *page; |
f220a80f | 938 | |
b23220fe GK |
939 | page = alloc_page(GFP_KERNEL | __GFP_ZERO); |
940 | if (!page) | |
df8d0d42 | 941 | return -ENOMEM; |
f220a80f | 942 | |
b23220fe | 943 | b->batch_page = page_address(page); |
df8d0d42 NA |
944 | b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry); |
945 | ||
946 | static_branch_enable(&vmw_balloon_batching); | |
947 | ||
948 | return 0; | |
f220a80f XD |
949 | } |
950 | ||
48e3d668 PM |
951 | /* |
952 | * Receive notification and resize balloon | |
953 | */ | |
954 | static void vmballoon_doorbell(void *client_data) | |
955 | { | |
956 | struct vmballoon *b = client_data; | |
957 | ||
c7b3690f | 958 | vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL); |
48e3d668 PM |
959 | |
960 | mod_delayed_work(system_freezable_wq, &b->dwork, 0); | |
961 | } | |
962 | ||
963 | /* | |
964 | * Clean up vmci doorbell | |
965 | */ | |
966 | static void vmballoon_vmci_cleanup(struct vmballoon *b) | |
967 | { | |
10a95d5d NA |
968 | vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, |
969 | VMCI_INVALID_ID, VMCI_INVALID_ID); | |
48e3d668 | 970 | |
48e3d668 PM |
971 | if (!vmci_handle_is_invalid(b->vmci_doorbell)) { |
972 | vmci_doorbell_destroy(b->vmci_doorbell); | |
973 | b->vmci_doorbell = VMCI_INVALID_HANDLE; | |
974 | } | |
975 | } | |
976 | ||
977 | /* | |
978 | * Initialize vmci doorbell, to get notified as soon as balloon changes | |
979 | */ | |
980 | static int vmballoon_vmci_init(struct vmballoon *b) | |
981 | { | |
10a95d5d | 982 | unsigned long error; |
48e3d668 | 983 | |
ce664331 NA |
984 | if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) |
985 | return 0; | |
48e3d668 | 986 | |
ce664331 NA |
987 | error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, |
988 | VMCI_PRIVILEGE_FLAG_RESTRICTED, | |
989 | vmballoon_doorbell, b); | |
48e3d668 | 990 | |
ce664331 NA |
991 | if (error != VMCI_SUCCESS) |
992 | goto fail; | |
993 | ||
10a95d5d NA |
994 | error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, |
995 | b->vmci_doorbell.context, | |
996 | b->vmci_doorbell.resource, NULL); | |
ce664331 | 997 | |
ce664331 NA |
998 | if (error != VMW_BALLOON_SUCCESS) |
999 | goto fail; | |
48e3d668 PM |
1000 | |
1001 | return 0; | |
ce664331 NA |
1002 | fail: |
1003 | vmballoon_vmci_cleanup(b); | |
1004 | return -EIO; | |
48e3d668 PM |
1005 | } |
1006 | ||
f220a80f XD |
1007 | /* |
1008 | * Perform standard reset sequence by popping the balloon (in case it | |
1009 | * is not empty) and then restarting protocol. This operation normally | |
1010 | * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. | |
1011 | */ | |
1012 | static void vmballoon_reset(struct vmballoon *b) | |
1013 | { | |
48e3d668 PM |
1014 | int error; |
1015 | ||
c7b3690f NA |
1016 | down_write(&b->conf_sem); |
1017 | ||
48e3d668 PM |
1018 | vmballoon_vmci_cleanup(b); |
1019 | ||
f220a80f XD |
1020 | /* free all pages, skipping monitor unlock */ |
1021 | vmballoon_pop(b); | |
1022 | ||
1023 | if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) | |
1024 | return; | |
1025 | ||
1026 | if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { | |
df8d0d42 | 1027 | if (vmballoon_init_batching(b)) { |
f220a80f XD |
1028 | /* |
1029 | * We failed to initialize batching, inform the monitor | |
1030 | * about it by sending a null capability. | |
1031 | * | |
1032 | * The guest will retry in one second. | |
1033 | */ | |
1034 | vmballoon_send_start(b, 0); | |
1035 | return; | |
1036 | } | |
1037 | } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { | |
df8d0d42 | 1038 | vmballoon_deinit_batching(b); |
f220a80f XD |
1039 | } |
1040 | ||
1041 | b->reset_required = false; | |
48e3d668 PM |
1042 | |
1043 | error = vmballoon_vmci_init(b); | |
1044 | if (error) | |
1045 | pr_err("failed to initialize vmci doorbell\n"); | |
1046 | ||
f220a80f XD |
1047 | if (!vmballoon_send_guest_id(b)) |
1048 | pr_err("failed to send guest ID to the host\n"); | |
c7b3690f NA |
1049 | |
1050 | up_write(&b->conf_sem); | |
453dc659 DT |
1051 | } |
1052 | ||
8b079cd0 NA |
1053 | /** |
1054 | * vmballoon_work - periodic balloon worker for reset, inflation and deflation. | |
1055 | * | |
1056 | * @work: pointer to the &work_struct which is provided by the workqueue. | |
1057 | * | |
1058 | * Resets the protocol if needed, gets the new size and adjusts balloon as | |
1059 | * needed. Repeat in 1 sec. | |
453dc659 DT |
1060 | */ |
1061 | static void vmballoon_work(struct work_struct *work) | |
1062 | { | |
1063 | struct delayed_work *dwork = to_delayed_work(work); | |
1064 | struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); | |
8b079cd0 | 1065 | int64_t change = 0; |
453dc659 | 1066 | |
453dc659 DT |
1067 | if (b->reset_required) |
1068 | vmballoon_reset(b); | |
1069 | ||
c7b3690f NA |
1070 | down_read(&b->conf_sem); |
1071 | ||
1072 | /* | |
1073 | * Update the stats while holding the semaphore to ensure that | |
1074 | * @stats_enabled is consistent with whether the stats are actually | |
1075 | * enabled | |
1076 | */ | |
1077 | vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER); | |
1078 | ||
0395be3e | 1079 | if (!vmballoon_send_get_target(b)) |
8b079cd0 NA |
1080 | change = vmballoon_change(b); |
1081 | ||
1082 | if (change != 0) { | |
1083 | pr_debug("%s - size: %u, target %u", __func__, | |
1084 | b->size, b->target); | |
453dc659 | 1085 | |
8b079cd0 | 1086 | if (change > 0) |
453dc659 | 1087 | vmballoon_inflate(b); |
8b079cd0 | 1088 | else /* (change < 0) */ |
453dc659 DT |
1089 | vmballoon_deflate(b); |
1090 | } | |
1091 | ||
c7b3690f NA |
1092 | up_read(&b->conf_sem); |
1093 | ||
beda94da DT |
1094 | /* |
1095 | * We are using a freezable workqueue so that balloon operations are | |
1096 | * stopped while the system transitions to/from sleep/hibernation. | |
1097 | */ | |
1098 | queue_delayed_work(system_freezable_wq, | |
1099 | dwork, round_jiffies_relative(HZ)); | |
c7b3690f | 1100 | |
453dc659 DT |
1101 | } |
1102 | ||
/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

/* Labels for the per-page-size statistics counters shown in debugfs. */
static const char * const vmballoon_stat_page_names[] = {
	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
};

/* Labels for the general (non page-size specific) statistics counters. */
static const char * const vmballoon_stat_names[] = {
	[VMW_BALLOON_STAT_TIMER]		= "timer",
	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell"
};

/* Human-readable names of the supported page sizes. */
static const char * const vmballoon_page_size_names[] = {
	[VMW_BALLOON_4K_PAGE]			= "4k",
	[VMW_BALLOON_2M_PAGE]			= "2M"
};
1125 | ||
1126 | static int vmballoon_enable_stats(struct vmballoon *b) | |
1127 | { | |
1128 | int r = 0; | |
1129 | ||
1130 | down_write(&b->conf_sem); | |
1131 | ||
1132 | /* did we somehow race with another reader which enabled stats? */ | |
1133 | if (b->stats) | |
1134 | goto out; | |
1135 | ||
1136 | b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL); | |
1137 | ||
1138 | if (!b->stats) { | |
1139 | /* allocation failed */ | |
1140 | r = -ENOMEM; | |
1141 | goto out; | |
1142 | } | |
1143 | static_key_enable(&balloon_stat_enabled.key); | |
1144 | out: | |
1145 | up_write(&b->conf_sem); | |
1146 | return r; | |
1147 | } | |
1148 | ||
/**
 * vmballoon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed in vmmemctl in the debugfs.
 * To avoid the overhead - mainly that of memory - of collecting the statistics,
 * we only collect statistics after the first time the counters are read.
 *
 * Return: zero on success or an error code.
 */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	int i, j;

	/* enables stats if they are disabled */
	if (!b->stats) {
		int r = vmballoon_enable_stats(b);

		if (r)
			return r;
	}

	/* format capabilities info */
	seq_printf(f, "%-22s: %#4x\n", "balloon capabilities",
		   VMW_BALLOON_CAPABILITIES);
	seq_printf(f, "%-22s: %#4lx\n", "used capabilities",
		   b->capabilities);
	seq_printf(f, "%-22s: %16s\n", "is resetting",
		   b->reset_required ? "y" : "n");

	/* format size info */
	seq_printf(f, "%-22s: %16u\n", "target", b->target);
	seq_printf(f, "%-22s: %16u\n", "current", b->size);

	/* per-command success and failure counters; unnamed slots skipped */
	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
		if (vmballoon_cmd_names[i] == NULL)
			continue;

		seq_printf(f, "%-22s: %16llu (%llu failed)\n",
			   vmballoon_cmd_names[i],
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
	}

	/* general event counters (timer ticks, doorbell notifications) */
	for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
		seq_printf(f, "%-22s: %16llu\n",
			   vmballoon_stat_names[i],
			   atomic64_read(&b->stats->general_stat[i]));

	/* page counters, broken down by page size (4k / 2M) */
	for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
		for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
			seq_printf(f, "%-18s(%s): %16llu\n",
				   vmballoon_stat_page_names[i],
				   vmballoon_page_size_names[j],
				   atomic64_read(&b->stats->page_stat[i][j]));
	}

	return 0;
}
1210 | ||
1211 | static int vmballoon_debug_open(struct inode *inode, struct file *file) | |
1212 | { | |
1213 | return single_open(file, vmballoon_debug_show, inode->i_private); | |
1214 | } | |
1215 | ||
1216 | static const struct file_operations vmballoon_debug_fops = { | |
1217 | .owner = THIS_MODULE, | |
1218 | .open = vmballoon_debug_open, | |
1219 | .read = seq_read, | |
1220 | .llseek = seq_lseek, | |
1221 | .release = single_release, | |
1222 | }; | |
1223 | ||
/*
 * Create the "vmmemctl" debugfs entry exposing balloon statistics.
 * Returns zero on success or a negative error code.
 *
 * NOTE(review): debugfs_create_file() has historically returned NULL on
 * allocation failure and an ERR_PTR only in some configurations; the
 * IS_ERR() check here may not catch a NULL return — confirm against the
 * debugfs API of the targeted kernel version.
 */
static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}
1238 | ||
/*
 * Remove the debugfs entry and release the statistics.
 *
 * Statistics collection is switched off and the debugfs file removed
 * before the stats structure is freed, so no reader can observe the
 * freed memory.
 */
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	static_key_disable(&balloon_stat_enabled.key);
	debugfs_remove(b->dbg_entry);
	kfree(b->stats);
	b->stats = NULL;
}
1246 | ||
#else

/* No-op stubs used when CONFIG_DEBUG_FS is disabled. */
static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */
1259 | ||
1260 | static int __init vmballoon_init(void) | |
1261 | { | |
1262 | int error; | |
365bd7ef | 1263 | unsigned is_2m_pages; |
453dc659 DT |
1264 | /* |
1265 | * Check if we are running on VMware's hypervisor and bail out | |
1266 | * if we are not. | |
1267 | */ | |
03b2a320 | 1268 | if (x86_hyper_type != X86_HYPER_VMWARE) |
453dc659 DT |
1269 | return -ENODEV; |
1270 | ||
365bd7ef PM |
1271 | for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES; |
1272 | is_2m_pages++) { | |
1273 | INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages); | |
1274 | INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages); | |
1275 | } | |
453dc659 | 1276 | |
453dc659 DT |
1277 | INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); |
1278 | ||
453dc659 DT |
1279 | error = vmballoon_debugfs_init(&balloon); |
1280 | if (error) | |
beda94da | 1281 | return error; |
453dc659 | 1282 | |
c7b3690f | 1283 | init_rwsem(&balloon.conf_sem); |
48e3d668 | 1284 | balloon.vmci_doorbell = VMCI_INVALID_HANDLE; |
d7568c13 PM |
1285 | balloon.batch_page = NULL; |
1286 | balloon.page = NULL; | |
1287 | balloon.reset_required = true; | |
1288 | ||
beda94da | 1289 | queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); |
453dc659 DT |
1290 | |
1291 | return 0; | |
453dc659 | 1292 | } |
c3cc1b0f NA |

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);
453dc659 DT |
1301 | |
/*
 * Module teardown. The VMCI doorbell is removed first so no callback can
 * re-arm the worker, then the worker itself is cancelled before the
 * debugfs entry (and its stats) are torn down.
 */
static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);