From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Tue, 10 May 2022 17:10:20 +0200
Subject: [PATCH] coroutine: Revert to constant batch size

Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.

There are two important numbers here: slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool has already reached a certain
minimum size (the batch size); otherwise we allocate new coroutines.
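
To make the interaction concrete, here is a minimal, self-contained
model of the two thresholds (illustrative only: names are simplified
from util/qemu-coroutine.c, the pool is modelled as a plain linked
list instead of QSLIST, and the per-thread alloc pool is omitted):

    /* Illustrative model, not the actual QEMU code. */
    #include <stdlib.h>

    typedef struct Coroutine { struct Coroutine *next; } Coroutine;

    static Coroutine *release_pool;         /* global free list */
    static unsigned release_pool_size;
    static const unsigned batch_size = 64;  /* reuse threshold, constant */
    static unsigned max_pool_size = 64;     /* grows on demand */

    static Coroutine *coroutine_get(void)
    {
        if (release_pool_size > batch_size) {
            /* Enough terminated coroutines are pooled: reuse one. */
            Coroutine *co = release_pool;
            release_pool = co->next;
            release_pool_size--;
            return co;
        }
        /* Below the batch size: allocate a fresh coroutine. */
        return calloc(1, sizeof(Coroutine));
    }

    static void coroutine_put(Coroutine *co)
    {
        if (release_pool_size < max_pool_size) {
            /* Room left in the pool: keep the coroutine for reuse. */
            co->next = release_pool;
            release_pool = co;
            release_pool_size++;
        } else {
            free(co);  /* pool full: really delete it */
        }
    }

    int main(void)
    {
        Coroutine *co = coroutine_get();  /* pool empty: allocates */
        coroutine_put(co);                /* pool not full: gets pooled */
        return 0;
    }

In the buggy state both thresholds moved together; this patch pins the
batch size at 64 again and lets only the maximum pool size grow.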

The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousands of coroutines
could be sitting in the release pool without being reused.
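
For illustration (assumed numbers, using the per-device increment of
num_queues * queue_size / 2 that 4c41c69e applies for virtio-blk): a
guest with 16 vcpus and 8 virtio-blk-pci devices at the default queue
size of 256 adds 8 * (16 * 256 / 2) = 16384 to the batch size, so up
to ~16k terminated coroutines can pile up in the release pool before
a single one is reused.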

This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on
Linux, because each coroutine requires two mappings (its stack and
the guard page for the stack). Once the limit is hit, mprotect()
starts to fail with ENOMEM, causing the process to abort() in
qemu_alloc_stack().
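
Continuing the illustrative numbers above: 16384 pooled coroutines
account for 16384 * 2 = 32768 mappings, half of the default
vm.max_map_count of 65530 on Linux, before counting the guest RAM,
shared library and heap mappings the process needs anyway, so merely
doubling the device count or queue size is enough to reach the limit.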

In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.

Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.

Cc: qemu-stable@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20220510151020.105528-3-kwolf@redhat.com>
Tested-by: Hiroki Narukawa <hnarukaw@yahoo-corp.jp>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry-picked from commit 9ec7a59b5aad4b736871c378d30f5ef5ec51cb52)
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
---
 util/qemu-coroutine.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index ea23929a74..4a8bd63ef0 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -21,14 +21,20 @@
 #include "qemu/coroutine-tls.h"
 #include "block/aio.h"
 
-/** Initial batch size is 64, and is increased on demand */
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size starts
+ * with 64 and is increased on demand so that coroutines are not deleted even if
+ * they are not immediately reused.
+ */
 enum {
-    POOL_INITIAL_BATCH_SIZE = 64,
+    POOL_MIN_BATCH_SIZE = 64,
+    POOL_INITIAL_MAX_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
 static unsigned int release_pool_size;
 
 typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
@@ -57,7 +63,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque)
 
         co = QSLIST_FIRST(alloc_pool);
         if (!co) {
-            if (release_pool_size > qatomic_read(&pool_batch_size)) {
+            if (release_pool_size > POOL_MIN_BATCH_SIZE) {
                 /* Slow path; a good place to register the destructor, too. */
                 Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
                 if (!notifier->notify) {
@@ -95,12 +101,12 @@ static void coroutine_delete(Coroutine *co)
     co->caller = NULL;
 
     if (CONFIG_COROUTINE_POOL) {
-        if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+        if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
             QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
             qatomic_inc(&release_pool_size);
             return;
         }
-        if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) {
+        if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
             QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
             set_alloc_pool_size(get_alloc_pool_size() + 1);
             return;
@@ -214,10 +220,10 @@ AioContext *coroutine_fn qemu_coroutine_get_aio_context(Coroutine *co)
 
 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
-    qatomic_add(&pool_batch_size, additional_pool_size);
+    qatomic_add(&pool_max_size, additional_pool_size);
 }
 
 void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 {
-    qatomic_sub(&pool_batch_size, removing_pool_size);
+    qatomic_sub(&pool_max_size, removing_pool_size);
 }
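
For context, a hedged sketch (not part of this patch) of how a device
is expected to use the two knobs above, based on the call pattern that
4c41c69e introduced for virtio-blk; the mydev_* names are illustrative:

    /* Grow the maximum pool size while the device exists, and give the
     * headroom back when the device goes away. */
    void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);
    void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size);

    static void mydev_realize(unsigned num_queues, unsigned queue_size)
    {
        qemu_coroutine_inc_pool_size(num_queues * queue_size / 2);
    }

    static void mydev_unrealize(unsigned num_queues, unsigned queue_size)
    {
        qemu_coroutine_dec_pool_size(num_queues * queue_size / 2);
    }

After this patch, such calls only raise the ceiling up to which finished
coroutines are kept; the reuse threshold stays at POOL_MIN_BATCH_SIZE.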