git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
kvm: x86: Add memcg accounting to KVM allocations
author Ben Gardon <bgardon@google.com>
Mon, 11 Feb 2019 19:02:50 +0000 (11:02 -0800)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 20 Feb 2019 21:48:30 +0000 (22:48 +0100)
There are many KVM kernel memory allocations which are tied to the life of
the VM process and should be charged to the VM process's cgroup. If the
allocations aren't tied to the process, the OOM killer will not know
that killing the process will free the associated kernel memory.
Add __GFP_ACCOUNT flags to many of the allocations which are not yet being
charged to the VM process's cgroup.

Tested:
Ran all kvm-unit-tests on a 64-bit Haswell machine, the patch
introduced no new failures.
Ran a kernel memory accounting test which creates a VM to touch
memory and then checks that the kernel memory allocated for the
process is within certain bounds.
With this patch we account for much more of the vmalloc and slab memory
allocated for the VM.

There remain a few allocations which should be charged to the VM's
cgroup but are not. In x86, they include:
vcpu->arch.pio_data
These allocations are unaccounted in this patch because they are mapped
to userspace, and accounting them to a cgroup causes problems. This
should be addressed in a future patch.

Signed-off-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/hyperv.c
arch/x86/kvm/i8254.c
arch/x86/kvm/i8259.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/page_track.c
arch/x86/kvm/x86.c

index 89d20ed1d2e8bf7abe753adba301ef2b31ae8398..27c43525a05f1afabeb705b27f955eba5fe5356d 100644 (file)
@@ -1729,7 +1729,7 @@ static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
 
        mutex_lock(&hv->hv_lock);
        ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
-                       GFP_KERNEL);
+                       GFP_KERNEL_ACCOUNT);
        mutex_unlock(&hv->hv_lock);
 
        if (ret >= 0)
index af192895b1fc633e9b2922c587862d1cbb41efd7..4a6dc54cc12becf739afb3bb468b1e0d9c4c52d1 100644 (file)
@@ -653,7 +653,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
        pid_t pid_nr;
        int ret;
 
-       pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+       pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL_ACCOUNT);
        if (!pit)
                return NULL;
 
index bdcd4139eca9233bbd9e82615a1ed3c45c2ad060..8b38bb4868a65defc9143776ab96b19744be2fa1 100644 (file)
@@ -583,7 +583,7 @@ int kvm_pic_init(struct kvm *kvm)
        struct kvm_pic *s;
        int ret;
 
-       s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
+       s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL_ACCOUNT);
        if (!s)
                return -ENOMEM;
        spin_lock_init(&s->lock);
index 4e822ad363f37f613d14ab94f35609bcf3539bf7..1add1bc881e22418ff06e4c375aaca1a4a8b274c 100644 (file)
@@ -622,7 +622,7 @@ int kvm_ioapic_init(struct kvm *kvm)
        struct kvm_ioapic *ioapic;
        int ret;
 
-       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT);
        if (!ioapic)
                return -ENOMEM;
        spin_lock_init(&ioapic->lock);
index 4b6c2da7265c88f8f530eb026ba6b0e950eac51e..991fdf7fc17fbd9e1a4cab99d688a7af820d397c 100644 (file)
@@ -181,7 +181,8 @@ static void recalculate_apic_map(struct kvm *kvm)
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
 
        new = kvzalloc(sizeof(struct kvm_apic_map) +
-                          sizeof(struct kvm_lapic *) * ((u64)max_id + 1), GFP_KERNEL);
+                          sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
+                          GFP_KERNEL_ACCOUNT);
 
        if (!new)
                goto out;
@@ -2259,13 +2260,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
        ASSERT(vcpu != NULL);
        apic_debug("apic_init %d\n", vcpu->vcpu_id);
 
-       apic = kzalloc(sizeof(*apic), GFP_KERNEL);
+       apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
        if (!apic)
                goto nomem;
 
        vcpu->arch.apic = apic;
 
-       apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
+       apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
        if (!apic->regs) {
                printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
                       vcpu->vcpu_id);
index 45eb988aa4119afddd9581848fee0d7fd05de328..415d0e62cb3efe50ee940a6a880e67a012978893 100644 (file)
@@ -961,7 +961,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-               obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
+               obj = kmem_cache_zalloc(base_cache, GFP_KERNEL_ACCOUNT);
                if (!obj)
                        return cache->nobjs >= min ? 0 : -ENOMEM;
                cache->objects[cache->nobjs++] = obj;
@@ -3702,7 +3702,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 
                        u64 *lm_root;
 
-                       lm_root = (void*)get_zeroed_page(GFP_KERNEL);
+                       lm_root = (void*)get_zeroed_page(GFP_KERNEL_ACCOUNT);
                        if (lm_root == NULL)
                                return 1;
 
@@ -5499,7 +5499,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
         * Therefore we need to allocate shadow page tables in the first
         * 4GB of memory, which happens to fit the DMA32 zone.
         */
-       page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+       page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32);
        if (!page)
                return -ENOMEM;
 
index 3052a59a30655bcadccb53ec0cd1c14dab2b7591..fd04d462fdaeec18392757fc193b3dc4f0dd85e5 100644 (file)
@@ -42,7 +42,7 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
        for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
                slot->arch.gfn_track[i] =
                        kvcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
-                                GFP_KERNEL);
+                                GFP_KERNEL_ACCOUNT);
                if (!slot->arch.gfn_track[i])
                        goto track_free;
        }
index 96f87d356c79fe7cd1f5de4d8e3e19160957ad20..3de586f89730f6bd625426a32cd77f4d4eed25e2 100644 (file)
@@ -3879,7 +3879,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EINVAL;
                if (!lapic_in_kernel(vcpu))
                        goto out;
-               u.lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+               u.lapic = kzalloc(sizeof(struct kvm_lapic_state),
+                               GFP_KERNEL_ACCOUNT);
 
                r = -ENOMEM;
                if (!u.lapic)
@@ -4066,7 +4067,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_GET_XSAVE: {
-               u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL);
+               u.xsave = kzalloc(sizeof(struct kvm_xsave), GFP_KERNEL_ACCOUNT);
                r = -ENOMEM;
                if (!u.xsave)
                        break;
@@ -4090,7 +4091,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                break;
        }
        case KVM_GET_XCRS: {
-               u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL);
+               u.xcrs = kzalloc(sizeof(struct kvm_xcrs), GFP_KERNEL_ACCOUNT);
                r = -ENOMEM;
                if (!u.xcrs)
                        break;
@@ -9040,14 +9041,15 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                static_key_slow_inc(&kvm_no_apic_vcpu);
 
        vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
-                                      GFP_KERNEL);
+                                      GFP_KERNEL_ACCOUNT);
        if (!vcpu->arch.mce_banks) {
                r = -ENOMEM;
                goto fail_free_lapic;
        }
        vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
 
-       if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask,
+                               GFP_KERNEL_ACCOUNT)) {
                r = -ENOMEM;
                goto fail_free_mce_banks;
        }
@@ -9306,13 +9308,13 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
                slot->arch.rmap[i] =
                        kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
-                                GFP_KERNEL);
+                                GFP_KERNEL_ACCOUNT);
                if (!slot->arch.rmap[i])
                        goto out_free;
                if (i == 0)
                        continue;
 
-               linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL);
+               linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
                if (!linfo)
                        goto out_free;