git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - mm/compaction.c
mm: compaction: abort compaction loop if lock is contended or run too long
[mirror_ubuntu-bionic-kernel.git] / mm / compaction.c
index 7fcd3a52e68d4b2a9bfc07c1056b7db3a2b154b1..0649cc1b3479daf56bc1a3b995b7c5c5ef59dd9d 100644 (file)
@@ -70,8 +70,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 
                /* async aborts if taking too long or contended */
                if (!cc->sync) {
-                       if (cc->contended)
-                               *cc->contended = true;
+                       cc->contended = true;
                        return false;
                }
 
@@ -91,6 +90,60 @@ static inline bool compact_trylock_irqsave(spinlock_t *lock,
        return compact_checklock_irqsave(lock, flags, false, cc);
 }
 
+static void compact_capture_page(struct compact_control *cc)
+{
+       unsigned long flags;
+       int mtype, mtype_low, mtype_high;
+
+       if (!cc->page || *cc->page)
+               return;
+
+       /*
+        * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
+        * regardless of the migratetype of the freelist it is captured from.
+        * This is fine because the order for a high-order MIGRATE_MOVABLE
+        * allocation is typically at least a pageblock size and overall
+        * fragmentation is not impaired. Other allocation types must
+        * capture pages from their own migratelist because otherwise they
+        * could pollute other pageblocks like MIGRATE_MOVABLE with
+        * difficult to move pages and making fragmentation worse overall.
+        */
+       if (cc->migratetype == MIGRATE_MOVABLE) {
+               mtype_low = 0;
+               mtype_high = MIGRATE_PCPTYPES;
+       } else {
+               mtype_low = cc->migratetype;
+               mtype_high = cc->migratetype + 1;
+       }
+
+       /* Speculatively examine the free lists without zone lock */
+       for (mtype = mtype_low; mtype < mtype_high; mtype++) {
+               int order;
+               for (order = cc->order; order < MAX_ORDER; order++) {
+                       struct page *page;
+                       struct free_area *area;
+                       area = &(cc->zone->free_area[order]);
+                       if (list_empty(&area->free_list[mtype]))
+                               continue;
+
+                       /* Take the lock and attempt capture of the page */
+                       if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
+                               return;
+                       if (!list_empty(&area->free_list[mtype])) {
+                               page = list_entry(area->free_list[mtype].next,
+                                                       struct page, lru);
+                               if (capture_free_page(page, cc->order, mtype)) {
+                                       spin_unlock_irqrestore(&cc->zone->lock,
+                                                                       flags);
+                                       *cc->page = page;
+                                       return;
+                               }
+                       }
+                       spin_unlock_irqrestore(&cc->zone->lock, flags);
+               }
+       }
+}
+
 /*
  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
@@ -634,7 +687,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 
        /* Perform the isolation */
        low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn);
-       if (!low_pfn)
+       if (!low_pfn || cc->contended)
                return ISOLATE_ABORT;
 
        cc->migrate_pfn = low_pfn;
@@ -645,7 +698,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 static int compact_finished(struct zone *zone,
                            struct compact_control *cc)
 {
-       unsigned int order;
        unsigned long watermark;
 
        if (fatal_signal_pending(current))
@@ -688,14 +740,22 @@ static int compact_finished(struct zone *zone,
                return COMPACT_CONTINUE;

        /* Direct compactor: Is a suitable page free? */
-       for (order = cc->order; order < MAX_ORDER; order++) {
-               /* Job done if page is free of the right migratetype */
-               if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
-                       return COMPACT_PARTIAL;
-
-               /* Job done if allocation would set block type */
-               if (order >= pageblock_order && zone->free_area[order].nr_free)
+       if (cc->page) {
+               /* Was a suitable page captured? */
+               if (*cc->page)
                        return COMPACT_PARTIAL;
+       } else {
+               unsigned int order;
+               for (order = cc->order; order < MAX_ORDER; order++) {
+                       struct free_area *area = &zone->free_area[order];
+                       /* Job done if page is free of the right migratetype */
+                       if (!list_empty(&area->free_list[cc->migratetype]))
+                               return COMPACT_PARTIAL;
+
+                       /* Job done if allocation would set block type */
+                       if (order >= pageblock_order && area->nr_free)
+                               return COMPACT_PARTIAL;
+               }
        }

        return COMPACT_CONTINUE;
@@ -787,6 +847,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                switch (isolate_migratepages(zone, cc)) {
                case ISOLATE_ABORT:
                        ret = COMPACT_PARTIAL;
+                       putback_lru_pages(&cc->migratepages);
+                       cc->nr_migratepages = 0;
                        goto out;
                case ISOLATE_NONE:
                        continue;
@@ -817,6 +879,9 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
                                goto out;
                        }
                }
+
+               /* Capture a page now if it is a suitable size */
+               compact_capture_page(cc);
        }
 
 out:
@@ -829,8 +894,10 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
                                 int order, gfp_t gfp_mask,
-                                bool sync, bool *contended)
+                                bool sync, bool *contended,
+                                struct page **page)
 {
+       unsigned long ret;
        struct compact_control cc = {
                .nr_freepages = 0,
                .nr_migratepages = 0,
@@ -838,12 +905,18 @@ static unsigned long compact_zone_order(struct zone *zone,
                .migratetype = allocflags_to_migratetype(gfp_mask),
                .zone = zone,
                .sync = sync,
-               .contended = contended,
+               .page = page,
        };
        INIT_LIST_HEAD(&cc.freepages);
        INIT_LIST_HEAD(&cc.migratepages);
 
-       return compact_zone(zone, &cc);
+       ret = compact_zone(zone, &cc);
+
+       VM_BUG_ON(!list_empty(&cc.freepages));
+       VM_BUG_ON(!list_empty(&cc.migratepages));
+
+       *contended = cc.contended;
+       return ret;
 }
 
 int sysctl_extfrag_threshold = 500;
@@ -860,7 +933,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
                        int order, gfp_t gfp_mask, nodemask_t *nodemask,
-                       bool sync, bool *contended)
+                       bool sync, bool *contended, struct page **page)
 {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        int may_enter_fs = gfp_mask & __GFP_FS;
@@ -868,28 +941,30 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
        struct zoneref *z;
        struct zone *zone;
        int rc = COMPACT_SKIPPED;
+       int alloc_flags = 0;
 
-       /*
-        * Check whether it is worth even starting compaction. The order check is
-        * made because an assumption is made that the page allocator can satisfy
-        * the "cheaper" orders without taking special steps
-        */
+       /* Check if the GFP flags allow compaction */
        if (!order || !may_enter_fs || !may_perform_io)
                return rc;
 
        count_vm_event(COMPACTSTALL);
 
+#ifdef CONFIG_CMA
+       if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+               alloc_flags |= ALLOC_CMA;
+#endif
        /* Compact each zone in the list */
        for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
                                                                nodemask) {
                int status;
 
                status = compact_zone_order(zone, order, gfp_mask, sync,
-                                               contended);
+                                               contended, page);
                rc = max(status, rc);
 
                /* If a normal allocation would succeed, stop compacting */
-               if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
+               if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
+                                     alloc_flags))
                        break;
        }
 
@@ -940,6 +1015,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
        struct compact_control cc = {
                .order = order,
                .sync = false,
+               .page = NULL,
        };
 
        return __compact_pgdat(pgdat, &cc);
@@ -950,6 +1026,7 @@ static int compact_node(int nid)
        struct compact_control cc = {
                .order = -1,
                .sync = true,
+               .page = NULL,
        };
 
        return __compact_pgdat(NODE_DATA(nid), &cc);