hugepages_treat_as_movable
-This parameter is only useful when kernelcore= is specified at boot time to
-create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages
-are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero
-value written to hugepages_treat_as_movable allows huge pages to be allocated
-from ZONE_MOVABLE.
-
-Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge
-pages pool can easily grow or shrink within. Assuming that applications are
-not running that mlock() a lot of memory, it is likely the huge pages pool
-can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value
-into nr_hugepages and triggering page reclaim.
+This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
+or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE.
+ZONE_MOVABLE is created when the kernel boot parameter kernelcore= is
+specified, so this parameter has no effect if used without kernelcore=.
+
+Hugepage migration is now available in some situations which depend on the
+architecture and/or the hugepage size. If a hugepage supports migration,
+allocation from ZONE_MOVABLE is always enabled for the hugepage regardless
+of the value of this parameter.
+In other words, this parameter affects only non-migratable hugepages.
+
+Assuming that hugepages are not migratable in your system, one use case of
+this parameter is that users can make the hugepage pool more extensible by
+enabling allocation from ZONE_MOVABLE. This is because page
+reclaim/migration/compaction are more effective on ZONE_MOVABLE, so
+contiguous memory is more likely to be available there. Note that using
+ZONE_MOVABLE for non-migratable hugepages can harm other features such as
+memory hotremove (because memory hotremove expects that memory blocks on
+ZONE_MOVABLE are always removable), so it is a trade-off for which the
+user is responsible.
==============================================================
#include "internal.h"
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable;
int hugetlb_max_hstate __read_mostly;
return page;
}
+/*
+ * Movability of hugepages depends on migration support.
+ *
+ * Select the GFP mask used when allocating or dequeuing huge pages of
+ * hstate @h.  Returns GFP_HIGHUSER_MOVABLE when either
+ * hugepages_treat_as_movable is non-zero or hugepage_migration_support(h)
+ * reports true; otherwise returns GFP_HIGHUSER.
+ *
+ * The mask is computed on every call rather than cached, so a change to
+ * hugepages_treat_as_movable is picked up by the next caller.
+ */
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+ if (hugepages_treat_as_movable || hugepage_migration_support(h))
+ return GFP_HIGHUSER_MOVABLE;
+ else
+ return GFP_HIGHUSER;
+}
+
static struct page *dequeue_huge_page_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address, int avoid_reserve,
retry_cpuset:
cpuset_mems_cookie = get_mems_allowed();
zonelist = huge_zonelist(vma, address,
- htlb_alloc_mask, &mpol, &nodemask);
+ htlb_alloc_mask(h), &mpol, &nodemask);
for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) {
- if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+ if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
page = dequeue_huge_page_node(h, zone_to_nid(zone));
if (page) {
if (avoid_reserve)
return NULL;
page = alloc_pages_exact_node(nid,
- htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+ htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
if (page) {
spin_unlock(&hugetlb_lock);
if (nid == NUMA_NO_NODE)
- page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+ page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
else
page = alloc_pages_exact_node(nid,
- htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+ htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
if (page && arch_prepare_hugepage(page)) {
}
#endif /* CONFIG_NUMA */
-int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *length, loff_t *ppos)
-{
- proc_dointvec(table, write, buffer, length, ppos);
- if (hugepages_treat_as_movable)
- htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
- else
- htlb_alloc_mask = GFP_HIGHUSER;
- return 0;
-}
-
int hugetlb_overcommit_handler(struct ctl_table *table, int write,
void __user *buffer,
size_t *length, loff_t *ppos)