diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8dccc317aac2..c1f9677af3bc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -243,35 +243,216 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 }
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
-static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
-		bool want_memblock)
+static void __meminit grow_zone_span(struct zone *zone, unsigned long start_pfn,
+				     unsigned long end_pfn)
+{
+	unsigned long old_zone_end_pfn;
+
+	zone_span_writelock(zone);
+
+	old_zone_end_pfn = zone_end_pfn(zone);
+	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
+		zone->zone_start_pfn = start_pfn;
+
+	zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
+				zone->zone_start_pfn;
+
+	zone_span_writeunlock(zone);
+}
+
+static void resize_zone(struct zone *zone, unsigned long start_pfn,
+			unsigned long end_pfn)
+{
+	zone_span_writelock(zone);
+
+	if (end_pfn - start_pfn) {
+		zone->zone_start_pfn = start_pfn;
+		zone->spanned_pages = end_pfn - start_pfn;
+	} else {
+		/*
+		 * Keep this consistent with free_area_init_core():
+		 * if spanned_pages == 0, then keep start_pfn == 0.
+		 */
+		zone->zone_start_pfn = 0;
+		zone->spanned_pages = 0;
+	}
+
+	zone_span_writeunlock(zone);
+}
+
+static void fix_zone_id(struct zone *zone, unsigned long start_pfn,
+			unsigned long end_pfn)
+{
+	enum zone_type zid = zone_idx(zone);
+	int nid = zone->zone_pgdat->node_id;
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++)
+		set_page_links(pfn_to_page(pfn), zid, nid, pfn);
+}
+
+static void __ref ensure_zone_is_initialized(struct zone *zone,
+			unsigned long start_pfn, unsigned long num_pages)
+{
+	if (!zone_is_initialized(zone))
+		init_currently_empty_zone(zone, start_pfn, num_pages);
+}
+
+static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long flags;
+	unsigned long z1_start_pfn;
+
+	ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
+
+	pgdat_resize_lock(z1->zone_pgdat, &flags);
+
+	/* can't move pfns which are higher than @z2 */
+	if (end_pfn > zone_end_pfn(z2))
+		goto out_fail;
+	/* the moved-out part must be at the leftmost of @z2 */
+	if (start_pfn > z2->zone_start_pfn)
+		goto out_fail;
+	/* the moved range must overlap @z2 */
+	if (end_pfn <= z2->zone_start_pfn)
+		goto out_fail;
+
+	/* use start_pfn for z1's start_pfn if z1 is empty */
+	if (!zone_is_empty(z1))
+		z1_start_pfn = z1->zone_start_pfn;
+	else
+		z1_start_pfn = start_pfn;
+
+	resize_zone(z1, z1_start_pfn, end_pfn);
+	resize_zone(z2, end_pfn, zone_end_pfn(z2));
+
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+
+	fix_zone_id(z1, start_pfn, end_pfn);
+
+	return 0;
+out_fail:
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+	return -1;
+}
+
+static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long flags;
+	unsigned long z2_end_pfn;
+
+	ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
+
+	pgdat_resize_lock(z1->zone_pgdat, &flags);
+
+	/* can't move pfns which are lower than @z1 */
+	if (z1->zone_start_pfn > start_pfn)
+		goto out_fail;
+	/* the moved-out part must be at the rightmost of @z1 */
+	if (zone_end_pfn(z1) > end_pfn)
+		goto out_fail;
+	/* the moved range must overlap @z1 */
+	if (start_pfn >= zone_end_pfn(z1))
+		goto out_fail;
+
+	/* use end_pfn for z2's end_pfn if z2 is empty */
+	if (!zone_is_empty(z2))
+		z2_end_pfn = zone_end_pfn(z2);
+	else
+		z2_end_pfn = end_pfn;
+
+	resize_zone(z1, z1->zone_start_pfn, start_pfn);
+	resize_zone(z2, start_pfn, z2_end_pfn);
+
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+
+	fix_zone_id(z2, start_pfn, end_pfn);
+
+	return 0;
+out_fail:
+	pgdat_resize_unlock(z1->zone_pgdat, &flags);
+	return -1;
+}
+
+static struct zone * __meminit move_pfn_range(int zone_shift,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct zone *zone = page_zone(pfn_to_page(start_pfn));
+	int ret = 0;
+
+	if (zone_shift < 0)
+		ret = move_pfn_range_left(zone + zone_shift, zone,
+					  start_pfn, end_pfn);
+	else if (zone_shift)
+		ret = move_pfn_range_right(zone, zone + zone_shift,
+					   start_pfn, end_pfn);
+
+	if (ret)
+		return NULL;
+
+	return zone + zone_shift;
+}
+
+static void __meminit grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
+				      unsigned long end_pfn)
+{
+	unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat);
+
+	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
+		pgdat->node_start_pfn = start_pfn;
+
+	pgdat->node_spanned_pages = max(old_pgdat_end_pfn, end_pfn) -
+					pgdat->node_start_pfn;
+}
+
+static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nr_pages = PAGES_PER_SECTION;
+	int nid = pgdat->node_id;
+	int zone_type;
+	unsigned long flags, pfn;
+
+	zone_type = zone - pgdat->node_zones;
+	ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
+
+	pgdat_resize_lock(zone->zone_pgdat, &flags);
+	grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
+	grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
+			phys_start_pfn + nr_pages);
+	pgdat_resize_unlock(zone->zone_pgdat, &flags);
+	memmap_init_zone(nr_pages, nid, zone_type,
+			 phys_start_pfn, MEMMAP_HOTPLUG);
+
+	/* online_page_range is called later and expects pages reserved */
+	for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
+		if (!pfn_valid(pfn))
+			continue;
+
+		SetPageReserved(pfn_to_page(pfn));
+	}
+	return 0;
+}
+
+static int __meminit __add_section(int nid, struct zone *zone,
+		unsigned long phys_start_pfn, bool want_memblock)
 {
 	int ret;
-	int i;
 
 	if (pfn_valid(phys_start_pfn))
 		return -EEXIST;
 
-	ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
+	ret = sparse_add_one_section(zone, phys_start_pfn);
+
 	if (ret < 0)
 		return ret;
 
-	/*
-	 * Make all the pages reserved so that nobody will stumble over half
-	 * initialized state.
-	 * FIXME: We also have to associate it with a node because pfn_to_node
-	 * relies on having page with the proper node.
-	 */
-	for (i = 0; i < PAGES_PER_SECTION; i++) {
-		unsigned long pfn = phys_start_pfn + i;
-		struct page *page;
-		if (!pfn_valid(pfn))
-			continue;
+	ret = __add_zone(zone, phys_start_pfn);
 
-		page = pfn_to_page(pfn);
-		set_page_node(page, nid);
-		SetPageReserved(page);
-	}
+	if (ret < 0)
+		return ret;
 
 	if (!want_memblock)
 		return 0;
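The hunk above brings back the span-shuffling helpers: move_pfn_range_left() and move_pfn_range_right() hand a pfn range from one zone to an adjacent one purely by rewriting span boundaries, and the three guard checks require the range to hang off the correct edge of the donor zone. Below is a minimal userspace sketch of the move_pfn_range_left() arithmetic; the toy_zone struct, all names, and the pfn values are invented for illustration, and the locking and fix_zone_id() page relinking of the real code are deliberately left out.

/*
 * Userspace model of the move_pfn_range_left() checks above. A "zone"
 * here is just a [start_pfn, end_pfn) interval; this illustrates the
 * span arithmetic only and is not kernel code.
 */
#include <stdio.h>
#include <stdbool.h>

struct toy_zone { unsigned long start_pfn, end_pfn; }; /* end == start means empty */

/* Move [start_pfn, end_pfn) out of the left edge of z2 into z1. */
static bool toy_move_left(struct toy_zone *z1, struct toy_zone *z2,
			  unsigned long start_pfn, unsigned long end_pfn)
{
	if (end_pfn > z2->end_pfn)		/* can't move pfns above z2 */
		return false;
	if (start_pfn > z2->start_pfn)		/* must start at z2's left edge */
		return false;
	if (end_pfn <= z2->start_pfn)		/* must actually overlap z2 */
		return false;

	if (z1->start_pfn == z1->end_pfn)	/* z1 empty: adopt start_pfn */
		z1->start_pfn = start_pfn;
	z1->end_pfn = end_pfn;			/* z1 grows to the right */
	z2->start_pfn = end_pfn;		/* z2 shrinks from the left */
	return true;
}

int main(void)
{
	struct toy_zone normal = { 0x10000, 0x20000 };
	struct toy_zone movable = { 0x20000, 0x30000 };

	/* move the first 0x8000 pfns of "movable" into "normal" */
	if (toy_move_left(&normal, &movable, 0x20000, 0x28000))
		printf("normal=[%lx,%lx) movable=[%lx,%lx)\n",
		       normal.start_pfn, normal.end_pfn,
		       movable.start_pfn, movable.end_pfn);
	return 0;
}

move_pfn_range_right() is the mirror image: the range must sit flush against the donor's upper boundary, and the higher-indexed zone absorbs it at its lower edge.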
@@ -285,7 +466,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  * call this function after deciding the zone to which to
  * add the new pages.
  */
-int __ref __add_pages(int nid, unsigned long phys_start_pfn,
+int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
 			unsigned long nr_pages, bool want_memblock)
 {
 	unsigned long i;
@@ -293,6 +474,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 	int start_sec, end_sec;
 	struct vmem_altmap *altmap;
 
+	clear_zone_contiguous(zone);
+
 	/* during initialize mem_map, align hot-added range to section */
 	start_sec = pfn_to_section_nr(phys_start_pfn);
 	end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
@@ -312,7 +495,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 	}
 
 	for (i = start_sec; i <= end_sec; i++) {
-		err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
+		err = __add_section(nid, zone, section_nr_to_pfn(i), want_memblock);
 
 		/*
 		 * EEXIST is finally dealt with by ioresource collision
@@ -325,6 +508,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 	}
 	vmemmap_populate_print_last();
 out:
+	set_zone_contiguous(zone);
 	return err;
 }
 EXPORT_SYMBOL_GPL(__add_pages);
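Note for review: __add_pages() walks whole sections, so a request that is not section-aligned still adds every section its pfn range touches, and clear_zone_contiguous()/set_zone_contiguous() bracket the walk because zone contiguity is unknown while sections are being spliced in. A compilable sketch of the alignment arithmetic, with the section shift assumed to be 15 (32768 pages per 128 MiB section, as on x86-64) and the helper names invented:

/*
 * Model of the section walk in __add_pages(): pfn_to_section_nr() and
 * section_nr_to_pfn() are just shifts by the section order. Toy code
 * for illustration only.
 */
#include <stdio.h>

#define TOY_SECTION_SHIFT 15	/* 32768 pages per section, as on x86-64 */

static unsigned long pfn_to_sec(unsigned long pfn) { return pfn >> TOY_SECTION_SHIFT; }
static unsigned long sec_to_pfn(unsigned long sec) { return sec << TOY_SECTION_SHIFT; }

int main(void)
{
	unsigned long start_pfn = 0x104000, nr_pages = 0x6000;
	unsigned long start_sec = pfn_to_sec(start_pfn);
	unsigned long end_sec = pfn_to_sec(start_pfn + nr_pages - 1);

	/* an unaligned request still covers both touched sections */
	for (unsigned long i = start_sec; i <= end_sec; i++)
		printf("add section %lu starting at pfn %#lx\n", i, sec_to_pfn(i));
	return 0;
}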
@@ -773,144 +957,39 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 		node_set_state(node, N_MEMORY);
 }
 
-bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages, int online_type)
-{
-	struct pglist_data *pgdat = NODE_DATA(nid);
-	struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
-	struct zone *default_zone = default_zone_for_pfn(nid, pfn, nr_pages);
-
-	/*
-	 * TODO there shouldn't be any inherent reason to have ZONE_NORMAL
-	 * physically before ZONE_MOVABLE. All we need is they do not
-	 * overlap. Historically we didn't allow ZONE_NORMAL after ZONE_MOVABLE
-	 * though so let's stick with it for simplicity for now.
-	 * TODO make sure we do not overlap with ZONE_DEVICE
-	 */
-	if (online_type == MMOP_ONLINE_KERNEL) {
-		if (zone_is_empty(movable_zone))
-			return true;
-		return movable_zone->zone_start_pfn >= pfn + nr_pages;
-	} else if (online_type == MMOP_ONLINE_MOVABLE) {
-		return zone_end_pfn(default_zone) <= pfn;
-	}
-
-	/* MMOP_ONLINE_KEEP will always succeed and inherits the current zone */
-	return online_type == MMOP_ONLINE_KEEP;
-}
-
-static void __meminit resize_zone_range(struct zone *zone, unsigned long start_pfn,
-		unsigned long nr_pages)
+bool zone_can_shift(unsigned long pfn, unsigned long nr_pages,
+		    enum zone_type target, int *zone_shift)
 {
-	unsigned long old_end_pfn = zone_end_pfn(zone);
-
-	if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
-		zone->zone_start_pfn = start_pfn;
-
-	zone->spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - zone->zone_start_pfn;
-}
-
-static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned long start_pfn,
-		unsigned long nr_pages)
-{
-	unsigned long old_end_pfn = pgdat_end_pfn(pgdat);
-
-	if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn)
-		pgdat->node_start_pfn = start_pfn;
-
-	pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
-}
-
-void __ref move_pfn_range_to_zone(struct zone *zone,
-		unsigned long start_pfn, unsigned long nr_pages)
-{
-	struct pglist_data *pgdat = zone->zone_pgdat;
-	int nid = pgdat->node_id;
-	unsigned long flags;
-
-	if (zone_is_empty(zone))
-		init_currently_empty_zone(zone, start_pfn, nr_pages);
-
-	clear_zone_contiguous(zone);
-
-	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
-	pgdat_resize_lock(pgdat, &flags);
-	zone_span_writelock(zone);
-	resize_zone_range(zone, start_pfn, nr_pages);
-	zone_span_writeunlock(zone);
-	resize_pgdat_range(pgdat, start_pfn, nr_pages);
-	pgdat_resize_unlock(pgdat, &flags);
-
-	/*
-	 * TODO now we have a visible range of pages which are not associated
-	 * with their zone properly. Not nice but set_pfnblock_flags_mask
-	 * expects the zone spans the pfn range. All the pages in the range
-	 * are reserved so nobody should be touching them so we should be safe
-	 */
-	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
-
-	set_zone_contiguous(zone);
-}
+	struct zone *zone = page_zone(pfn_to_page(pfn));
+	enum zone_type idx = zone_idx(zone);
+	int i;
 
-/*
- * Returns a default kernel memory zone for the given pfn range.
- * If no kernel zone covers this pfn range it will automatically go
- * to the ZONE_NORMAL.
- */
-struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
-		unsigned long nr_pages)
-{
-	struct pglist_data *pgdat = NODE_DATA(nid);
-	int zid;
+	*zone_shift = 0;
 
-	for (zid = 0; zid <= ZONE_NORMAL; zid++) {
-		struct zone *zone = &pgdat->node_zones[zid];
+	if (idx < target) {
+		/* pages must be at end of current zone */
+		if (pfn + nr_pages != zone_end_pfn(zone))
+			return false;
 
-		if (zone_intersects(zone, start_pfn, nr_pages))
-			return zone;
+		/* no zones in use between current zone and target */
+		for (i = idx + 1; i < target; i++)
+			if (zone_is_initialized(zone - idx + i))
+				return false;
 	}
 
-	return &pgdat->node_zones[ZONE_NORMAL];
-}
-
-static inline bool movable_pfn_range(int nid, struct zone *default_zone,
-		unsigned long start_pfn, unsigned long nr_pages)
-{
-	if (!allow_online_pfn_range(nid, start_pfn, nr_pages,
-				MMOP_ONLINE_KERNEL))
-		return true;
-
-	if (!movable_node_is_enabled())
-		return false;
-
-	return !zone_intersects(default_zone, start_pfn, nr_pages);
-}
-
-/*
- * Associates the given pfn range with the given node and the zone appropriate
- * for the given online type.
- */
-static struct zone * __meminit move_pfn_range(int online_type, int nid,
-		unsigned long start_pfn, unsigned long nr_pages)
-{
-	struct pglist_data *pgdat = NODE_DATA(nid);
-	struct zone *zone = default_zone_for_pfn(nid, start_pfn, nr_pages);
+	if (target < idx) {
+		/* pages must be at beginning of current zone */
+		if (pfn != zone->zone_start_pfn)
+			return false;
 
-	if (online_type == MMOP_ONLINE_KEEP) {
-		struct zone *movable_zone = &pgdat->node_zones[ZONE_MOVABLE];
-		/*
-		 * MMOP_ONLINE_KEEP defaults to MMOP_ONLINE_KERNEL but use
-		 * movable zone if that is not possible (e.g. we are within
-		 * or past the existing movable zone). movable_node overrides
-		 * this default and defaults to movable zone
-		 */
-		if (movable_pfn_range(nid, zone, start_pfn, nr_pages))
-			zone = movable_zone;
-	} else if (online_type == MMOP_ONLINE_MOVABLE) {
-		zone = &pgdat->node_zones[ZONE_MOVABLE];
+		/* no zones in use between current zone and target */
+		for (i = target + 1; i < idx; i++)
+			if (zone_is_initialized(zone - idx + i))
+				return false;
 	}
 
-	move_pfn_range_to_zone(zone, start_pfn, nr_pages);
-	return zone;
+	*zone_shift = target - idx;
+	return true;
 }
 
 /* Must be protected by mem_hotplug_begin() */
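zone_can_shift() only decides whether the existing layout lets the range slide to the target zone index; the actual resize happens afterwards in move_pfn_range(). Below is a self-contained model of its two symmetric cases, with zones reduced to plain intervals; treating initialization as a boolean field is an approximation of zone_is_initialized(), and the indices and pfn values are invented:

/*
 * Model of zone_can_shift(): a range may move to a higher zone index
 * only if it sits at the end of its current zone and no initialized
 * zone lies in between (and symmetrically for a lower index).
 * Illustration only.
 */
#include <stdio.h>
#include <stdbool.h>

struct toy_zone { unsigned long start, end; bool initialized; };

static bool toy_zone_can_shift(struct toy_zone *zones, int idx, int target,
			       unsigned long pfn, unsigned long nr_pages,
			       int *shift)
{
	*shift = 0;
	if (idx < target) {
		if (pfn + nr_pages != zones[idx].end)	/* must be at zone end */
			return false;
		for (int i = idx + 1; i < target; i++)	/* nothing in between */
			if (zones[i].initialized)
				return false;
	} else if (target < idx) {
		if (pfn != zones[idx].start)		/* must be at zone start */
			return false;
		for (int i = target + 1; i < idx; i++)
			if (zones[i].initialized)
				return false;
	}
	*shift = target - idx;
	return true;
}

int main(void)
{
	enum { NORMAL = 2, MOVABLE = 3 };	/* toy zone indices */
	struct toy_zone zones[4] = {
		[NORMAL]  = { 0x10000, 0x20000, true },
		[MOVABLE] = { 0x20000, 0x30000, true },
	};
	int shift;

	/* the last 0x8000 pfns of "normal" want to become "movable" */
	if (toy_zone_can_shift(zones, NORMAL, MOVABLE, 0x18000, 0x8000, &shift))
		printf("ok, zone_shift=%d\n", shift);
	return 0;
}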
@@ -923,18 +1002,26 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	int nid;
 	int ret;
 	struct memory_notify arg;
+	int zone_shift = 0;
 
-	nid = pfn_to_nid(pfn);
-	if (!allow_online_pfn_range(nid, pfn, nr_pages, online_type))
-		return -EINVAL;
+	if (online_type == MMOP_ONLINE_KERNEL) {
+		if (!zone_can_shift(pfn, nr_pages, ZONE_NORMAL, &zone_shift))
+			return -EINVAL;
+	} else if (online_type == MMOP_ONLINE_MOVABLE) {
+		if (!zone_can_shift(pfn, nr_pages, ZONE_MOVABLE, &zone_shift))
+			return -EINVAL;
+	}
 
-	/* associate pfn range with the zone */
-	zone = move_pfn_range(online_type, nid, pfn, nr_pages);
+	zone = move_pfn_range(zone_shift, pfn, pfn + nr_pages);
+	if (!zone)
+		return -EINVAL;
 
 	arg.start_pfn = pfn;
 	arg.nr_pages = nr_pages;
 	node_states_check_changes_online(nr_pages, zone, &arg);
 
+	nid = zone_to_nid(zone);
+
 	ret = memory_notify(MEM_GOING_ONLINE, &arg);
 	ret = notifier_to_errno(ret);
 	if (ret)
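After zone_can_shift() succeeds, zone_shift is just the signed distance between zone indices, and move_pfn_range() dispatches on its sign. The toy below models that dispatch for non-empty neighbours; empty-zone handling, locking, and the struct page fixups of the real helpers are omitted, and all names and values are illustrative:

/*
 * Model of how online_pages() uses zone_can_shift() + move_pfn_range():
 * a negative zone_shift moves the range into a lower zone index (the
 * lower neighbour grows), a positive one into a higher index.
 */
#include <stdio.h>

struct toy_zone { unsigned long start, end; };

/* Give [start, end), currently at one edge of zones[idx], to zones[idx + shift]. */
static struct toy_zone *toy_move_pfn_range(struct toy_zone *zones, int idx,
					   int shift, unsigned long start,
					   unsigned long end)
{
	struct toy_zone *src = &zones[idx], *dst = &zones[idx + shift];

	if (shift < 0) {		/* range leaves through src's left edge */
		src->start = end;
		dst->end = end;
	} else if (shift > 0) {		/* range leaves through src's right edge */
		src->end = start;
		dst->start = start;
	}
	return dst;			/* shift == 0: range stays where it is */
}

int main(void)
{
	struct toy_zone zones[] = { { 0x10000, 0x20000 },   /* "normal"  */
				    { 0x20000, 0x30000 } }; /* "movable" */
	struct toy_zone *z;

	/* online the last 0x8000 pfns of zone 0 into zone 1 (shift = +1) */
	z = toy_move_pfn_range(zones, 0, +1, 0x18000, 0x20000);
	printf("target=[%lx,%lx) normal=[%lx,%lx)\n",
	       z->start, z->end, zones[0].start, zones[0].end);
	return 0;
}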
@@ -1129,6 +1216,39 @@ static int check_hotplug_memory_range(u64 start, u64 size)
 	return 0;
 }
 
+/*
+ * If the movable zone has already been set up, newly added memory must be
+ * checked: if its address is above the movable zone's start, it must be
+ * added as movable. Without this check, the movable zone could overlap
+ * another zone.
+ */
+static int should_add_memory_movable(int nid, u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	pg_data_t *pgdat = NODE_DATA(nid);
+	struct zone *movable_zone = pgdat->node_zones + ZONE_MOVABLE;
+
+	if (zone_is_empty(movable_zone))
+		return 0;
+
+	if (movable_zone->zone_start_pfn <= start_pfn)
+		return 1;
+
+	return 0;
+}
+
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
+		bool for_device)
+{
+#ifdef CONFIG_ZONE_DEVICE
+	if (for_device)
+		return ZONE_DEVICE;
+#endif
+	if (should_add_memory_movable(nid, start, size))
+		return ZONE_MOVABLE;
+
+	return zone_default;
+}
+
 static int online_memory_block(struct memory_block *mem, void *arg)
 {
 	return device_online(&mem->dev);
@@ -1174,7 +1294,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
 	}
 
 	/* call arch's memory hotadd */
-	ret = arch_add_memory(nid, start, size, true);
+	ret = arch_add_memory(nid, start, size, false);
 	if (ret < 0)
 		goto error;
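zone_for_memory() is the hot-add counterpart of the onlining checks: once ZONE_MOVABLE is populated, memory added at or above its start is forced into it so the zones cannot interleave, which is also why the last hunk flips the flag passed to arch_add_memory() to false (in this tree that argument appears to be for_device, matching the CONFIG_ZONE_DEVICE branch above). A compilable sketch of the decision, with toy types and invented values:

/*
 * Model of the zone_for_memory() decision above: hot-added memory that
 * starts at or above an already-populated movable zone is forced into
 * it so the zones cannot interleave. Illustration only.
 */
#include <stdio.h>

enum toy_zone_id { TOY_NORMAL, TOY_MOVABLE };

struct toy_zone { unsigned long start_pfn, spanned_pages; };

static enum toy_zone_id toy_zone_for_memory(const struct toy_zone *movable,
					    unsigned long start_pfn,
					    enum toy_zone_id zone_default)
{
	if (movable->spanned_pages &&		/* movable zone already set up */
	    movable->start_pfn <= start_pfn)	/* new range lies at/above it */
		return TOY_MOVABLE;
	return zone_default;
}

int main(void)
{
	struct toy_zone movable = { 0x200000, 0x100000 };

	/* a DIMM hot-added above the movable zone's start must be movable */
	printf("%s\n", toy_zone_for_memory(&movable, 0x280000, TOY_NORMAL)
		       == TOY_MOVABLE ? "ZONE_MOVABLE" : "default zone");
	return 0;
}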