]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - arch/x86/kernel/setup.c
x86/paravirt: call paravirt_pagetable_setup_{start, done}
[mirror_ubuntu-bionic-kernel.git] / arch / x86 / kernel / setup.c
index 4716460607b4ad1172c64d5441c84e9cf945c7e0..8ce6a91ce1084ef00e97fe3fd1c2a7ef178f9bbf 100644 (file)
@@ -389,19 +389,88 @@ static void __init parse_setup_data(void)
                default:
                        break;
                }
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-               free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
                pa_data = data->next;
                early_iounmap(data, PAGE_SIZE);
        }
 }
 
+static void __init e820_reserve_setup_data(void)
+{
+       struct setup_data *data;
+       u64 pa_data;
+       int found = 0;          /* set once at least one node was visited */
+       /* Pass every setup_data node (header + payload) to e820_update_range(). */
+       if (boot_params.hdr.version < 0x0209)   /* list is only walked from 0x0209 on */
+               return;
+       pa_data = boot_params.hdr.setup_data;   /* address of the first node */
+       while (pa_data) {
+               data = early_ioremap(pa_data, sizeof(*data));
+               e820_update_range(pa_data, sizeof(*data)+data->len,
+                        E820_RAM, E820_RESERVED_KERN);
+               found = 1;
+               pa_data = data->next;   /* read the link before the mapping goes away */
+               early_iounmap(data, sizeof(*data));
+       }
+       if (!found)
+               return;
+       /* Something was passed on: re-sanitize, refresh e820_saved and print the map. */
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+       memcpy(&e820_saved, &e820, sizeof(struct e820map));
+       printk(KERN_INFO "extended physical RAM map:\n");
+       e820_print_map("reserve setup_data");
+}
+
+static void __init reserve_early_setup_data(void)
+{
+       struct setup_data *data;
+       u64 pa_data;
+       char buf[32];           /* holds the "setup data <type>" label */
+       /* Call reserve_early() on every setup_data node (header + payload). */
+       if (boot_params.hdr.version < 0x0209)   /* list is only walked from 0x0209 on */
+               return;
+       pa_data = boot_params.hdr.setup_data;   /* address of the first node */
+       while (pa_data) {
+               data = early_ioremap(pa_data, sizeof(*data));
+               sprintf(buf, "setup data %x", data->type);      /* type printed in hex */
+               reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+               pa_data = data->next;   /* read the link before the mapping goes away */
+               early_iounmap(data, sizeof(*data));
+       }
+}
+
 /*
  * --------- Crashkernel reservation ------------------------------
  */
 
 #ifdef CONFIG_KEXEC
+
+/**
+ * find_and_reserve_crashkernel - reserve @size bytes at any suitable offset
+ * @size: Size of the crashkernel memory to reserve.
+ *
+ * Returns the base address on success, and -1ULL on failure.
+ */
+unsigned long long find_and_reserve_crashkernel(unsigned long long size)
+{
+       const unsigned long long alignment = 16<<20;    /* 16M */
+       unsigned long long start = 0LL;
+
+       while (1) {
+               int ret;
+
+               start = find_e820_area(start, ULONG_MAX, size, alignment);
+               if (start == -1ULL)     /* no candidate area: report failure */
+                       return start;
+
+               /* try to reserve it */
+               ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
+               if (ret >= 0)
+                       return start;
+               /* reservation refused: restart the search one alignment step higher */
+               start += alignment;
+       }
+}
+
 static inline unsigned long long get_total_mem(void)
 {
        unsigned long long total;
@@ -424,30 +493,36 @@ static void __init reserve_crashkernel(void)
 
        ret = parse_crashkernel(boot_command_line, total_mem,
                        &crash_size, &crash_base);
-       if (ret == 0 && crash_size > 0) {
-               if (crash_base <= 0) {
-                       printk(KERN_INFO "crashkernel reservation failed - "
-                                       "you have to specify a base address\n");
+       if (ret != 0 || crash_size <= 0)
+               return;
+
+       /* 0 means: find the address automatically */
+       if (crash_base <= 0) {
+               crash_base = find_and_reserve_crashkernel(crash_size);
+               if (crash_base == -1ULL) {
+                       pr_info("crashkernel reservation failed. "
+                               "No suitable area found.\n");
                        return;
                }
-
-               if (reserve_bootmem_generic(crash_base, crash_size,
-                                       BOOTMEM_EXCLUSIVE) < 0) {
-                       printk(KERN_INFO "crashkernel reservation failed - "
-                                       "memory is in use\n");
+       } else {
+               ret = reserve_bootmem_generic(crash_base, crash_size,
+                                       BOOTMEM_EXCLUSIVE);
+               if (ret < 0) {
+                       pr_info("crashkernel reservation failed - "
+                               "memory is in use\n");
                        return;
                }
+       }
 
-               printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-                               "for crashkernel (System RAM: %ldMB)\n",
-                               (unsigned long)(crash_size >> 20),
-                               (unsigned long)(crash_base >> 20),
-                               (unsigned long)(total_mem >> 20));
+       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                       "for crashkernel (System RAM: %ldMB)\n",
+                       (unsigned long)(crash_size >> 20),
+                       (unsigned long)(crash_base >> 20),
+                       (unsigned long)(total_mem >> 20));
 
-               crashk_res.start = crash_base;
-               crashk_res.end   = crash_base + crash_size - 1;
-               insert_resource(&iomem_resource, &crashk_res);
-       }
+       crashk_res.start = crash_base;
+       crashk_res.end   = crash_base + crash_size - 1;
+       insert_resource(&iomem_resource, &crashk_res);
 }
 #else
 static void __init reserve_crashkernel(void)
@@ -521,14 +596,15 @@ void __init setup_arch(char **cmdline_p)
 {
 #ifdef CONFIG_X86_32
        memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+       visws_early_detect();
        pre_setup_arch_hook();
        early_cpu_init();
-       early_ioremap_init();
-       reserve_setup_data();
 #else
        printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
 
+       early_ioremap_init();
+
        ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
        screen_info = boot_params.screen_info;
        edid_info = boot_params.edid_info;
@@ -566,6 +642,10 @@ void __init setup_arch(char **cmdline_p)
        ARCH_SETUP
 
        setup_memory_map();
+       parse_setup_data();
+       /* update the e820_saved too */
+       e820_reserve_setup_data();
+
        copy_edd();
 
        if (!boot_params.hdr.root_flags)
@@ -592,17 +672,14 @@ void __init setup_arch(char **cmdline_p)
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
 
-       parse_setup_data();
-
        parse_early_param();
 
+       /* after parse_early_param(), so a panic here can be seen on serial */
+       reserve_early_setup_data();
+
        if (acpi_mps_check()) {
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifdef CONFIG_X86_32
-               enable_local_apic = -1;
-#else
                disable_apic = 1;
-#endif
 #endif
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
        }
@@ -633,22 +710,18 @@ void __init setup_arch(char **cmdline_p)
        early_gart_iommu_check();
 #endif
 
-       e820_register_active_regions(0, 0, -1UL);
        /*
         * partially used pages are not usable - thus
         * we are rounding upwards:
         */
-       max_pfn = e820_end_of_ram();
+       max_pfn = e820_end_of_ram_pfn();
 
        /* preallocate 4k for mptable mpc */
        early_reserve_e820_mpc_new();
        /* update e820 for memory not covered by WB MTRRs */
        mtrr_bp_init();
-       if (mtrr_trim_uncached_memory(max_pfn)) {
-               remove_all_active_ranges();
-               e820_register_active_regions(0, 0, -1UL);
-               max_pfn = e820_end_of_ram();
-       }
+       if (mtrr_trim_uncached_memory(max_pfn))
+               max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
        /* max_low_pfn get updated here */
@@ -660,12 +733,26 @@ void __init setup_arch(char **cmdline_p)
 
        /* How many end-of-memory variables you have, grandma! */
        /* need this before calling reserve_initrd */
-       max_low_pfn = max_pfn;
+       if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+               max_low_pfn = e820_end_of_low_ram_pfn();
+       else
+               max_low_pfn = max_pfn;
+
        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
        /* max_pfn_mapped is updated here */
-       max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+       max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+       max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+       if (max_pfn > max_low_pfn) {
+               max_pfn_mapped = init_memory_mapping(1UL<<32,
+                                                    max_pfn<<PAGE_SHIFT);
+               /* can we preserve max_low_pfn? */
+               max_low_pfn = max_pfn;
+       }
+#endif
 
        /*
         * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -691,11 +778,6 @@ void __init setup_arch(char **cmdline_p)
         */
        acpi_boot_table_init();
 
-#ifdef CONFIG_X86_64
-       /* Remove active ranges so rediscovery with NUMA-awareness happens */
-       remove_all_active_ranges();
-#endif
-
 #ifdef CONFIG_ACPI_NUMA
        /*
         * Parse SRAT to discover nodes.
@@ -737,7 +819,9 @@ void __init setup_arch(char **cmdline_p)
        vmi_init();
 #endif
 
+       paravirt_pagetable_setup_start(swapper_pg_dir);
        paging_init();
+       paravirt_pagetable_setup_done(swapper_pg_dir);
 
 #ifdef CONFIG_X86_64
        map_vsyscall();
@@ -754,10 +838,6 @@ void __init setup_arch(char **cmdline_p)
         */
        acpi_boot_init();
 
-#ifdef CONFIG_X86_64
-       init_cpu_to_node();
-#endif
-
 #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
        /*
         * get boot-time SMP configuration:
@@ -766,6 +846,19 @@ void __init setup_arch(char **cmdline_p)
                get_smp_config();
 #endif
 
+       prefill_possible_map();
+#ifdef CONFIG_X86_64
+       init_cpu_to_node();
+#endif
+
+#ifdef CONFIG_X86_NUMAQ
+       /*
+        * need to check online nodes num, call it
+        * here before time_init/tsc_init
+        */
+       numaq_tsc_disable();
+#endif
+
        init_apic_mappings();
        ioapic_init_mappings();