From: Linus Torvalds Date: Sun, 31 Dec 2017 21:03:05 +0000 (-0800) Subject: Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git... X-Git-Tag: v4.15~124 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd;hp=-c;p=mirror_ubuntu-bionic-kernel.git Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 page table isolation fixes from Thomas Gleixner: "Four patches addressing the PTI fallout as discussed and debugged yesterday: - Remove stale and pointless TLB flush invocations from the hotplug code - Remove stale preempt_disable/enable from __native_flush_tlb() - Plug the memory leak in the write_ldt() error path" * 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/ldt: Make LDT pgtable free conditional x86/ldt: Plug memory leak in error path x86/mm: Remove preempt_disable/enable() from __native_flush_tlb() x86/smpboot: Remove stale TLB flush invocations --- 52c90f2d32bfa7d6eccd66a56c44ace1f78fbadd diff --combined arch/x86/include/asm/tlbflush.h index f68f9c836cca,f9b48ce152eb..4a08dd2ab32a --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@@ -245,43 -245,40 +245,43 @@@ static inline void cr4_init_shadow(void this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); } +static inline void __cr4_set(unsigned long cr4) +{ + lockdep_assert_irqs_disabled(); + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); +} + /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 | mask) != cr4) { - cr4 |= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 | mask) != cr4) + __cr4_set(cr4 | mask); + local_irq_restore(flags); } /* Clear in this cpu's CR4. */ static inline void cr4_clear_bits(unsigned long mask) { - unsigned long cr4; + unsigned long cr4, flags; + local_irq_save(flags); cr4 = this_cpu_read(cpu_tlbstate.cr4); - if ((cr4 & ~mask) != cr4) { - cr4 &= ~mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); - } + if ((cr4 & ~mask) != cr4) + __cr4_set(cr4 & ~mask); + local_irq_restore(flags); } -static inline void cr4_toggle_bits(unsigned long mask) +static inline void cr4_toggle_bits_irqsoff(unsigned long mask) { unsigned long cr4; cr4 = this_cpu_read(cpu_tlbstate.cr4); - cr4 ^= mask; - this_cpu_write(cpu_tlbstate.cr4, cr4); - __write_cr4(cr4); + __cr4_set(cr4 ^ mask); } /* Read the CR4 shadow. */ @@@ -348,15 -345,17 +348,17 @@@ static inline void invalidate_user_asid */ static inline void __native_flush_tlb(void) { - invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); /* - * If current->mm == NULL then we borrow a mm which may change - * during a task switch and therefore we must not be preempted - * while we write CR3 back: + * Preemption or interrupts must be disabled to protect the access + * to the per CPU variable and to prevent being preempted between + * read_cr3() and write_cr3(). */ - preempt_disable(); + WARN_ON_ONCE(preemptible()); + + invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid)); + + /* If current->mm == NULL then the read_cr3() "borrows" an mm */ native_write_cr3(__native_read_cr3()); - preempt_enable(); } /* diff --combined arch/x86/kernel/smpboot.c index c5970efa8557,c3402fc30865..ed556d50d7ed --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@@ -77,7 -77,6 +77,7 @@@ #include #include #include +#include /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@@ -101,12 -100,15 +101,12 @@@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuin EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ -static int *physical_to_logical_pkg __read_mostly; -static unsigned long *physical_package_map __read_mostly;; -static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; /* Maximum number of SMT threads on any online core */ -int __max_smt_threads __read_mostly; +int __read_mostly __max_smt_threads = 1; /* Flag to indicate if a complete sched domain rebuild is required */ bool x86_topology_update; @@@ -126,25 -128,16 +126,16 @@@ static inline void smpboot_setup_warm_r spin_lock_irqsave(&rtc_lock, flags); CMOS_WRITE(0xa, 0xf); spin_unlock_irqrestore(&rtc_lock, flags); - local_flush_tlb(); - pr_debug("1.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4; - pr_debug("2.\n"); *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf; - pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) { unsigned long flags; - /* - * Install writable page 0 entry to set BIOS data area. - */ - local_flush_tlb(); - /* * Paranoid: Set warm reset code and vector here back * to default values. @@@ -237,7 -230,7 +228,7 @@@ static void notrace start_secondary(voi load_cr3(swapper_pg_dir); __flush_tlb_all(); #endif - + load_current_idt(); cpu_init(); x86_cpuinit.early_percpu_clock_init(); preempt_disable(); @@@ -248,19 -241,19 +239,19 @@@ /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); /* - * Check TSC synchronization with the BP: + * Check TSC synchronization with the boot CPU: */ check_tsc_sync_target(); /* - * Lock vector_lock and initialize the vectors on this cpu - * before setting the cpu online. We must set it online with - * vector_lock held to prevent a concurrent setup/teardown - * from seeing a half valid vector space. + * Lock vector_lock, set CPU online and bring the vector + * allocator online. Online must be set with vector_lock held + * to prevent a concurrent irq setup/teardown from seeing a + * half valid vector space. */ lock_vector_lock(); - setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); + lapic_online(); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); @@@ -277,25 -270,6 +268,25 @@@ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } +/** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && c->phys_proc_id == phys_pkg) + return c->logical_proc_id; + } + return -1; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + /** * topology_update_package_map - Update the physical to logical package map * @pkg: The physical package id as retrieved via CPUID @@@ -303,23 -277,102 +294,23 @@@ */ int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new; + int new; - /* Called from early boot ? */ - if (!physical_package_map) - return 0; - - if (pkg >= max_physical_pkg_id) - return -EINVAL; - - /* Set the logical package id */ - if (test_and_set_bit(pkg, physical_package_map)) + /* Already available somewhere? */ + new = topology_phys_to_logical_pkg(pkg); + if (new >= 0) goto found; - if (logical_packages >= __max_logical_packages) { - pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", - logical_packages, cpu, __max_logical_packages); - return -ENOSPC; - } - new = logical_packages++; if (new != pkg) { pr_info("CPU %u Converting physical %u to logical package %u\n", cpu, pkg, new); } - physical_to_logical_pkg[pkg] = new; - found: - cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + cpu_data(cpu).logical_proc_id = new; return 0; } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - if (phys_pkg >= max_physical_pkg_id) - return -1; - return physical_to_logical_pkg[phys_pkg]; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) -{ - unsigned int ncpus; - size_t size; - - /* - * Today neither Intel nor AMD support heterogenous systems. That - * might change in the future.... - * - * While ideally we'd want '* smp_num_siblings' in the below @ncpus - * computation, this won't actually work since some Intel BIOSes - * report inconsistent HT data when they disable HT. - * - * In particular, they reduce the APIC-IDs to only include the cores, - * but leave the CPUID topology to say there are (2) siblings. - * This means we don't know how many threads there will be until - * after the APIC enumeration. - * - * By not including this we'll sometimes over-estimate the number of - * logical packages by the amount of !present siblings, but this is - * still better than MAX_LOCAL_APIC. - * - * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited - * on the command line leading to a similar issue as the HT disable - * problem because the hyperthreads are usually enumerated after the - * primary cores. - */ - ncpus = boot_cpu_data.x86_max_cores; - if (!ncpus) { - pr_warn("x86_max_cores == zero !?!?"); - ncpus = 1; - } - - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - logical_packages = 0; - - /* - * Possibly larger than what we need as the number of apic ids per - * package can be smaller than the actual used apic ids. - */ - max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); - size = max_physical_pkg_id * sizeof(unsigned int); - physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); - memset(physical_to_logical_pkg, 0xff, size); - size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); - physical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); - - topology_update_package_map(c->phys_proc_id, cpu); -} - void __init smp_store_boot_cpu_info(void) { int id = 0; /* CPU 0 */ @@@ -327,8 -380,7 +318,8 @@@ *c = boot_cpu_data; c->cpu_index = id; - smp_init_package_map(c, id); + topology_update_package_map(c->phys_proc_id, id); + c->initialized = true; } /* @@@ -339,16 -391,13 +330,16 @@@ void smp_store_cpu_info(int id { struct cpuinfo_x86 *c = &cpu_data(id); - *c = boot_cpu_data; + /* Copy boot_cpu_data only on the first bringup */ + if (!c->initialized) + *c = boot_cpu_data; c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when * bringing up AP or offlined CPU0. */ identify_secondary_cpu(c); + c->initialized = true; } static bool @@@ -1032,7 -1081,7 +1023,7 @@@ int native_cpu_up(unsigned int cpu, str unsigned long flags; int err, ret = 0; - WARN_ON(irqs_disabled()); + lockdep_assert_irqs_enabled(); pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); @@@ -1128,10 -1177,17 +1119,10 @@@ static __init void disable_smp(void cpumask_set_cpu(0, topology_core_cpumask(0)); } -enum { - SMP_OK, - SMP_NO_CONFIG, - SMP_NO_APIC, - SMP_FORCE_UP, -}; - /* * Various sanity checks. */ -static int __init smp_sanity_check(unsigned max_cpus) +static void __init smp_sanity_check(void) { preempt_disable(); @@@ -1168,6 -1224,16 +1159,6 @@@ physid_set(hard_smp_processor_id(), phys_cpu_present_map); } - /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - preempt_enable(); - pr_notice("SMP motherboard not detected\n"); - return SMP_NO_CONFIG; - } - /* * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. @@@ -1178,6 -1244,29 +1169,6 @@@ physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(boot_cpu_apic_version) && - !boot_cpu_has(X86_FEATURE_APIC)) { - if (!disable_apic) { - pr_err("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); - } - return SMP_NO_APIC; - } - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - pr_info("SMP mode deactivated\n"); - return SMP_FORCE_UP; - } - - return SMP_OK; } static void __init smp_cpu_index_default(void) @@@ -1192,18 -1281,9 +1183,18 @@@ } } +static void __init smp_get_logical_apicid(void) +{ + if (x2apic_mode) + cpu0_logical_apicid = apic_read(APIC_LDR); + else + cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); +} + /* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. + * Prepare for SMP bootup. + * @max_cpus: configured maximum number of CPUs, It is a legacy parameter + * for common interface support. */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { @@@ -1235,33 -1315,35 +1226,33 @@@ set_cpu_sibling_map(0); - switch (smp_sanity_check(max_cpus)) { - case SMP_NO_CONFIG: - disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected. Using dummy APIC emulation.\n"); - return; - case SMP_NO_APIC: + smp_sanity_check(); + + switch (apic_intr_mode) { + case APIC_PIC: + case APIC_VIRTUAL_WIRE_NO_CONFIG: disable_smp(); return; - case SMP_FORCE_UP: + case APIC_SYMMETRIC_IO_NO_ROUTING: disable_smp(); - apic_bsp_setup(false); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); return; - case SMP_OK: + case APIC_VIRTUAL_WIRE: + case APIC_SYMMETRIC_IO: break; } - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); - default_setup_apic_routing(); - cpu0_logical_apicid = apic_bsp_setup(false); + smp_get_logical_apicid(); pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); + native_pv_lock_init(); + uv_system_init(); set_mtrr_aps_delayed_init(); @@@ -1293,22 -1375,14 +1284,22 @@@ void __init native_smp_prepare_boot_cpu void __init native_smp_cpus_done(unsigned int max_cpus) { + int ncpus; + pr_debug("Boot done\n"); + /* + * Today neither Intel nor AMD support heterogenous systems so + * extrapolate the boot cpu's data to all packages. + */ + ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + pr_info("Max logical packages: %u\n", __max_logical_packages); if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); nmi_selftest(); impress_friends(); - setup_ioapic_dest(); mtrr_aps_init(); } @@@ -1467,14 -1541,13 +1458,14 @@@ void cpu_disable_common(void remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + lapic_offline(); } int native_cpu_disable(void) { int ret; - ret = check_irq_vectors_for_cpu_disable(); + ret = lapic_can_unplug_cpu(); if (ret) return ret;