git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
Merge tag 'drm-intel-fixes-2017-10-04' of git://anongit.freedesktop.org/git/drm-intel...
author Dave Airlie <airlied@redhat.com>
Fri, 6 Oct 2017 01:09:29 +0000 (11:09 +1000)
committer Dave Airlie <airlied@redhat.com>
Fri, 6 Oct 2017 01:09:29 +0000 (11:09 +1000)
drm/i915 fixes for 4.14-rc4:

All 3 highest-priority GLK bugs, each fixed by Imre:
- GLK driver reload: fix DDI PHY init if it was already on.
- GLK suspend/resume: reprogram the DMC firmware after S3/S4.
- GLK DC states: fix the idleness calculation.

* tag 'drm-intel-fixes-2017-10-04' of git://anongit.freedesktop.org/git/drm-intel:
  drm/i915/glk: Fix DMC/DC state idleness calculation
  drm/i915/cnl: Reprogram DMC firmware after S3/S4 resume
  drm/i915: Fix DDI PHY init if it was already on

204 files changed:
Documentation/cpu-freq/index.txt
Documentation/devicetree/bindings/leds/ams,as3645a.txt
MAINTAINERS
Makefile
arch/arm/boot/dts/omap3-n950-n9.dtsi
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/head.S
arch/arm64/mm/fault.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/um/kernel/time.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/events/msr.c
arch/x86/ia32/ia32_signal.c
arch/x86/include/asm/asm.h
arch/x86/include/asm/fpu/internal.h
arch/x86/include/asm/fpu/types.h
arch/x86/include/asm/fpu/xstate.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/trace/fpu.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/xen/hypercall.h
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/init.c
arch/x86/kernel/fpu/regset.c
arch/x86/kernel/fpu/signal.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/ksysfs.c
arch/x86/kernel/kvm.c
arch/x86/kernel/signal.c
arch/x86/kernel/traps.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/math-emu/fpu_entry.c
arch/x86/mm/extable.c
arch/x86/mm/fault.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/pkeys.c
arch/x86/mm/tlb.c
arch/x86/xen/mmu_pv.c
drivers/acpi/apei/ghes.c
drivers/base/power/opp/core.c
drivers/clocksource/numachip.c
drivers/cpufreq/cpufreq-dt-platdev.c
drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_events.c
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
drivers/gpu/drm/qxl/qxl_display.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/sun4i/Kconfig
drivers/gpu/drm/sun4i/sun4i_hdmi.h
drivers/gpu/drm/tegra/trace.h
drivers/infiniband/core/security.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/eprom.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/platform.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mem.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/io-pgtable-arm-v7s.c
drivers/iommu/mtk_iommu.c
drivers/irqchip/irq-mips-gic.c
drivers/leds/leds-as3645a.c
drivers/md/dm-raid.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c
drivers/mmc/host/sdhci-pci-core.c
drivers/mmc/host/tmio_mmc_core.c
drivers/mtd/mtdpart.c
drivers/mtd/nand/atmel/pmecc.c
drivers/pci/pci-sysfs.c
drivers/platform/x86/fujitsu-laptop.c
drivers/scsi/aacraid/aachba.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/linit.c
drivers/scsi/aacraid/src.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/scsi_error.c
drivers/scsi/scsi_transport_fc.c
drivers/scsi/scsi_transport_iscsi.c
drivers/xen/xen-pciback/conf_space_header.c
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/iomap.c
fs/isofs/inode.c
fs/proc/array.c
fs/quota/dquot.c
fs/quota/quota_v2.c
fs/read_write.c
fs/xfs/libxfs/xfs_ag_resv.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_error.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_super.c
include/linux/cpuhotplug.h
include/linux/iommu.h
include/linux/key.h
include/linux/pci.h
include/linux/sched.h
include/linux/timer.h
include/rdma/ib_verbs.h
include/trace/events/sched.h
include/uapi/rdma/ib_user_verbs.h
kernel/cpu.c
kernel/events/ring_buffer.c
kernel/exit.c
kernel/futex.c
kernel/irq/generic-chip.c
kernel/irq/irqdomain.c
kernel/irq/manage.c
kernel/locking/rwsem-xadd.c
kernel/sched/core.c
kernel/sched/debug.c
kernel/seccomp.c
kernel/sysctl.c
kernel/trace/trace_output.c
kernel/trace/trace_sched_wakeup.c
net/bluetooth/Kconfig
net/bluetooth/hci_sock.c
net/sunrpc/xprtrdma/frwr_ops.c
scripts/Makefile.build
security/keys/Kconfig
security/keys/big_key.c
security/keys/internal.h
security/keys/key.c
security/keys/keyctl.c
security/keys/keyring.c
security/keys/proc.c
security/keys/process_keys.c
security/keys/request_key_auth.c
tools/arch/s390/include/uapi/asm/kvm.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/disabled-features.h
tools/include/asm-generic/hugetlb_encode.h [new file with mode: 0644]
tools/include/uapi/asm-generic/mman-common.h
tools/include/uapi/drm/drm.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/mman.h
tools/objtool/arch/x86/decode.c
tools/perf/MANIFEST
tools/perf/arch/s390/util/Build
tools/perf/arch/s390/util/sym-handling.c [deleted file]
tools/perf/util/callchain.c
tools/perf/util/evsel.c
tools/perf/util/symbol-elf.c
tools/perf/util/symbol.h
tools/perf/util/syscalltbl.c
tools/testing/selftests/Makefile
tools/testing/selftests/bpf/bpf_util.h
tools/testing/selftests/breakpoints/Makefile
tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
tools/testing/selftests/futex/Makefile
tools/testing/selftests/intel_pstate/Makefile
tools/testing/selftests/intel_pstate/run.sh
tools/testing/selftests/lib.mk
tools/testing/selftests/memfd/run_tests.sh [changed mode: 0644->0755]
tools/testing/selftests/mqueue/Makefile
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/msg_zerocopy.c
tools/testing/selftests/net/netdevice.sh
tools/testing/selftests/net/reuseaddr_conflict.c [new file with mode: 0644]
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/sigaltstack/sas.c
tools/testing/selftests/sync/Makefile
tools/testing/selftests/timers/set-timer-lat.c
tools/testing/selftests/watchdog/Makefile

index 03a7cee6ac73a4f2dd48248196994538d81f6a4e..c15e75386a0523d5398290aa9a63cc1d9f40f45e 100644 (file)
--- a/Documentation/cpu-freq/index.txt
+++ b/Documentation/cpu-freq/index.txt
@@ -32,8 +32,6 @@ cpufreq-stats.txt -   General description of sysfs cpufreq stats.
 
 index.txt      -       File index, Mailing list and Links (this document)
 
-intel-pstate.txt -     Intel pstate cpufreq driver specific file.
-
 pcc-cpufreq.txt -      PCC cpufreq driver specific file.
 
 
index 12c5ef26ec73924566361aa2ad96710d1e12b210..fdc40e354a64dd582d86841afac520a73d6fa5f0 100644 (file)
--- a/Documentation/devicetree/bindings/leds/ams,as3645a.txt
+++ b/Documentation/devicetree/bindings/leds/ams,as3645a.txt
@@ -15,11 +15,14 @@ Required properties
 
 compatible     : Must be "ams,as3645a".
 reg            : The I2C address of the device. Typically 0x30.
+#address-cells : 1
+#size-cells    : 0
 
 
-Required properties of the "flash" child node
-=============================================
+Required properties of the flash child node (0)
+===============================================
 
+reg: 0
 flash-timeout-us: Flash timeout in microseconds. The value must be in
                  the range [100000, 850000] and divisible by 50000.
 flash-max-microamp: Maximum flash current in microamperes. Has to be
@@ -33,20 +36,21 @@ ams,input-max-microamp: Maximum flash controller input current. The
                        and divisible by 50000.
 
 
-Optional properties of the "flash" child node
-=============================================
+Optional properties of the flash child node
+===========================================
 
 label          : The label of the flash LED.
 
 
-Required properties of the "indicator" child node
-=================================================
+Required properties of the indicator child node (1)
+===================================================
 
+reg: 1
 led-max-microamp: Maximum indicator current. The allowed values are
                  2500, 5000, 7500 and 10000.
 
-Optional properties of the "indicator" child node
-=================================================
+Optional properties of the indicator child node
+===============================================
 
 label          : The label of the indicator LED.
 
@@ -55,16 +59,20 @@ Example
 =======
 
        as3645a@30 {
+               #address-cells: 1
+               #size-cells: 0
                reg = <0x30>;
                compatible = "ams,as3645a";
-               flash {
+               flash@0 {
+                       reg = <0x0>;
                        flash-timeout-us = <150000>;
                        flash-max-microamp = <320000>;
                        led-max-microamp = <60000>;
                        ams,input-max-microamp = <1750000>;
                        label = "as3645a:flash";
                };
-               indicator {
+               indicator@1 {
+                       reg = <0x1>;
                        led-max-microamp = <10000>;
                        label = "as3645a:indicator";
                };
index 6671f375f7fcdd1cf9e40f62c884f1ef9592dab9..65b0c88d5ee0d292914b85c9ba45b1fee5f466ff 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8597,6 +8597,12 @@ M:       Sean Wang <sean.wang@mediatek.com>
 S:     Maintained
 F:     drivers/media/rc/mtk-cir.c
 
+MEDIATEK PMIC LED DRIVER
+M:     Sean Wang <sean.wang@mediatek.com>
+S:     Maintained
+F:     drivers/leds/leds-mt6323.c
+F:     Documentation/devicetree/bindings/leds/leds-mt6323.txt
+
 MEDIATEK ETHERNET DRIVER
 M:     Felix Fietkau <nbd@openwrt.org>
 M:     John Crispin <john@phrozen.org>
index d1119941261c3c8dea12e3d5448b3dd4ff51d243..cf007a31d575d6312f3f5a860e9591dc329a86a4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -1172,11 +1172,11 @@ headers_check: headers_install
 
 PHONY += kselftest
 kselftest:
-       $(Q)$(MAKE) -C tools/testing/selftests run_tests
+       $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
 
 PHONY += kselftest-clean
 kselftest-clean:
-       $(Q)$(MAKE) -C tools/testing/selftests clean
+       $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
 
 PHONY += kselftest-merge
 kselftest-merge:
index cb47ae79a5f9e87ed1e9ce84b148b6829c9e2935..1b0bd72945f21337d1e89c3e1636bbfb5cabeecd 100644 (file)
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
        clock-frequency = <400000>;
 
        as3645a@30 {
+               #address-cells = <1>;
+               #size-cells = <0>;
                reg = <0x30>;
                compatible = "ams,as3645a";
-               flash {
+               flash@0 {
+                       reg = <0x0>;
                        flash-timeout-us = <150000>;
                        flash-max-microamp = <320000>;
                        led-max-microamp = <60000>;
-                       peak-current-limit = <1750000>;
+                       ams,input-max-microamp = <1750000>;
                };
-               indicator {
+               indicator@1 {
+                       reg = <0x1>;
                        led-max-microamp = <10000>;
                };
        };
index bc4e92337d1690045b7b5c97efd11ad4fb8bb453..b46e54c2399b58b6451ea9dacc5c033115b05c64 100644 (file)
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -401,7 +401,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)                (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-#define pte_offset_phys(dir,addr)      (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_phys(dir,addr)      (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
 #define pte_offset_kernel(dir,addr)    ((pte_t *)__va(pte_offset_phys((dir), (addr))))
 
 #define pte_offset_map(dir,addr)       pte_offset_kernel((dir), (addr))
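
The READ_ONCE() added above forces a single load of the pmd entry, so a concurrent update cannot be re-fetched or observed torn by the page-table walker. A minimal userspace sketch of the idiom (simplified READ_ONCE macro and hypothetical pte_base() helper, not the kernel's definitions):

#include <stdint.h>
#include <stdio.h>

/* simplified READ_ONCE: a volatile access compiles to exactly one load */
#define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

typedef uint64_t pmd_t;

/* snapshot the entry once; without READ_ONCE the compiler may reload
 * *dir between uses and see two different values */
static uint64_t pte_base(const pmd_t *dir)
{
        pmd_t val = READ_ONCE(*dir);
        return val & ~0xfffULL;   /* assumption: 4 KiB-aligned table */
}

int main(void)
{
        pmd_t entry = 0x123456789000ULL | 0x3;
        printf("table at %#llx\n", (unsigned long long)pte_base(&entry));
        return 0;
}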
index 7434ec0c7a271632adce1a497bc097306199e580..0b243ecaf7ac87faa81fed0a09db996757d71f4e 100644 (file)
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -384,6 +384,7 @@ ENTRY(kimage_vaddr)
  * booted in EL1 or EL2 respectively.
  */
 ENTRY(el2_setup)
+       msr     SPsel, #1                       // We want to use SP_EL{1,2}
        mrs     x0, CurrentEL
        cmp     x0, #CurrentEL_EL2
        b.eq    1f
index 89993c4be1befe3d7629bf45174f10081027d2a2..2069e9bc0fca674ba1d6fe769be05e6f731ef93d 100644 (file)
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -651,7 +651,7 @@ static const struct fault_info fault_info[] = {
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 0 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 1 translation fault"     },
        { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 2 translation fault"     },
-       { do_page_fault,        SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
+       { do_translation_fault, SIGSEGV, SEGV_MAPERR,   "level 3 translation fault"     },
        { do_bad,               SIGBUS,  0,             "unknown 8"                     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 1 access flag fault"     },
        { do_page_fault,        SIGSEGV, SEGV_ACCERR,   "level 2 access flag fault"     },
index 17936f82d3c787c38327f41186f634d043d843a9..ec69fa45d5a2f249322d32218a722f4d54390e97 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1121,6 +1121,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 BEGIN_FTR_SECTION
        mtspr   SPRN_PPR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+/* Move canary into DSISR to check for later */
+BEGIN_FTR_SECTION
+       li      r0, 0x7fff
+       mtspr   SPRN_HDSISR, r0
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
        ld      r0, VCPU_GPR(R0)(r4)
        ld      r4, VCPU_GPR(R4)(r4)
 
@@ -1956,9 +1963,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 kvmppc_hdsi:
        ld      r3, VCPU_KVM(r9)
        lbz     r0, KVM_RADIX(r3)
-       cmpwi   r0, 0
        mfspr   r4, SPRN_HDAR
        mfspr   r6, SPRN_HDSISR
+BEGIN_FTR_SECTION
+       /* Look for DSISR canary. If we find it, retry instruction */
+       cmpdi   r6, 0x7fff
+       beq     6f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+       cmpwi   r0, 0
        bne     .Lradix_hdsi            /* on radix, just save DAR/DSISR/ASDR */
        /* HPTE not found fault or protection fault? */
        andis.  r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
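
The canary above works around HDSISR being stale on some exits: 0x7fff is written before entering the guest, and if a hypervisor data storage interrupt still shows that value, the register was never updated by hardware and the instruction is simply retried. A generic C sketch of the sentinel-and-retry pattern (hypothetical read_status(), not the POWER9 code path):

#include <stdio.h>

#define CANARY 0x7fff   /* a value the hardware would never report */

/* stand-in for reading a status register that may not have been
 * written on this exit (assumption: hypothetical device) */
static unsigned read_status(int attempt)
{
        return attempt == 0 ? CANARY : 0x40;   /* valid on retry */
}

int main(void)
{
        for (int attempt = 0; ; attempt++) {
                unsigned status = read_status(attempt);
                if (status == CANARY)
                        continue;       /* stale: retry the faulting op */
                printf("handled status %#x after %d retries\n",
                       status, attempt);
                return 0;
        }
}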
index 0b034ebbda2a1c2dd0125a6d4c2135dd886eb440..7f69d17de3540ca8491270408946a00654493cd2 100644 (file)
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -98,7 +98,7 @@ static struct clocksource timer_clocksource = {
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static void __init timer_setup(void)
+static void __init um_timer_setup(void)
 {
        int err;
 
@@ -132,5 +132,5 @@ void read_persistent_clock(struct timespec *ts)
 void __init time_init(void)
 {
        timer_set_signal_handler();
-       late_time_init = timer_setup;
+       late_time_init = um_timer_setup;
 }
index 4cf100ff2a3746f440049dd5c27c254e4dcc1776..72db0664a53dfd6fe64512ebda83caaa97db1abd 100644 (file)
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
        X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
        X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates),
 
        X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE,  snb_cstates),
        X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
@@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
        X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
 
        X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates),
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates),
        { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
index 8e2457cb6b4a416e1c84d9baa990ea512ec74ba0..005908ee9333f0e87cdd4db7ca14a278d52fa773 100644 (file)
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
 
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
+
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init),
        {},
 };
 
index db1fe377e6dd9ddfcfbc006a7346879f263d408a..a7196818416a57381b513254628206a9660ed578 100644 (file)
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = {
 static struct intel_uncore_type skx_uncore_iio = {
        .name                   = "iio",
        .num_counters           = 4,
-       .num_boxes              = 5,
+       .num_boxes              = 6,
        .perf_ctr_bits          = 48,
        .event_ctl              = SKX_IIO0_MSR_PMON_CTL0,
        .perf_ctr               = SKX_IIO0_MSR_PMON_CTR0,
@@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = {
 static struct intel_uncore_type skx_uncore_irp = {
        .name                   = "irp",
        .num_counters           = 2,
-       .num_boxes              = 5,
+       .num_boxes              = 6,
        .perf_ctr_bits          = 48,
        .event_ctl              = SKX_IRP0_MSR_PMON_CTL0,
        .perf_ctr               = SKX_IRP0_MSR_PMON_CTR0,
index 4bb3ec69e8ea10537c25ef2d792be94f1d7a4127..06723671ae4e91d53b7327b44fe96b588826d838 100644 (file)
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -63,6 +63,14 @@ static bool test_intel(int idx)
        case INTEL_FAM6_ATOM_SILVERMONT1:
        case INTEL_FAM6_ATOM_SILVERMONT2:
        case INTEL_FAM6_ATOM_AIRMONT:
+
+       case INTEL_FAM6_ATOM_GOLDMONT:
+       case INTEL_FAM6_ATOM_DENVERTON:
+
+       case INTEL_FAM6_ATOM_GEMINI_LAKE:
+
+       case INTEL_FAM6_XEON_PHI_KNL:
+       case INTEL_FAM6_XEON_PHI_KNM:
                if (idx == PERF_MSR_SMI)
                        return true;
                break;
index e0bb46c0285752e73c8eb55420e29f97d88a818e..0e2a5edbce00111f6a41f1b3070610b5316d1e71 100644 (file)
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
                 ksig->ka.sa.sa_restorer)
                sp = (unsigned long) ksig->ka.sa.sa_restorer;
 
-       if (fpu->fpstate_active) {
+       if (fpu->initialized) {
                unsigned long fx_aligned, math_size;
 
                sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
index c1eadbaf1115a4ad79cf55803e6c3bc72c15c4a4..b0dc91f4bedc680ac69d7ecc481a71c863bade77 100644 (file)
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
 # define __ASM_FORM_COMMA(x) " " #x ","
 #endif
 
-#ifdef CONFIG_X86_32
+#ifndef __x86_64__
+/* 32 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(a)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
 #else
+/* 64 bit */
 # define __ASM_SEL(a,b) __ASM_FORM(b)
 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
 #endif
  * gets set up by the containing function.  If you forget to do this, objtool
  * may print a "call without frame pointer save/setup" warning.
  */
-register unsigned int __asm_call_sp asm("esp");
-#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp)
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
 
 #endif /* _ASM_X86_ASM_H */
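
The replacement above binds a global register variable to the real stack pointer and lists it as an in/out operand of inline-asm calls, so the compiler must finish frame setup before emitting the call. A fragment showing the pattern under stated assumptions (x86-64, GCC/Clang, hypothetical helper(); compile with gcc -c):

/* global register variable bound to the hardware stack pointer */
register unsigned long current_stack_pointer asm("rsp");

#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)

void helper(void);   /* hypothetical external function */

static inline void call_helper(void)
{
        /* naming rsp as an in/out operand forces the frame-pointer
         * save/setup to precede the call, which is what silences
         * objtool's "call without frame pointer save/setup" warning */
        asm volatile("call helper" : ASM_CALL_CONSTRAINT : : "memory");
}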
index 554cdb205d17586887e6b599c991b2fc090ba38a..e3221ffa304e301858db746a03d6cf8600790004 100644 (file)
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
 /*
  * High level FPU state handling functions:
  */
-extern void fpu__activate_curr(struct fpu *fpu);
-extern void fpu__activate_fpstate_read(struct fpu *fpu);
-extern void fpu__activate_fpstate_write(struct fpu *fpu);
-extern void fpu__current_fpstate_write_begin(void);
-extern void fpu__current_fpstate_write_end(void);
+extern void fpu__initialize(struct fpu *fpu);
+extern void fpu__prepare_read(struct fpu *fpu);
+extern void fpu__prepare_write(struct fpu *fpu);
 extern void fpu__save(struct fpu *fpu);
 extern void fpu__restore(struct fpu *fpu);
 extern int  fpu__restore_sig(void __user *buf, int ia32_frame);
@@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
        err;                                                            \
 })
 
-#define check_insn(insn, output, input...)                             \
-({                                                                     \
-       int err;                                                        \
+#define kernel_insn(insn, output, input...)                            \
        asm volatile("1:" #insn "\n\t"                                  \
                     "2:\n"                                             \
-                    ".section .fixup,\"ax\"\n"                         \
-                    "3:  movl $-1,%[err]\n"                            \
-                    "    jmp  2b\n"                                    \
-                    ".previous\n"                                      \
-                    _ASM_EXTABLE(1b, 3b)                               \
-                    : [err] "=r" (err), output                         \
-                    : "0"(0), input);                                  \
-       err;                                                            \
-})
+                    _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore)  \
+                    : output : input)
 
 static inline int copy_fregs_to_user(struct fregs_state __user *fx)
 {
@@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
 {
-       int err;
-
        if (IS_ENABLED(CONFIG_X86_32)) {
-               err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+               kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
        } else {
                if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) {
-                       err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+                       kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
                } else {
                        /* See comment in copy_fxregs_to_kernel() below. */
-                       err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+                       kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
                }
        }
-       /* Copying from a kernel buffer to FPU registers should never fail: */
-       WARN_ON_FPU(err);
 }
 
 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
@@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
 
 static inline void copy_kernel_to_fregs(struct fregs_state *fx)
 {
-       int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-
-       WARN_ON_FPU(err);
+       kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
 }
 
 static inline int copy_user_to_fregs(struct fregs_state __user *fx)
@@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
  * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
  * XSAVE area format.
  */
-#define XSTATE_XRESTORE(st, lmask, hmask, err)                         \
+#define XSTATE_XRESTORE(st, lmask, hmask)                              \
        asm volatile(ALTERNATIVE(XRSTOR,                                \
                                 XRSTORS, X86_FEATURE_XSAVES)           \
                     "\n"                                               \
-                    "xor %[err], %[err]\n"                             \
                     "3:\n"                                             \
-                    ".pushsection .fixup,\"ax\"\n"                     \
-                    "4: movl $-2, %[err]\n"                            \
-                    "jmp 3b\n"                                         \
-                    ".popsection\n"                                    \
-                    _ASM_EXTABLE(661b, 4b)                             \
-                    : [err] "=r" (err)                                 \
+                    _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\
+                    :                                                  \
                     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)    \
                     : "memory")
 
@@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
        else
                XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
 
-       /* We should never fault when copying from a kernel buffer: */
+       /*
+        * We should never fault when copying from a kernel buffer, and the FPU
+        * state we set at boot time should be valid.
+        */
        WARN_ON_FPU(err);
 }
 
@@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
        u32 hmask = mask >> 32;
        int err;
 
-       WARN_ON(!alternatives_patched);
+       WARN_ON_FPU(!alternatives_patched);
 
        XSTATE_XSAVE(xstate, lmask, hmask, err);
 
@@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
 {
        u32 lmask = mask;
        u32 hmask = mask >> 32;
-       int err;
-
-       XSTATE_XRESTORE(xstate, lmask, hmask, err);
 
-       /* We should never fault when copying from a kernel buffer: */
-       WARN_ON_FPU(err);
+       XSTATE_XRESTORE(xstate, lmask, hmask);
 }
 
 /*
@@ -526,37 +503,16 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
  */
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
-       WARN_ON_FPU(!fpu->fpregs_active);
-
-       fpu->fpregs_active = 0;
        this_cpu_write(fpu_fpregs_owner_ctx, NULL);
        trace_x86_fpu_regs_deactivated(fpu);
 }
 
 static inline void fpregs_activate(struct fpu *fpu)
 {
-       WARN_ON_FPU(fpu->fpregs_active);
-
-       fpu->fpregs_active = 1;
        this_cpu_write(fpu_fpregs_owner_ctx, fpu);
        trace_x86_fpu_regs_activated(fpu);
 }
 
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int fpregs_active(void)
-{
-       return current->thread.fpu.fpregs_active;
-}
-
 /*
  * FPU state switching for scheduling.
  *
@@ -571,14 +527,13 @@ static inline int fpregs_active(void)
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
-       if (old_fpu->fpregs_active) {
+       if (old_fpu->initialized) {
                if (!copy_fpregs_to_fpstate(old_fpu))
                        old_fpu->last_cpu = -1;
                else
                        old_fpu->last_cpu = cpu;
 
                /* But leave fpu_fpregs_owner_ctx! */
-               old_fpu->fpregs_active = 0;
                trace_x86_fpu_regs_deactivated(old_fpu);
        } else
                old_fpu->last_cpu = -1;
@@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
 {
        bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-                      new_fpu->fpstate_active;
+                      new_fpu->initialized;
 
        if (preload) {
                if (!fpregs_state_valid(new_fpu, cpu))
@@ -617,8 +572,7 @@ static inline void user_fpu_begin(void)
        struct fpu *fpu = &current->thread.fpu;
 
        preempt_disable();
-       if (!fpregs_active())
-               fpregs_activate(fpu);
+       fpregs_activate(fpu);
        preempt_enable();
 }
 
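The err plumbing removed above is replaced by a single exception-table fixup, ex_handler_fprestore, so a faulting FPU restore from a kernel buffer no longer needs a check at every call site. As a loose userspace analogue only (a SIGSEGV handler standing in for the kernel's extable mechanism; illustrative, not equivalent):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf fixup;

/* one central handler instead of an error check at every call site */
static void on_fault(int sig)
{
        (void)sig;
        siglongjmp(fixup, 1);
}

int main(void)
{
        signal(SIGSEGV, on_fault);
        if (sigsetjmp(fixup, 1) == 0) {
                volatile int *bad = (int *)8;   /* deliberately bad pointer */
                *bad = 0;                       /* the "faulting instruction" */
        } else {
                puts("recovered in central fixup handler");
        }
        return 0;
}
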
index 3c80f5b9c09d8cf5407378a39ba27cfa380bafbf..a1520575d86b81fd2521d7e4013856b7e70d4db2 100644 (file)
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -68,6 +68,9 @@ struct fxregs_state {
 /* Default value for fxregs_state.mxcsr: */
 #define MXCSR_DEFAULT          0x1f80
 
+/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */
+#define MXCSR_AND_FLAGS_SIZE sizeof(u64)
+
 /*
  * Software based FPU emulation state. This is arbitrary really,
  * it matches the x87 format to make it easier to understand:
@@ -290,36 +293,13 @@ struct fpu {
        unsigned int                    last_cpu;
 
        /*
-        * @fpstate_active:
+        * @initialized:
         *
-        * This flag indicates whether this context is active: if the task
+        * This flag indicates whether this context is initialized: if the task
         * is not running then we can restore from this context, if the task
         * is running then we should save into this context.
         */
-       unsigned char                   fpstate_active;
-
-       /*
-        * @fpregs_active:
-        *
-        * This flag determines whether a given context is actively
-        * loaded into the FPU's registers and that those registers
-        * represent the task's current FPU state.
-        *
-        * Note the interaction with fpstate_active:
-        *
-        *   # task does not use the FPU:
-        *   fpstate_active == 0
-        *
-        *   # task uses the FPU and regs are active:
-        *   fpstate_active == 1 && fpregs_active == 1
-        *
-        *   # the regs are inactive but still match fpstate:
-        *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
-        *
-        * The third state is what we use for the lazy restore optimization
-        * on lazy-switching CPUs.
-        */
-       unsigned char                   fpregs_active;
+       unsigned char                   initialized;
 
        /*
         * @state:
index 1b2799e0699a4d47a3ef191f6177f0a4abc13ceb..83fee2469eb76079771c668545db95e929d1446c 100644 (file)
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
 int using_compacted_format(void);
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
-                       void __user *ubuf, struct xregs_state *xsave);
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
-                    struct xregs_state *xsave);
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
+
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+extern int validate_xstate_header(const struct xstate_header *hdr);
+
 #endif
index 5161da1a0fa0a522c80b93f758a84914a88fad46..89e7eeb5cec1dfa6804d5bba2dfe010e6ad8f455 100644 (file)
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -158,17 +158,6 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-static inline unsigned long current_stack_pointer(void)
-{
-       unsigned long sp;
-#ifdef CONFIG_X86_64
-       asm("mov %%rsp,%0" : "=g" (sp));
-#else
-       asm("mov %%esp,%0" : "=g" (sp));
-#endif
-       return sp;
-}
-
 /*
  * Walks up the stack frames to make sure that the specified object is
  * entirely contained by a single stack frame.
index 342e59789fcdc2e62ce1fd94df32f99f12359bd0..39f7a27bef130fbb6c83a881951e440091ed9168 100644 (file)
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
 
        TP_STRUCT__entry(
                __field(struct fpu *, fpu)
-               __field(bool, fpregs_active)
-               __field(bool, fpstate_active)
+               __field(bool, initialized)
                __field(u64, xfeatures)
                __field(u64, xcomp_bv)
                ),
 
        TP_fast_assign(
                __entry->fpu            = fpu;
-               __entry->fpregs_active  = fpu->fpregs_active;
-               __entry->fpstate_active = fpu->fpstate_active;
+               __entry->initialized    = fpu->initialized;
                if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
                        __entry->xfeatures = fpu->state.xsave.header.xfeatures;
                        __entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
                }
        ),
-       TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
+       TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
                        __entry->fpu,
-                       __entry->fpregs_active,
-                       __entry->fpstate_active,
+                       __entry->initialized,
                        __entry->xfeatures,
                        __entry->xcomp_bv
        )
index 78e8fcc87d4c62a686e27ab6c82451911e862496..4b892917edeb787ce6380fd78a0d1c82f2299982 100644 (file)
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -337,7 +337,7 @@ do {                                                                        \
                     _ASM_EXTABLE(1b, 4b)                               \
                     _ASM_EXTABLE(2b, 4b)                               \
                     : "=r" (retval), "=&A"(x)                          \
-                    : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \
+                    : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1),  \
                       "i" (errret), "0" (retval));                     \
 })
 
index 128a1a0b145050749e82a0096b7f24de7680ab29..7cb282e9e58777aeb2ffd86b344d39a5189c5f84 100644 (file)
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -551,13 +551,13 @@ static inline void
 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr,
                        struct desc_struct desc)
 {
-       u32 *p = (u32 *) &desc;
-
        mcl->op = __HYPERVISOR_update_descriptor;
        if (sizeof(maddr) == sizeof(long)) {
                mcl->args[0] = maddr;
                mcl->args[1] = *(unsigned long *)&desc;
        } else {
+               u32 *p = (u32 *)&desc;
+
                mcl->args[0] = maddr;
                mcl->args[1] = maddr >> 32;
                mcl->args[2] = *p++;
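
On 32-bit guests the 64-bit machine address and the 8-byte descriptor are passed as pairs of 32-bit multicall arguments, which is why the u32 cursor is only needed in the else branch above. A small sketch of that split (toy argument slots, little-endian assumed, as on x86):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t maddr = 0x123456789abcdef0ULL;
        uint64_t desc  = 0x00cf9a000000ffffULL;
        uint32_t *p    = (uint32_t *)&desc;   /* mirrors the cast above */
        uint64_t args[4] = {
                (uint32_t)maddr, (uint32_t)(maddr >> 32), p[0], p[1],
        };
        printf("args: %#llx %#llx %#llx %#llx\n",
               (unsigned long long)args[0], (unsigned long long)args[1],
               (unsigned long long)args[2], (unsigned long long)args[3]);
        return 0;
}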
index e1114f070c2dfdedf9911cd587afb7cca1769785..f92a6593de1ec651d244d43d16acbd8d49959e4b 100644 (file)
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -100,7 +100,7 @@ void __kernel_fpu_begin(void)
 
        kernel_fpu_disable();
 
-       if (fpu->fpregs_active) {
+       if (fpu->initialized) {
                /*
                 * Ignore return value -- we don't care if reg state
                 * is clobbered.
@@ -116,7 +116,7 @@ void __kernel_fpu_end(void)
 {
        struct fpu *fpu = &current->thread.fpu;
 
-       if (fpu->fpregs_active)
+       if (fpu->initialized)
                copy_kernel_to_fpregs(&fpu->state);
 
        kernel_fpu_enable();
@@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu)
 
        preempt_disable();
        trace_x86_fpu_before_save(fpu);
-       if (fpu->fpregs_active) {
+       if (fpu->initialized) {
                if (!copy_fpregs_to_fpstate(fpu)) {
                        copy_kernel_to_fpregs(&fpu->state);
                }
@@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
-       dst_fpu->fpregs_active = 0;
        dst_fpu->last_cpu = -1;
 
-       if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
+       if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
                return 0;
 
        WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
        /*
         * Save current FPU registers directly into the child
         * FPU context, without any memory-to-memory copying.
-        * In lazy mode, if the FPU context isn't loaded into
-        * fpregs, CR0.TS will be set and do_device_not_available
-        * will load the FPU context.
         *
-        * We have to do all this with preemption disabled,
-        * mostly because of the FNSAVE case, because in that
-        * case we must not allow preemption in the window
-        * between the FNSAVE and us marking the context lazy.
-        *
-        * It shouldn't be an issue as even FNSAVE is plenty
-        * fast in terms of critical section length.
+        * ( The function 'fails' in the FNSAVE case, which destroys
+        *   register contents so we have to copy them back. )
         */
-       preempt_disable();
        if (!copy_fpregs_to_fpstate(dst_fpu)) {
-               memcpy(&src_fpu->state, &dst_fpu->state,
-                      fpu_kernel_xstate_size);
-
+               memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
                copy_kernel_to_fpregs(&src_fpu->state);
        }
-       preempt_enable();
 
        trace_x86_fpu_copy_src(src_fpu);
        trace_x86_fpu_copy_dst(dst_fpu);
@@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
  * Activate the current task's in-memory FPU context,
  * if it has not been used before:
  */
-void fpu__activate_curr(struct fpu *fpu)
+void fpu__initialize(struct fpu *fpu)
 {
        WARN_ON_FPU(fpu != &current->thread.fpu);
 
-       if (!fpu->fpstate_active) {
+       if (!fpu->initialized) {
                fpstate_init(&fpu->state);
                trace_x86_fpu_init_state(fpu);
 
                trace_x86_fpu_activate_state(fpu);
                /* Safe to do for the current task: */
-               fpu->fpstate_active = 1;
+               fpu->initialized = 1;
        }
 }
-EXPORT_SYMBOL_GPL(fpu__activate_curr);
+EXPORT_SYMBOL_GPL(fpu__initialize);
 
 /*
  * This function must be called before we read a task's fpstate.
  *
- * If the task has not used the FPU before then initialize its
- * fpstate.
+ * There's two cases where this gets called:
+ *
+ * - for the current task (when coredumping), in which case we have
+ *   to save the latest FPU registers into the fpstate,
+ *
+ * - or it's called for stopped tasks (ptrace), in which case the
+ *   registers were already saved by the context-switch code when
+ *   the task scheduled out - we only have to initialize the registers
+ *   if they've never been initialized.
  *
  * If the task has used the FPU before then save it.
  */
-void fpu__activate_fpstate_read(struct fpu *fpu)
+void fpu__prepare_read(struct fpu *fpu)
 {
-       /*
-        * If fpregs are active (in the current CPU), then
-        * copy them to the fpstate:
-        */
-       if (fpu->fpregs_active) {
+       if (fpu == &current->thread.fpu) {
                fpu__save(fpu);
        } else {
-               if (!fpu->fpstate_active) {
+               if (!fpu->initialized) {
                        fpstate_init(&fpu->state);
                        trace_x86_fpu_init_state(fpu);
 
                        trace_x86_fpu_activate_state(fpu);
                        /* Safe to do for current and for stopped child tasks: */
-                       fpu->fpstate_active = 1;
+                       fpu->initialized = 1;
                }
        }
 }
@@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 /*
  * This function must be called before we write a task's fpstate.
  *
- * If the task has used the FPU before then unlazy it.
+ * If the task has used the FPU before then invalidate any cached FPU registers.
  * If the task has not used the FPU before then initialize its fpstate.
  *
  * After this function call, after registers in the fpstate are
  * modified and the child task has woken up, the child task will
  * restore the modified FPU state from the modified context. If we
- * didn't clear its lazy status here then the lazy in-registers
+ * didn't clear its cached status here then the cached in-registers
  * state pending on its former CPU could be restored, corrupting
  * the modifications.
  */
-void fpu__activate_fpstate_write(struct fpu *fpu)
+void fpu__prepare_write(struct fpu *fpu)
 {
        /*
         * Only stopped child tasks can be used to modify the FPU
@@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
         */
        WARN_ON_FPU(fpu == &current->thread.fpu);
 
-       if (fpu->fpstate_active) {
-               /* Invalidate any lazy state: */
+       if (fpu->initialized) {
+               /* Invalidate any cached state: */
                __fpu_invalidate_fpregs_state(fpu);
        } else {
                fpstate_init(&fpu->state);
@@ -310,73 +300,10 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 
                trace_x86_fpu_activate_state(fpu);
                /* Safe to do for stopped child tasks: */
-               fpu->fpstate_active = 1;
+               fpu->initialized = 1;
        }
 }
 
-/*
- * This function must be called before we write the current
- * task's fpstate.
- *
- * This call gets the current FPU register state and moves
- * it in to the 'fpstate'.  Preemption is disabled so that
- * no writes to the 'fpstate' can occur from context
- * swiches.
- *
- * Must be followed by a fpu__current_fpstate_write_end().
- */
-void fpu__current_fpstate_write_begin(void)
-{
-       struct fpu *fpu = &current->thread.fpu;
-
-       /*
-        * Ensure that the context-switching code does not write
-        * over the fpstate while we are doing our update.
-        */
-       preempt_disable();
-
-       /*
-        * Move the fpregs in to the fpu's 'fpstate'.
-        */
-       fpu__activate_fpstate_read(fpu);
-
-       /*
-        * The caller is about to write to 'fpu'.  Ensure that no
-        * CPU thinks that its fpregs match the fpstate.  This
-        * ensures we will not be lazy and skip a XRSTOR in the
-        * future.
-        */
-       __fpu_invalidate_fpregs_state(fpu);
-}
-
-/*
- * This function must be paired with fpu__current_fpstate_write_begin()
- *
- * This will ensure that the modified fpstate gets placed back in
- * the fpregs if necessary.
- *
- * Note: This function may be called whether or not an _actual_
- * write to the fpstate occurred.
- */
-void fpu__current_fpstate_write_end(void)
-{
-       struct fpu *fpu = &current->thread.fpu;
-
-       /*
-        * 'fpu' now has an updated copy of the state, but the
-        * registers may still be out of date.  Update them with
-        * an XRSTOR if they are active.
-        */
-       if (fpregs_active())
-               copy_kernel_to_fpregs(&fpu->state);
-
-       /*
-        * Our update is done and the fpregs/fpstate are in sync
-        * if necessary.  Context switches can happen again.
-        */
-       preempt_enable();
-}
-
 /*
  * 'fpu__restore()' is called to copy FPU registers from
  * the FPU fpstate to the live hw registers and to activate
@@ -389,7 +316,7 @@ void fpu__current_fpstate_write_end(void)
  */
 void fpu__restore(struct fpu *fpu)
 {
-       fpu__activate_curr(fpu);
+       fpu__initialize(fpu);
 
        /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
        kernel_fpu_disable();
@@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu)
 {
        preempt_disable();
 
-       if (fpu->fpregs_active) {
-               /* Ignore delayed exceptions from user space */
-               asm volatile("1: fwait\n"
-                            "2:\n"
-                            _ASM_EXTABLE(1b, 2b));
-               fpregs_deactivate(fpu);
+       if (fpu == &current->thread.fpu) {
+               if (fpu->initialized) {
+                       /* Ignore delayed exceptions from user space */
+                       asm volatile("1: fwait\n"
+                                    "2:\n"
+                                    _ASM_EXTABLE(1b, 2b));
+                       fpregs_deactivate(fpu);
+               }
        }
 
-       fpu->fpstate_active = 0;
+       fpu->initialized = 0;
 
        trace_x86_fpu_dropped(fpu);
 
@@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu)
         * Make sure fpstate is cleared and initialized.
         */
        if (static_cpu_has(X86_FEATURE_FPU)) {
-               fpu__activate_curr(fpu);
+               preempt_disable();
+               fpu__initialize(fpu);
                user_fpu_begin();
                copy_init_fpstate_to_fpregs();
+               preempt_enable();
        }
 }
 
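fpu__prepare_write() above hinges on invalidating the per-CPU fpregs-owner cache, so a stopped task's modified fpstate is reloaded from memory instead of stale cached registers. A much-simplified single-CPU model of that bookkeeping (toy struct and helper, not the kernel's):

#include <stdio.h>
#include <stddef.h>

/* sketch of the fpregs-owner cache (assumption: one CPU, simplified) */
struct fpu { int initialized; };

static struct fpu *fpregs_owner;   /* whose state is live in registers */

/* before writing a stopped task's fpstate, forget any cached ownership
 * so the modified memory image is restored rather than old registers */
static void prepare_write(struct fpu *fpu)
{
        if (fpu->initialized) {
                if (fpregs_owner == fpu)
                        fpregs_owner = NULL;   /* invalidate the cache */
        } else {
                fpu->initialized = 1;          /* first use: init in memory */
        }
}

int main(void)
{
        struct fpu child = { .initialized = 1 };
        fpregs_owner = &child;
        prepare_write(&child);
        printf("owner cached: %s\n", fpregs_owner ? "yes" : "no");
        return 0;
}
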
index d5d44c452624c88e3abb5c75e68d00b55fee6019..7affb7e3d9a5b94326b51528119787f4f956640b 100644 (file)
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void)
        WARN_ON_FPU(!on_boot_cpu);
        on_boot_cpu = 0;
 
-       WARN_ON_FPU(current->thread.fpu.fpstate_active);
+       WARN_ON_FPU(current->thread.fpu.initialized);
 }
 
 /*
index b188b16841e376574c5f39e55b7b687ffdd09495..3ea15137238964cc6f972b2d11c83cb619de55bd 100644 (file)
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -16,14 +16,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r
 {
        struct fpu *target_fpu = &target->thread.fpu;
 
-       return target_fpu->fpstate_active ? regset->n : 0;
+       return target_fpu->initialized ? regset->n : 0;
 }
 
 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
 {
        struct fpu *target_fpu = &target->thread.fpu;
 
-       if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+       if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
                return regset->n;
        else
                return 0;
@@ -38,7 +38,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
        if (!boot_cpu_has(X86_FEATURE_FXSR))
                return -ENODEV;
 
-       fpu__activate_fpstate_read(fpu);
+       fpu__prepare_read(fpu);
        fpstate_sanitize_xstate(fpu);
 
        return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
@@ -55,7 +55,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
        if (!boot_cpu_has(X86_FEATURE_FXSR))
                return -ENODEV;
 
-       fpu__activate_fpstate_write(fpu);
+       fpu__prepare_write(fpu);
        fpstate_sanitize_xstate(fpu);
 
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
@@ -89,10 +89,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 
        xsave = &fpu->state.xsave;
 
-       fpu__activate_fpstate_read(fpu);
+       fpu__prepare_read(fpu);
 
        if (using_compacted_format()) {
-               ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+               if (kbuf)
+                       ret = copy_xstate_to_kernel(kbuf, xsave, pos, count);
+               else
+                       ret = copy_xstate_to_user(ubuf, xsave, pos, count);
        } else {
                fpstate_sanitize_xstate(fpu);
                /*
@@ -129,28 +132,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 
        xsave = &fpu->state.xsave;
 
-       fpu__activate_fpstate_write(fpu);
+       fpu__prepare_write(fpu);
 
-       if (boot_cpu_has(X86_FEATURE_XSAVES))
-               ret = copyin_to_xsaves(kbuf, ubuf, xsave);
-       else
+       if (using_compacted_format()) {
+               if (kbuf)
+                       ret = copy_kernel_to_xstate(xsave, kbuf);
+               else
+                       ret = copy_user_to_xstate(xsave, ubuf);
+       } else {
                ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
-
-       /*
-        * In case of failure, mark all states as init:
-        */
-       if (ret)
-               fpstate_init(&fpu->state);
+               if (!ret)
+                       ret = validate_xstate_header(&xsave->header);
+       }
 
        /*
         * mxcsr reserved bits must be masked to zero for security reasons.
         */
        xsave->i387.mxcsr &= mxcsr_feature_mask;
-       xsave->header.xfeatures &= xfeatures_mask;
+
        /*
-        * These bits must be zero.
+        * In case of failure, mark all states as init:
         */
-       memset(&xsave->header.reserved, 0, 48);
+       if (ret)
+               fpstate_init(&fpu->state);
 
        return ret;
 }
@@ -299,7 +303,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
        struct fpu *fpu = &target->thread.fpu;
        struct user_i387_ia32_struct env;
 
-       fpu__activate_fpstate_read(fpu);
+       fpu__prepare_read(fpu);
 
        if (!boot_cpu_has(X86_FEATURE_FPU))
                return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
@@ -329,7 +333,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
        struct user_i387_ia32_struct env;
        int ret;
 
-       fpu__activate_fpstate_write(fpu);
+       fpu__prepare_write(fpu);
        fpstate_sanitize_xstate(fpu);
 
        if (!boot_cpu_has(X86_FEATURE_FPU))
@@ -369,7 +373,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
        struct fpu *fpu = &tsk->thread.fpu;
        int fpvalid;
 
-       fpvalid = fpu->fpstate_active;
+       fpvalid = fpu->initialized;
        if (fpvalid)
                fpvalid = !fpregs_get(tsk, NULL,
                                      0, sizeof(struct user_i387_ia32_struct),
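
xstateregs_set() above follows a validate-or-reinit pattern: the user-supplied header is checked first, and on any failure the whole fpstate is reset to its init state rather than left half-written. A self-contained sketch of the same pattern (toy header layout and hypothetical validate(), not the real struct xstate_header):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct hdr { uint64_t features; uint8_t reserved[48]; };

static int validate(const struct hdr *h, uint64_t allowed)
{
        if (h->features & ~allowed)
                return -1;              /* unknown feature bits set */
        for (size_t i = 0; i < sizeof(h->reserved); i++)
                if (h->reserved[i])
                        return -1;      /* no reserved bits may be set */
        return 0;
}

int main(void)
{
        struct hdr h = { .features = 0x7 };
        if (validate(&h, 0x3) != 0)
                memset(&h, 0, sizeof(h));   /* on failure, reset to init */
        printf("features after set: %#llx\n",
               (unsigned long long)h.features);
        return 0;
}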
index 83c23c230b4c4fc78664ba08663a04c684c2105b..fb639e70048f58dfaef5576ab73e6998fa8ebbd1 100644 (file)
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -155,7 +155,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
  */
 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 {
-       struct xregs_state *xsave = &current->thread.fpu.state.xsave;
+       struct fpu *fpu = &current->thread.fpu;
+       struct xregs_state *xsave = &fpu->state.xsave;
        struct task_struct *tsk = current;
        int ia32_fxstate = (buf != buf_fx);
 
@@ -170,13 +171,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
                        sizeof(struct user_i387_ia32_struct), NULL,
                        (struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-       if (fpregs_active() || using_compacted_format()) {
+       if (fpu->initialized || using_compacted_format()) {
                /* Save the live register state to the user directly. */
                if (copy_fpregs_to_sigframe(buf_fx))
                        return -1;
                /* Update the thread's fxstate to save the fsave header. */
                if (ia32_fxstate)
-                       copy_fxregs_to_kernel(&tsk->thread.fpu);
+                       copy_fxregs_to_kernel(fpu);
        } else {
                /*
                 * It is a *bug* if kernel uses compacted-format for xsave
@@ -189,7 +190,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
                        return -1;
                }
 
-               fpstate_sanitize_xstate(&tsk->thread.fpu);
+               fpstate_sanitize_xstate(fpu);
                if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
                        return -1;
        }
@@ -213,8 +214,11 @@ sanitize_restored_xstate(struct task_struct *tsk,
        struct xstate_header *header = &xsave->header;
 
        if (use_xsave()) {
-               /* These bits must be zero. */
-               memset(header->reserved, 0, 48);
+               /*
+                * Note: we don't need to zero the reserved bits in the
+                * xstate_header here because we either didn't copy them at all,
+                * or we checked earlier that they aren't set.
+                */
 
                /*
                 * Init the state that is not present in the memory
@@ -223,7 +227,7 @@ sanitize_restored_xstate(struct task_struct *tsk,
                if (fx_only)
                        header->xfeatures = XFEATURE_MASK_FPSSE;
                else
-                       header->xfeatures &= (xfeatures_mask & xfeatures);
+                       header->xfeatures &= xfeatures;
        }
 
        if (use_fxsr()) {
@@ -279,7 +283,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
        if (!access_ok(VERIFY_READ, buf, size))
                return -EACCES;
 
-       fpu__activate_curr(fpu);
+       fpu__initialize(fpu);
 
        if (!static_cpu_has(X86_FEATURE_FPU))
                return fpregs_soft_set(current, NULL,
@@ -307,28 +311,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                /*
                 * For 32-bit frames with fxstate, copy the user state to the
                 * thread's fpu state, reconstruct fxstate from the fsave
-                * header. Sanitize the copied state etc.
+                * header. Validate and sanitize the copied state.
                 */
                struct fpu *fpu = &tsk->thread.fpu;
                struct user_i387_ia32_struct env;
                int err = 0;
 
                /*
-                * Drop the current fpu which clears fpu->fpstate_active. This ensures
+                * Drop the current fpu which clears fpu->initialized. This ensures
                 * that any context-switch during the copy of the new state,
                 * avoids the intermediate state from getting restored/saved.
                 * Thus avoiding the new restored state from getting corrupted.
                 * We will be ready to restore/save the state only after
-                * fpu->fpstate_active is again set.
+                * fpu->initialized is again set.
                 */
                fpu__drop(fpu);
 
                if (using_compacted_format()) {
-                       err = copyin_to_xsaves(NULL, buf_fx,
-                                              &fpu->state.xsave);
+                       err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
                } else {
-                       err = __copy_from_user(&fpu->state.xsave,
-                                              buf_fx, state_size);
+                       err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+
+                       if (!err && state_size > offsetof(struct xregs_state, header))
+                               err = validate_xstate_header(&fpu->state.xsave.header);
                }
 
                if (err || __copy_from_user(&env, buf, sizeof(env))) {
@@ -339,7 +344,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
                        sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
                }
 
-               fpu->fpstate_active = 1;
+               fpu->initialized = 1;
                preempt_disable();
                fpu__restore(fpu);
                preempt_enable();
index c24ac1efb12d7a1574450a1359699309148afcd6..f1d5476c902209eebeae83d3cd30fe5b6226921c 100644 (file)
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -483,6 +483,30 @@ int using_compacted_format(void)
        return boot_cpu_has(X86_FEATURE_XSAVES);
 }
 
+/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
+int validate_xstate_header(const struct xstate_header *hdr)
+{
+       /* No unknown or supervisor features may be set */
+       if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR))
+               return -EINVAL;
+
+       /* Userspace must use the uncompacted format */
+       if (hdr->xcomp_bv)
+               return -EINVAL;
+
+       /*
+        * If 'reserved' is shrunken to add a new field, make sure to validate
+        * that new field here!
+        */
+       BUILD_BUG_ON(sizeof(hdr->reserved) != 48);
+
+       /* No reserved bits may be set */
+       if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+               return -EINVAL;
+
+       return 0;
+}
+
 static void __xstate_dump_leaves(void)
 {
        int i;
@@ -867,7 +891,7 @@ const void *get_xsave_field_ptr(int xsave_state)
 {
        struct fpu *fpu = &current->thread.fpu;
 
-       if (!fpu->fpstate_active)
+       if (!fpu->initialized)
                return NULL;
        /*
         * fpu__save() takes the CPU's xstate registers
@@ -920,39 +944,130 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 }
 #endif /* ! CONFIG_ARCH_HAS_PKEYS */
 
+/*
+ * Weird legacy quirk: SSE and YMM states store information in the
+ * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP
+ * area is marked as unused in the xfeatures header, we need to copy
+ * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use.
+ */
+static inline bool xfeatures_mxcsr_quirk(u64 xfeatures)
+{
+       if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM)))
+               return false;
+
+       if (xfeatures & XFEATURE_MASK_FP)
+               return false;
+
+       return true;
+}
+
 /*
  * This is similar to user_regset_copyout(), but will not add offset to
  * the source data pointer or increment pos, count, kbuf, and ubuf.
  */
-static inline int xstate_copyout(unsigned int pos, unsigned int count,
-                                void *kbuf, void __user *ubuf,
-                                const void *data, const int start_pos,
-                                const int end_pos)
+static inline void
+__copy_xstate_to_kernel(void *kbuf, const void *data,
+                       unsigned int offset, unsigned int size, unsigned int size_total)
 {
-       if ((count == 0) || (pos < start_pos))
-               return 0;
+       if (offset < size_total) {
+               unsigned int copy = min(size, size_total - offset);
 
-       if (end_pos < 0 || pos < end_pos) {
-               unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+               memcpy(kbuf + offset, data, copy);
+       }
+}
 
-               if (kbuf) {
-                       memcpy(kbuf + pos, data, copy);
-               } else {
-                       if (__copy_to_user(ubuf + pos, data, copy))
-                               return -EFAULT;
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a kernel-space ptrace buffer.
+ *
+ * It supports partial copy, but offset_start is always zero. This is called
+ * from xstateregs_get(), where we check that the CPU has XSAVES.
+ */
+int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
+{
+       unsigned int offset, size;
+       struct xstate_header header;
+       int i;
+
+       /*
+        * Currently copy_regset_to_user() starts from pos 0:
+        */
+       if (unlikely(offset_start != 0))
+               return -EFAULT;
+
+       /*
+        * The destination is a ptrace buffer; we put in only user xstates:
+        */
+       memset(&header, 0, sizeof(header));
+       header.xfeatures = xsave->header.xfeatures;
+       header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+       /*
+        * Copy xregs_state->header:
+        */
+       offset = offsetof(struct xregs_state, header);
+       size = sizeof(header);
+
+       __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total);
+
+       for (i = 0; i < XFEATURE_MAX; i++) {
+               /*
+                * Copy only in-use xstates:
+                */
+               if ((header.xfeatures >> i) & 1) {
+                       void *src = __raw_xsave_addr(xsave, 1 << i);
+
+                       offset = xstate_offsets[i];
+                       size = xstate_sizes[i];
+
+                       /* The next component has to fit fully into the output buffer: */
+                       if (offset + size > size_total)
+                               break;
+
+                       __copy_xstate_to_kernel(kbuf, src, offset, size, size_total);
                }
+
+       }
+
+       if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+               offset = offsetof(struct fxregs_state, mxcsr);
+               size = MXCSR_AND_FLAGS_SIZE;
+               __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total);
+       }
+
+       /*
+        * Fill xsave->i387.sw_reserved value for ptrace frame:
+        */
+       offset = offsetof(struct fxregs_state, sw_reserved);
+       size = sizeof(xstate_fx_sw_bytes);
+
+       __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total);
+
+       return 0;
+}
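
The loop above writes the standard (uncompacted) layout: every feature has a
fixed architectural offset, and a component is emitted only if its bit is set
and it fits completely in the caller's buffer. A sketch of that accounting,
with made-up offsets and sizes:

#include <stdint.h>
#include <string.h>

#define XFEATURE_MAX 3

static const unsigned xstate_offsets[XFEATURE_MAX] = {   0, 160, 576 };
static const unsigned xstate_sizes[XFEATURE_MAX]   = { 160, 416, 256 };

static void copy_out(uint8_t *dst, const uint8_t *src,
		     uint64_t xfeatures, unsigned size_total)
{
	for (int i = 0; i < XFEATURE_MAX; i++) {
		unsigned off = xstate_offsets[i], sz = xstate_sizes[i];

		if (!((xfeatures >> i) & 1))
			continue;		/* component not in use */
		if (off + sz > size_total)
			break;			/* must fit fully in the output */
		memcpy(dst + off, src + off, sz);
	}
}

int main(void)
{
	uint8_t dst[1024] = { 0 }, src[1024] = { 1 };

	copy_out(dst, src, 0x3, sizeof(dst));	/* copies components 0 and 1 */
	return 0;
}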
+
+static inline int
+__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total)
+{
+       if (!size)
+               return 0;
+
+       if (offset < size_total) {
+               unsigned int copy = min(size, size_total - offset);
+
+               if (__copy_to_user(ubuf + offset, data, copy))
+                       return -EFAULT;
        }
        return 0;
 }
 
 /*
  * Convert from kernel XSAVES compacted format to standard format and copy
- * to a ptrace buffer. It supports partial copy but pos always starts from
+ * to a user-space buffer. It supports partial copy, but offset_start is
  * always zero. This is called from xstateregs_get(), where we check that
  * the CPU has XSAVES.
  */
-int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
-                       void __user *ubuf, struct xregs_state *xsave)
+int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total)
 {
        unsigned int offset, size;
        int ret, i;
@@ -961,7 +1076,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
        /*
         * Currently copy_regset_to_user() starts from pos 0:
         */
-       if (unlikely(pos != 0))
+       if (unlikely(offset_start != 0))
                return -EFAULT;
 
        /*
@@ -977,8 +1092,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
        offset = offsetof(struct xregs_state, header);
        size = sizeof(header);
 
-       ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
-
+       ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total);
        if (ret)
                return ret;
 
@@ -992,25 +1106,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
                        offset = xstate_offsets[i];
                        size = xstate_sizes[i];
 
-                       ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+                       /* The next component has to fit fully into the output buffer: */
+                       if (offset + size > size_total)
+                               break;
 
+                       ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total);
                        if (ret)
                                return ret;
-
-                       if (offset + size >= count)
-                               break;
                }
 
        }
 
+       if (xfeatures_mxcsr_quirk(header.xfeatures)) {
+               offset = offsetof(struct fxregs_state, mxcsr);
+               size = MXCSR_AND_FLAGS_SIZE;
+               __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total);
+       }
+
        /*
         * Fill xsave->i387.sw_reserved value for ptrace frame:
         */
        offset = offsetof(struct fxregs_state, sw_reserved);
        size = sizeof(xstate_fx_sw_bytes);
 
-       ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
-
+       ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total);
        if (ret)
                return ret;
 
@@ -1018,55 +1137,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
 }
 
 /*
- * Convert from a ptrace standard-format buffer to kernel XSAVES format
- * and copy to the target thread. This is called from xstateregs_set() and
- * there we check the CPU has XSAVES and a whole standard-sized buffer
- * exists.
+ * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set().
  */
-int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
-                    struct xregs_state *xsave)
+int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
 {
        unsigned int offset, size;
        int i;
-       u64 xfeatures;
-       u64 allowed_features;
+       struct xstate_header hdr;
 
        offset = offsetof(struct xregs_state, header);
-       size = sizeof(xfeatures);
+       size = sizeof(hdr);
 
-       if (kbuf) {
-               memcpy(&xfeatures, kbuf + offset, size);
-       } else {
-               if (__copy_from_user(&xfeatures, ubuf + offset, size))
-                       return -EFAULT;
+       memcpy(&hdr, kbuf + offset, size);
+
+       if (validate_xstate_header(&hdr))
+               return -EINVAL;
+
+       for (i = 0; i < XFEATURE_MAX; i++) {
+               u64 mask = ((u64)1 << i);
+
+               if (hdr.xfeatures & mask) {
+                       void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+                       offset = xstate_offsets[i];
+                       size = xstate_sizes[i];
+
+                       memcpy(dst, kbuf + offset, size);
+               }
+       }
+
+       if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+               offset = offsetof(struct fxregs_state, mxcsr);
+               size = MXCSR_AND_FLAGS_SIZE;
+               memcpy(&xsave->i387.mxcsr, kbuf + offset, size);
        }
 
        /*
-        * Reject if the user sets any disabled or supervisor features:
+        * The state that came in from userspace was user-state only.
+        * Mask all the user states out of 'xfeatures':
+        */
+       xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+       /*
+        * Add back in the features that came in from userspace:
         */
-       allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+       xsave->header.xfeatures |= hdr.xfeatures;
 
-       if (xfeatures & ~allowed_features)
+       return 0;
+}
+
+/*
+ * Convert from a ptrace or sigreturn standard-format user-space buffer to
+ * kernel XSAVES format and copy to the target thread. This is called from
+ * xstateregs_set(), as well as potentially from the sigreturn() and
+ * rt_sigreturn() system calls.
+ */
+int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
+{
+       unsigned int offset, size;
+       int i;
+       struct xstate_header hdr;
+
+       offset = offsetof(struct xregs_state, header);
+       size = sizeof(hdr);
+
+       if (__copy_from_user(&hdr, ubuf + offset, size))
+               return -EFAULT;
+
+       if (validate_xstate_header(&hdr))
                return -EINVAL;
 
        for (i = 0; i < XFEATURE_MAX; i++) {
                u64 mask = ((u64)1 << i);
 
-               if (xfeatures & mask) {
+               if (hdr.xfeatures & mask) {
                        void *dst = __raw_xsave_addr(xsave, 1 << i);
 
                        offset = xstate_offsets[i];
                        size = xstate_sizes[i];
 
-                       if (kbuf) {
-                               memcpy(dst, kbuf + offset, size);
-                       } else {
-                               if (__copy_from_user(dst, ubuf + offset, size))
-                                       return -EFAULT;
-                       }
+                       if (__copy_from_user(dst, ubuf + offset, size))
+                               return -EFAULT;
                }
        }
 
+       if (xfeatures_mxcsr_quirk(hdr.xfeatures)) {
+               offset = offsetof(struct fxregs_state, mxcsr);
+               size = MXCSR_AND_FLAGS_SIZE;
+               if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size))
+                       return -EFAULT;
+       }
+
        /*
         * The state that came in from userspace was user-state only.
         * Mask all the user states out of 'xfeatures':
@@ -1076,7 +1238,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
        /*
         * Add back in the features that came in from userspace:
         */
-       xsave->header.xfeatures |= xfeatures;
+       xsave->header.xfeatures |= hdr.xfeatures;
 
        return 0;
 }
index 1f38d9a4d9deaf707af2b7e658bd9e9022ba8d75..d4eb450144fdb42c4f667b9f5b1a69d91aa78a2e 100644 (file)
@@ -64,7 +64,7 @@ static void call_on_stack(void *func, void *stack)
 
 static inline void *current_stack(void)
 {
-       return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+       return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
 }
 
 static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
@@ -88,7 +88,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 
        /* Save the next esp at the bottom of the stack */
        prev_esp = (u32 *)irqstk;
-       *prev_esp = current_stack_pointer();
+       *prev_esp = current_stack_pointer;
 
        if (unlikely(overflow))
                call_on_stack(print_stack_overflow, isp);
@@ -139,7 +139,7 @@ void do_softirq_own_stack(void)
 
        /* Push the previous esp onto the stack */
        prev_esp = (u32 *)irqstk;
-       *prev_esp = current_stack_pointer();
+       *prev_esp = current_stack_pointer;
 
        call_on_stack(__do_softirq, isp);
 }
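
current_stack_pointer is now a plain expression rather than a function call,
but the idiom around it is unchanged: with a power-of-two THREAD_SIZE,
masking off the low bits of any address inside the stack yields the stack
base. A sketch under that assumption:

#include <stdint.h>
#include <stdio.h>

#define THREAD_SIZE 8192UL		/* assumed power of two */

static void *stack_base(uintptr_t sp)
{
	return (void *)(sp & ~(THREAD_SIZE - 1));
}

int main(void)
{
	uintptr_t base = 0x100000;	/* THREAD_SIZE-aligned stack base */
	uintptr_t sp   = base + 0x1234;	/* somewhere inside the stack */

	printf("%p\n", stack_base(sp));	/* prints the base, 0x100000 */
	return 0;
}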
index 4b0592ca9e47b332d0ce67f8bcf5f555653587b2..8c1cc08f514f4362bdaefa933fda3cd3769b04f9 100644 (file)
@@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent)
        return 0;
 
 out_clean_nodes:
-       for (j = i - 1; j > 0; j--)
+       for (j = i - 1; j >= 0; j--)
                cleanup_setup_data_node(*(kobjp + j));
        kfree(kobjp);
 out_setup_data_kobj:
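
The one-character fix above ('j > 0' becoming 'j >= 0') is the classic
create-N/unwind-on-failure pattern: if creation fails at index i, entries
0..i-1 all exist and all must be torn down, including entry 0. A sketch with
hypothetical create/destroy helpers:

#include <stdio.h>

#define N 4

static int create(int i)	{ return (i == 2) ? -1 : 0; }	/* fail at i == 2 */
static void destroy(int i)	{ printf("destroy %d\n", i); }

int main(void)
{
	int i, j;

	for (i = 0; i < N; i++) {
		if (create(i) < 0)
			goto out_clean;
	}
	return 0;

out_clean:
	for (j = i - 1; j >= 0; j--)	/* 'j > 0' would leak entry 0 */
		destroy(j);
	return 1;
}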
index aa60a08b65b1090392b542ec7dc642e9827b7606..e675704fa6f7d89ed919631284156717e342979f 100644 (file)
@@ -140,7 +140,8 @@ void kvm_async_pf_task_wait(u32 token)
 
        n.token = token;
        n.cpu = smp_processor_id();
-       n.halted = is_idle_task(current) || preempt_count() > 1;
+       n.halted = is_idle_task(current) || preempt_count() > 1 ||
+                  rcu_preempt_depth();
        init_swait_queue_head(&n.wq);
        hlist_add_head(&n.link, &b->list);
        raw_spin_unlock(&b->lock);
index e04442345fc0977cf73f2573f77b3df71310f0a8..4e188fda59612ed70b98342e8580cd1f311ed141 100644 (file)
@@ -263,7 +263,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
                sp = (unsigned long) ka->sa.sa_restorer;
        }
 
-       if (fpu->fpstate_active) {
+       if (fpu->initialized) {
                sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
                                          &buf_fx, &math_size);
                *fpstate = (void __user *)sp;
@@ -279,7 +279,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
                return (void __user *)-1L;
 
        /* save i387 and extended state */
-       if (fpu->fpstate_active &&
+       if (fpu->initialized &&
            copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
                return (void __user *)-1L;
 
@@ -755,7 +755,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
                /*
                 * Ensure the signal handler starts with the new fpu state.
                 */
-               if (fpu->fpstate_active)
+               if (fpu->initialized)
                        fpu__clear(fpu);
        }
        signal_setup_done(failed, ksig, stepping);
index 34ea3651362ef8383fb456f24125ec4881f42668..67db4f43309ecadc86f4d7e95c6a0db0650a0d18 100644 (file)
@@ -142,7 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
         * from double_fault.
         */
        BUG_ON((unsigned long)(current_top_of_stack() -
-                              current_stack_pointer()) >= THREAD_SIZE);
+                              current_stack_pointer) >= THREAD_SIZE);
 
        preempt_enable_no_resched();
 }
index 6970249c09fca15879415bb2958bb0f782a7eb22..a2b804e10c95d71ac19aebc83c6f89316cea658d 100644 (file)
@@ -200,6 +200,8 @@ struct loaded_vmcs {
        int cpu;
        bool launched;
        bool nmi_known_unmasked;
+       unsigned long vmcs_host_cr3;    /* May not match real cr3 */
+       unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        struct list_head loaded_vmcss_on_cpu_link;
 };
 
@@ -600,8 +602,6 @@ struct vcpu_vmx {
                int           gs_ldt_reload_needed;
                int           fs_reload_needed;
                u64           msr_host_bndcfgs;
-               unsigned long vmcs_host_cr3;    /* May not match real cr3 */
-               unsigned long vmcs_host_cr4;    /* May not match real cr4 */
        } host_state;
        struct {
                int vm86_active;
@@ -2202,46 +2202,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        struct pi_desc old, new;
        unsigned int dest;
 
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       /*
+        * In case of hot-plug or hot-unplug, we may have to undo
+        * vmx_vcpu_pi_put even if there is no assigned device.  And we
+        * always keep PI.NDST up to date for simplicity: it makes the
+        * code easier, and CPU migration is not a fast path.
+        */
+       if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
                return;
 
+       /*
+        * First handle the simple case where no cmpxchg is necessary; just
+        * allow posting non-urgent interrupts.
+        *
+        * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+        * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+        * expects the VCPU to be on the blocked_vcpu_list that matches
+        * PI.NDST.
+        */
+       if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+           vcpu->cpu == cpu) {
+               pi_clear_sn(pi_desc);
+               return;
+       }
+
+       /* The full case.  */
        do {
                old.control = new.control = pi_desc->control;
 
-               /*
-                * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
-                * are two possible cases:
-                * 1. After running 'pre_block', context switch
-                *    happened. For this case, 'sn' was set in
-                *    vmx_vcpu_put(), so we need to clear it here.
-                * 2. After running 'pre_block', we were blocked,
-                *    and woken up by some other guy. For this case,
-                *    we don't need to do anything, 'pi_post_block'
-                *    will do everything for us. However, we cannot
-                *    check whether it is case #1 or case #2 here
-                *    (maybe, not needed), so we also clear sn here,
-                *    I think it is not a big deal.
-                */
-               if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
-                       if (vcpu->cpu != cpu) {
-                               dest = cpu_physical_id(cpu);
-
-                               if (x2apic_enabled())
-                                       new.ndst = dest;
-                               else
-                                       new.ndst = (dest << 8) & 0xFF00;
-                       }
+               dest = cpu_physical_id(cpu);
 
-                       /* set 'NV' to 'notification vector' */
-                       new.nv = POSTED_INTR_VECTOR;
-               }
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
 
-               /* Allow posting non-urgent interrupts */
                new.sn = 0;
-       } while (cmpxchg(&pi_desc->control, old.control,
-                       new.control) != old.control);
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
 }
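
The loop above is the standard cmpxchg retry pattern on a 64-bit descriptor
word: snapshot the whole control value, compute the update, and retry if
another CPU raced in between. A user-space sketch with C11 atomics; the SN
bit position is assumed for illustration:

#include <stdatomic.h>
#include <stdint.h>

struct pi_control {
	_Atomic uint64_t control;
};

#define SN_BIT (1ULL << 1)	/* assumed bit position for this sketch */

static void clear_sn(struct pi_control *pi)
{
	uint64_t old, new;

	old = atomic_load(&pi->control);
	do {
		new = old & ~SN_BIT;	/* allow posting non-urgent interrupts */
	} while (!atomic_compare_exchange_weak(&pi->control, &old, new));
}

int main(void)
{
	struct pi_control pi = { .control = SN_BIT | 0x10 };

	clear_sn(&pi);
	return 0;
}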
 
 static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -5178,12 +5176,12 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
         */
        cr3 = __read_cr3();
        vmcs_writel(HOST_CR3, cr3);             /* 22.2.3  FIXME: shadow tables */
-       vmx->host_state.vmcs_host_cr3 = cr3;
+       vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
 
        /* Save the most likely value for this task's CR4 in the VMCS. */
        cr4 = cr4_read_shadow();
        vmcs_writel(HOST_CR4, cr4);                     /* 22.2.3, 22.2.5 */
-       vmx->host_state.vmcs_host_cr4 = cr4;
+       vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
 
        vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS);  /* 22.2.4 */
 #ifdef CONFIG_X86_64
@@ -9273,15 +9271,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
        cr3 = __get_current_cr3_fast();
-       if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+       if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
                vmcs_writel(HOST_CR3, cr3);
-               vmx->host_state.vmcs_host_cr3 = cr3;
+               vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
        }
 
        cr4 = cr4_read_shadow();
-       if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
+       if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
                vmcs_writel(HOST_CR4, cr4);
-               vmx->host_state.vmcs_host_cr4 = cr4;
+               vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
        }
 
        /* When single-stepping over STI and MOV SS, we must clear the
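
Moving vmcs_host_cr3/cr4 into the loaded VMCS keeps the cached "last value
written" with the object it describes, so the cheap compare above skips a
redundant VMCS write per VM entry. A sketch of that write-avoidance caching,
with a counter standing in for the expensive vmcs_writel():

struct vmcs_cache {
	unsigned long host_cr3;		/* may not match real CR3 */
};

static unsigned long writes;		/* stand-in for expensive VMCS writes */

static void set_host_cr3(struct vmcs_cache *c, unsigned long cr3)
{
	if (cr3 != c->host_cr3) {	/* only write when it changed */
		writes++;		/* vmcs_writel(HOST_CR3, cr3) goes here */
		c->host_cr3 = cr3;
	}
}

int main(void)
{
	struct vmcs_cache c = { .host_cr3 = 0 };

	set_host_cr3(&c, 0x1000);	/* writes */
	set_host_cr3(&c, 0x1000);	/* skipped */
	return (int)writes;		/* 1 */
}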
@@ -9591,6 +9589,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
        vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
+       /*
+        * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+        * or POSTED_INTR_WAKEUP_VECTOR.
+        */
+       vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+       vmx->pi_desc.sn = 1;
+
        return &vmx->vcpu;
 
 free_vmcs:
@@ -9839,7 +9844,8 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
        WARN_ON(!is_guest_mode(vcpu));
 
-       if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) {
+       if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
+               !to_vmx(vcpu)->nested.nested_run_pending) {
                vmcs12->vm_exit_intr_error_code = fault->error_code;
                nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                                  PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
@@ -11704,6 +11710,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
        kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
 }
 
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+       struct pi_desc old, new;
+       unsigned int dest;
+
+       do {
+               old.control = new.control = pi_desc->control;
+               WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+                    "Wakeup handler not enabled while the VCPU is blocked\n");
+
+               dest = cpu_physical_id(vcpu->cpu);
+
+               if (x2apic_enabled())
+                       new.ndst = dest;
+               else
+                       new.ndst = (dest << 8) & 0xFF00;
+
+               /* set 'NV' to 'notification vector' */
+               new.nv = POSTED_INTR_VECTOR;
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
+
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_del(&vcpu->blocked_vcpu_list);
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               vcpu->pre_pcpu = -1;
+       }
+}
+
 /*
  * This routine does the following things for vCPU which is going
  * to be blocked if VT-d PI is enabled.
@@ -11719,7 +11756,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
  */
 static int pi_pre_block(struct kvm_vcpu *vcpu)
 {
-       unsigned long flags;
        unsigned int dest;
        struct pi_desc old, new;
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11729,34 +11765,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
                !kvm_vcpu_apicv_active(vcpu))
                return 0;
 
-       vcpu->pre_pcpu = vcpu->cpu;
-       spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-                         vcpu->pre_pcpu), flags);
-       list_add_tail(&vcpu->blocked_vcpu_list,
-                     &per_cpu(blocked_vcpu_on_cpu,
-                     vcpu->pre_pcpu));
-       spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
-                              vcpu->pre_pcpu), flags);
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+               vcpu->pre_pcpu = vcpu->cpu;
+               spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+               list_add_tail(&vcpu->blocked_vcpu_list,
+                             &per_cpu(blocked_vcpu_on_cpu,
+                                      vcpu->pre_pcpu));
+               spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+       }
 
        do {
                old.control = new.control = pi_desc->control;
 
-               /*
-                * We should not block the vCPU if
-                * an interrupt is posted for it.
-                */
-               if (pi_test_on(pi_desc) == 1) {
-                       spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
-                                         vcpu->pre_pcpu), flags);
-                       list_del(&vcpu->blocked_vcpu_list);
-                       spin_unlock_irqrestore(
-                                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                                       vcpu->pre_pcpu), flags);
-                       vcpu->pre_pcpu = -1;
-
-                       return 1;
-               }
-
                WARN((pi_desc->sn == 1),
                     "Warning: SN field of posted-interrupts "
                     "is set before blocking\n");
@@ -11778,10 +11800,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
 
                /* set 'NV' to 'wakeup vector' */
                new.nv = POSTED_INTR_WAKEUP_VECTOR;
-       } while (cmpxchg(&pi_desc->control, old.control,
-                       new.control) != old.control);
+       } while (cmpxchg64(&pi_desc->control, old.control,
+                          new.control) != old.control);
 
-       return 0;
+       /* We should not block the vCPU if an interrupt is posted for it.  */
+       if (pi_test_on(pi_desc) == 1)
+               __pi_post_block(vcpu);
+
+       local_irq_enable();
+       return (vcpu->pre_pcpu == -1);
 }
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11797,44 +11824,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
 
 static void pi_post_block(struct kvm_vcpu *vcpu)
 {
-       struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-       struct pi_desc old, new;
-       unsigned int dest;
-       unsigned long flags;
-
-       if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
-               !kvm_vcpu_apicv_active(vcpu))
+       if (vcpu->pre_pcpu == -1)
                return;
 
-       do {
-               old.control = new.control = pi_desc->control;
-
-               dest = cpu_physical_id(vcpu->cpu);
-
-               if (x2apic_enabled())
-                       new.ndst = dest;
-               else
-                       new.ndst = (dest << 8) & 0xFF00;
-
-               /* Allow posting non-urgent interrupts */
-               new.sn = 0;
-
-               /* set 'NV' to 'notification vector' */
-               new.nv = POSTED_INTR_VECTOR;
-       } while (cmpxchg(&pi_desc->control, old.control,
-                       new.control) != old.control);
-
-       if(vcpu->pre_pcpu != -1) {
-               spin_lock_irqsave(
-                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                       vcpu->pre_pcpu), flags);
-               list_del(&vcpu->blocked_vcpu_list);
-               spin_unlock_irqrestore(
-                       &per_cpu(blocked_vcpu_on_cpu_lock,
-                       vcpu->pre_pcpu), flags);
-               vcpu->pre_pcpu = -1;
-       }
+       WARN_ON(irqs_disabled());
+       local_irq_disable();
+       __pi_post_block(vcpu);
+       local_irq_enable();
 }
 
 static void vmx_post_block(struct kvm_vcpu *vcpu)
index cd17b7d9a1076c1d28904bc4cd1ea06af5e0f2c0..03869eb7fcd67b64e54dcb67acd559a70fd61139 100644 (file)
@@ -7225,7 +7225,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        int r;
        sigset_t sigsaved;
 
-       fpu__activate_curr(fpu);
+       fpu__initialize(fpu);
 
        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
index d4a7df2205b8de2576bc7cdbf8455f2fa9987b66..220638a4cb94ea3065e1aaca9caaad7d447c8560 100644 (file)
@@ -114,7 +114,7 @@ void math_emulate(struct math_emu_info *info)
        struct desc_struct code_descriptor;
        struct fpu *fpu = &current->thread.fpu;
 
-       fpu__activate_curr(fpu);
+       fpu__initialize(fpu);
 
 #ifdef RE_ENTRANT_CHECKING
        if (emulating) {
index c076f710de4cb4e283125f706226fc059fbebb5b..c3521e2be39610c3d932c34126fadd5203f28958 100644 (file)
@@ -2,6 +2,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/debug.h>
 
+#include <asm/fpu/internal.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
@@ -78,6 +79,29 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_refcount);
 
+/*
+ * Handler for when we fail to restore a task's FPU state.  We should never get
+ * here because the FPU state of a task using the FPU (task->thread.fpu.state)
+ * should always be valid.  However, past bugs have allowed userspace to set
+ * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
+ * These caused XRSTOR to fail when switching to the task, leaking the FPU
+ * registers of the task previously executing on the CPU.  Mitigate this class
+ * of vulnerability by restoring from the initial state (essentially, zeroing
+ * out all the FPU registers) if we can't restore from the task's FPU state.
+ */
+bool ex_handler_fprestore(const struct exception_table_entry *fixup,
+                         struct pt_regs *regs, int trapnr)
+{
+       regs->ip = ex_fixup_addr(fixup);
+
+       WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
+                 (void *)instruction_pointer(regs));
+
+       __copy_kernel_to_fpregs(&init_fpstate, -1);
+       return true;
+}
+EXPORT_SYMBOL_GPL(ex_handler_fprestore);
+
 bool ex_handler_ext(const struct exception_table_entry *fixup,
                   struct pt_regs *regs, int trapnr)
 {
index 39567b5c33da3b09a8ab6a50509e7de92625d31b..e2baeaa053a5b9feb76a1587fb4756786ffd76db 100644 (file)
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
  *          faulted on a pte with its pkey=4.
  */
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
-               struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 {
        /* This is effectively an #ifdef */
        if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
         * valid VMA, so we should never reach this without a
         * valid VMA.
         */
-       if (!vma) {
+       if (!pkey) {
                WARN_ONCE(1, "PKU fault with no VMA passed in");
                info->si_pkey = 0;
                return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
          * absolutely guaranteed to be 100% accurate because of
         * the race explained above.
         */
-       info->si_pkey = vma_pkey(vma);
+       info->si_pkey = *pkey;
 }
 
 static void
 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
-                    struct task_struct *tsk, struct vm_area_struct *vma,
-                    int fault)
+                    struct task_struct *tsk, u32 *pkey, int fault)
 {
        unsigned lsb = 0;
        siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
                lsb = PAGE_SHIFT;
        info.si_addr_lsb = lsb;
 
-       fill_sig_info_pkey(si_code, &info, vma);
+       fill_sig_info_pkey(si_code, &info, pkey);
 
        force_sig_info(si_signo, &info, tsk);
 }
@@ -762,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
        struct task_struct *tsk = current;
        unsigned long flags;
        int sig;
-       /* No context means no VMA to pass down */
-       struct vm_area_struct *vma = NULL;
 
        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -788,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
                        /* XXX: hwpoison faults will set the wrong code. */
                        force_sig_info_fault(signal, si_code, address,
-                                            tsk, vma, 0);
+                                            tsk, NULL, 0);
                }
 
                /*
@@ -896,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 static void
 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-                      unsigned long address, struct vm_area_struct *vma,
-                      int si_code)
+                      unsigned long address, u32 *pkey, int si_code)
 {
        struct task_struct *tsk = current;
 
@@ -945,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
                tsk->thread.error_code  = error_code;
                tsk->thread.trap_nr     = X86_TRAP_PF;
 
-               force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+               force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
 
                return;
        }
@@ -958,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 static noinline void
 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
-                    unsigned long address, struct vm_area_struct *vma)
+                    unsigned long address, u32 *pkey)
 {
-       __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+       __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
 }
 
 static void
@@ -968,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
           unsigned long address,  struct vm_area_struct *vma, int si_code)
 {
        struct mm_struct *mm = current->mm;
+       u32 pkey;
+
+       if (vma)
+               pkey = vma_pkey(vma);
 
        /*
         * Something tried to access memory that isn't in our memory map..
@@ -975,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
         */
        up_read(&mm->mmap_sem);
 
-       __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+       __bad_area_nosemaphore(regs, error_code, address,
+                              (vma) ? &pkey : NULL, si_code);
 }
 
 static noinline void
@@ -1018,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-         struct vm_area_struct *vma, unsigned int fault)
+         u32 *pkey, unsigned int fault)
 {
        struct task_struct *tsk = current;
        int code = BUS_ADRERR;
@@ -1045,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
                code = BUS_MCEERR_AR;
        }
 #endif
-       force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+       force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
 }
 
 static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
-              unsigned long address, struct vm_area_struct *vma,
-              unsigned int fault)
+              unsigned long address, u32 *pkey, unsigned int fault)
 {
        if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
                no_context(regs, error_code, address, 0, 0);
@@ -1075,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
        } else {
                if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
                             VM_FAULT_HWPOISON_LARGE))
-                       do_sigbus(regs, error_code, address, vma, fault);
+                       do_sigbus(regs, error_code, address, pkey, fault);
                else if (fault & VM_FAULT_SIGSEGV)
-                       bad_area_nosemaphore(regs, error_code, address, vma);
+                       bad_area_nosemaphore(regs, error_code, address, pkey);
                else
                        BUG();
        }
@@ -1267,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
        struct mm_struct *mm;
        int fault, major = 0;
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+       u32 pkey;
 
        tsk = current;
        mm = tsk->mm;
@@ -1467,9 +1467,10 @@ good_area:
                return;
        }
 
+       pkey = vma_pkey(vma);
        up_read(&mm->mmap_sem);
        if (unlikely(fault & VM_FAULT_ERROR)) {
-               mm_fault_error(regs, error_code, address, vma, fault);
+               mm_fault_error(regs, error_code, address, &pkey, fault);
                return;
        }
 
index 3fcc8e01683bef96b219d65dbdd0315db1f60605..16c5f37933a2ae2d120402f1871af93872aebb07 100644 (file)
@@ -10,6 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#define DISABLE_BRANCH_PROFILING
+
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/mm.h>
index 2dab69a706ec062affe14eb7d9f10fd8538a9018..d7bc0eea20a5ed2fc8ec43ebc06429517cbb362b 100644 (file)
@@ -18,7 +18,6 @@
 
 #include <asm/cpufeature.h>             /* boot_cpu_has, ...            */
 #include <asm/mmu_context.h>            /* vma_pkey()                   */
-#include <asm/fpu/internal.h>           /* fpregs_active()              */
 
 int __execute_only_pkey(struct mm_struct *mm)
 {
@@ -45,7 +44,7 @@ int __execute_only_pkey(struct mm_struct *mm)
         */
        preempt_disable();
        if (!need_to_set_mm_pkey &&
-           fpregs_active() &&
+           current->thread.fpu.initialized &&
            !__pkru_allows_read(read_pkru(), execute_only_pkey)) {
                preempt_enable();
                return execute_only_pkey;
index 93fe97cce5819fd230da1d357cbec8f694a172ad..49d9778376d774c3d6f17eb47118176efb99b718 100644 (file)
@@ -191,7 +191,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                         * mapped in the new pgd, we'll double-fault.  Forcibly
                         * map it.
                         */
-                       unsigned int index = pgd_index(current_stack_pointer());
+                       unsigned int index = pgd_index(current_stack_pointer);
                        pgd_t *pgd = next->pgd + index;
 
                        if (unlikely(pgd_none(*pgd)))
index 7330cb3b22833596cb46b17d43e6f6f22d543487..71495f1a86d72f7df1464ce3ffe805d880948bbd 100644 (file)
@@ -1238,21 +1238,16 @@ static void __init xen_pagetable_cleanhighmap(void)
         * from _brk_limit way up to the max_pfn_mapped (which is the end of
         * the ramdisk). We continue on, erasing PMD entries that point to page
         * tables - do note that they are accessible at this stage via __va.
-        * For good measure we also round up to the PMD - which means that if
+        * As Xen is aligning the memory end to a 4MB boundary, for good
+        * measure we also round up to PMD_SIZE * 2 - which means that if
         * anybody using a __ka address for the initial boot-stack and trying
         * to use it is going to crash. The xen_start_info has been
         * taken care of already in xen_setup_kernel_pagetable. */
        addr = xen_start_info->pt_base;
-       size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+       size = xen_start_info->nr_pt_frames * PAGE_SIZE;
 
-       xen_cleanhighmap(addr, addr + size);
+       xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
        xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
-       /* This is superfluous and is not necessary, but you know what
-        * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
-        * anything at this stage. */
-       xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
 }
 #endif
 
index 077f9bad6f44a57bd01e930f9dd740001eadcd76..3c3a37b8503bd43db4200230309ddeff96eb4e00 100644 (file)
@@ -743,17 +743,19 @@ static int ghes_proc(struct ghes *ghes)
        }
        ghes_do_proc(ghes, ghes->estatus);
 
+out:
+       ghes_clear_estatus(ghes);
+
+       if (rc == -ENOENT)
+               return rc;
+
        /*
         * GHESv2 type HEST entries introduce support for error acknowledgment,
         * so only acknowledge the error if this support is present.
         */
-       if (is_hest_type_generic_v2(ghes)) {
-               rc = ghes_ack_error(ghes->generic_v2);
-               if (rc)
-                       return rc;
-       }
-out:
-       ghes_clear_estatus(ghes);
+       if (is_hest_type_generic_v2(ghes))
+               return ghes_ack_error(ghes->generic_v2);
+
        return rc;
 }
 
index a8cc14fd8ae49ff92cb2fc3dd593273aefd6e10d..a6de325306933b57928b8d16d741247e93274102 100644 (file)
@@ -1581,6 +1581,9 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
 
        opp->available = availability_req;
 
+       dev_pm_opp_get(opp);
+       mutex_unlock(&opp_table->lock);
+
        /* Notify the change of the OPP availability */
        if (availability_req)
                blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE,
@@ -1589,8 +1592,12 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
                blocking_notifier_call_chain(&opp_table->head,
                                             OPP_EVENT_DISABLE, opp);
 
+       dev_pm_opp_put(opp);
+       goto put_table;
+
 unlock:
        mutex_unlock(&opp_table->lock);
+put_table:
        dev_pm_opp_put_opp_table(opp_table);
        return r;
 }
index 6a20dc8b253f4c66981e39bec82f1e2634c50f41..9a7d7f0f23feae971f1f8d959b66578e9c59833e 100644 (file)
@@ -43,7 +43,7 @@ static int numachip2_set_next_event(unsigned long delta, struct clock_event_devi
        return 0;
 }
 
-static struct clock_event_device numachip2_clockevent = {
+static const struct clock_event_device numachip2_clockevent __initconst = {
        .name            = "numachip2",
        .rating          = 400,
        .set_next_event  = numachip2_set_next_event,
index 430edadca527ac9763a09be6749696113ca9343c..a753c50e9e412ea96cb69fa454e81e084a3016be 100644 (file)
@@ -118,6 +118,10 @@ static const struct of_device_id blacklist[] __initconst = {
 
        { .compatible = "sigma,tango4", },
 
+       { .compatible = "ti,am33xx", },
+       { .compatible = "ti,am43", },
+       { .compatible = "ti,dra7", },
+
        { }
 };
 
index d228f5a990449f0b2c28314c972db2664c6bc1cd..dbbe986f90f29ffb6c77662866e336d8130d1af2 100644 (file)
@@ -636,7 +636,194 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev)
                                NUM_BANKS(ADDR_SURF_2_BANK);
                for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
                        WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
-       } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) {
+       } else if (adev->asic_type == CHIP_OLAND) {
+               tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[1] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[2] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[3] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[4] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[5] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(split_equal_to_row_size) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[6] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(split_equal_to_row_size) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[7] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(split_equal_to_row_size) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[8] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[9] =   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[10] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[11] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[12] =  MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[13] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[14] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[15] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[16] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[17] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P4_8x16) |
+                               TILE_SPLIT(split_equal_to_row_size) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[21] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[22] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4);
+               tilemode[23] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[24] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+                               NUM_BANKS(ADDR_SURF_16_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2);
+               tilemode[25] =  MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
+                               ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+                               PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
+                               TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
+                               NUM_BANKS(ADDR_SURF_8_BANK) |
+                               BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+                               BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+                               MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1);
+               for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+                       WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
+       } else if (adev->asic_type == CHIP_HAINAN) {
                tilemode[0] =   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
                                ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                                PIPE_CONFIG(ADDR_SURF_P2) |
index e4a8c2e52cb2c14ab566d015b178a2e6bd243cac..660b3fbade4194f796ebe0be8e4fc7f7e9c46109 100644 (file)
@@ -892,6 +892,8 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
        int err = 0;
 
        dev = kfd_device_by_id(args->gpu_id);
+       if (!dev)
+               return -EINVAL;
 
        dev->kfd2kgd->get_tile_config(dev->kgd, &config);
 
index 5979158c3f7b93e96627af2c03cd814228b0fe01..944abfad39c1f67447ca720d5e47c4b086336a82 100644 (file)
@@ -292,7 +292,10 @@ static int create_signal_event(struct file *devkfd,
                                struct kfd_event *ev)
 {
        if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
-               pr_warn("Signal event wasn't created because limit was reached\n");
+               if (!p->signal_event_limit_reached) {
+                       pr_warn("Signal event wasn't created because limit was reached\n");
+                       p->signal_event_limit_reached = true;
+               }
                return -ENOMEM;
        }
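
The new flag above is the standard print-once pattern: the first refusal logs a warning and latches a boolean, so repeated refusals stay silent while still failing. A minimal user-space sketch in C (names are ours, not the KFD code):

#include <stdbool.h>
#include <stdio.h>

struct process {
        unsigned int event_count;
        bool         limit_warned;      /* latched after first warning */
};

static int create_event(struct process *p, unsigned int limit)
{
        if (p->event_count == limit) {
                if (!p->limit_warned) {
                        fprintf(stderr, "event limit reached\n");
                        p->limit_warned = true;
                }
                return -1;              /* still fail every time */
        }
        p->event_count++;
        return 0;
}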
 
index 0649dd43e7806e28bbd84d35ea56c0e70506e292..ed71ad40e8f797ca3c7b7d4f129f5e9fda382d27 100644 (file)
@@ -184,7 +184,7 @@ static void uninitialize(struct kernel_queue *kq)
        if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
                kq->mqd->destroy_mqd(kq->mqd,
                                        kq->queue->mqd,
-                                       false,
+                                       KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
                                        QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
                                        kq->queue->pipe,
                                        kq->queue->queue);
@@ -210,6 +210,11 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
        uint32_t wptr, rptr;
        unsigned int *queue_address;
 
+       /* When rptr == wptr, the buffer is empty.
+        * When rptr == wptr + 1, the buffer is full.
+        * It is always rptr that advances to the position of wptr, never the
+        * opposite, so we can only use up to queue_size_dwords - 1 dwords.
+        */
        rptr = *kq->rptr_kernel;
        wptr = *kq->wptr_kernel;
        queue_address = (unsigned int *)kq->pq_kernel_addr;
@@ -219,11 +224,10 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
        pr_debug("wptr: %d\n", wptr);
        pr_debug("queue_address 0x%p\n", queue_address);
 
-       available_size = (rptr - 1 - wptr + queue_size_dwords) %
+       available_size = (rptr + queue_size_dwords - 1 - wptr) %
                                                        queue_size_dwords;
 
-       if (packet_size_in_dwords >= queue_size_dwords ||
-                       packet_size_in_dwords >= available_size) {
+       if (packet_size_in_dwords > available_size) {
                /*
                 * make sure calling functions know
                 * acquire_packet_buffer() failed
@@ -233,6 +237,14 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
        }
 
        if (wptr + packet_size_in_dwords >= queue_size_dwords) {
+               /* make sure after rolling back to position 0, there is
+                * still enough space.
+                */
+               if (packet_size_in_dwords >= rptr) {
+                       *buffer_ptr = NULL;
+                       return -ENOMEM;
+               }
+               /* fill nops, roll back and start at position 0 */
                while (wptr > 0) {
                        queue_address[wptr] = kq->nop_packet;
                        wptr = (wptr + 1) % queue_size_dwords;
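
The arithmetic above is a classic reserve-one-slot ring buffer: one dword is sacrificed so that rptr == wptr can unambiguously mean empty. A minimal stand-alone sketch in plain C (hypothetical names, not the kernel code):

#include <stdio.h>

#define QUEUE_SIZE_DWORDS 64u

/* One slot is always kept unused: rptr == wptr means "empty",
 * rptr == wptr + 1 means "full", so at most
 * QUEUE_SIZE_DWORDS - 1 dwords are usable at once. */
static unsigned int available_dwords(unsigned int rptr, unsigned int wptr)
{
        return (rptr + QUEUE_SIZE_DWORDS - 1 - wptr) % QUEUE_SIZE_DWORDS;
}

int main(void)
{
        printf("%u\n", available_dwords(0, 0));  /* empty queue: 63 */
        printf("%u\n", available_dwords(1, 0));  /* full queue:  0 */
        return 0;
}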
index b397ec726400c2a52533356363b36bfd275dfb39..b87e96cee5facfea112a874f4e69bfad52f3b70b 100644 (file)
@@ -521,6 +521,7 @@ struct kfd_process {
        struct list_head signal_event_pages;
        u32 next_nonsignal_event_id;
        size_t signal_event_count;
+       bool signal_event_limit_reached;
 };
 
 /**
index 5a634594a6cea2d2be20e19a0e359229a860775c..57881167ccd22c9c16df18e1d93401edef2e1f82 100644 (file)
@@ -551,12 +551,15 @@ static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
 void etnaviv_gem_free_object(struct drm_gem_object *obj)
 {
        struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
+       struct etnaviv_drm_private *priv = obj->dev->dev_private;
        struct etnaviv_vram_mapping *mapping, *tmp;
 
        /* object should not be active */
        WARN_ON(is_active(etnaviv_obj));
 
+       mutex_lock(&priv->gem_lock);
        list_del(&etnaviv_obj->gem_node);
+       mutex_unlock(&priv->gem_lock);
 
        list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
                                 obj_node) {
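
Taking gem_lock around list_del above means the object can no longer vanish from the private GEM list while another thread walks it; in general, list removal must hold the same lock the walkers take. A pthread sketch of that invariant (illustrative only):

#include <pthread.h>

struct node { struct node *prev, *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Unlink under the same lock that list walkers hold, so no walker
 * can observe a half-unlinked node or step onto freed memory. */
static void list_remove(struct node *n)
{
        pthread_mutex_lock(&list_lock);
        n->prev->next = n->next;
        n->next->prev = n->prev;
        pthread_mutex_unlock(&list_lock);
}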
index 026ef4e02f85cab130586c5c0b8c9b97bd0101bb..46dfe0737f438d37e4e65dec320e9a7e773e3856 100644 (file)
@@ -445,8 +445,10 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
        cmdbuf->user_size = ALIGN(args->stream_size, 8);
 
        ret = etnaviv_gpu_submit(gpu, submit, cmdbuf);
-       if (ret == 0)
-               cmdbuf = NULL;
+       if (ret)
+               goto out;
+
+       cmdbuf = NULL;
 
        if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
                /*
index 14c5613b4388a839461915fc448d4772422861a3..afbf50d0c08fa1c89c49120717b7a51cb05d5b98 100644 (file)
@@ -509,23 +509,25 @@ static void qxl_primary_atomic_update(struct drm_plane *plane,
            .y2 = qfb->base.height
        };
 
-       if (!old_state->fb) {
-               qxl_io_log(qdev,
-                          "create primary fb: %dx%d,%d,%d\n",
-                          bo->surf.width, bo->surf.height,
-                          bo->surf.stride, bo->surf.format);
+       if (old_state->fb) {
+               qfb_old = to_qxl_framebuffer(old_state->fb);
+               bo_old = gem_to_qxl_bo(qfb_old->obj);
+       } else {
+               bo_old = NULL;
+       }
 
-               qxl_io_create_primary(qdev, 0, bo);
-               bo->is_primary = true;
+       if (bo == bo_old)
                return;
 
-       } else {
-               qfb_old = to_qxl_framebuffer(old_state->fb);
-               bo_old = gem_to_qxl_bo(qfb_old->obj);
+       if (bo_old && bo_old->is_primary) {
+               qxl_io_destroy_primary(qdev);
                bo_old->is_primary = false;
        }
 
-       bo->is_primary = true;
+       if (!bo->is_primary) {
+               qxl_io_create_primary(qdev, 0, bo);
+               bo->is_primary = true;
+       }
        qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1);
 }
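
The rework above makes primary-surface handling idempotent: the old surface is destroyed only if it exists, differs from the new bo, and is actually primary, and creation is skipped when the bo is already primary. A hedged sketch of the pattern, with hypothetical hardware hooks left as comments:

#include <stdbool.h>
#include <stddef.h>

struct surface { bool is_primary; };

static void destroy_primary(struct surface *s)
{
        if (s && s->is_primary) {       /* tear down only what exists */
                /* hw_destroy_primary(s);  hypothetical hw call */
                s->is_primary = false;
        }
}

static void make_primary(struct surface *s)
{
        if (!s->is_primary) {           /* skip if already primary */
                /* hw_create_primary(s);  hypothetical hw call */
                s->is_primary = true;
        }
}

static void switch_primary(struct surface *new_s, struct surface *old_s)
{
        if (new_s == old_s)             /* same bo: nothing to do */
                return;
        destroy_primary(old_s);
        make_primary(new_s);
}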
 
@@ -534,13 +536,15 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane,
 {
        struct qxl_device *qdev = plane->dev->dev_private;
 
-       if (old_state->fb)
-       {       struct qxl_framebuffer *qfb =
+       if (old_state->fb) {
+               struct qxl_framebuffer *qfb =
                        to_qxl_framebuffer(old_state->fb);
                struct qxl_bo *bo = gem_to_qxl_bo(qfb->obj);
 
-               qxl_io_destroy_primary(qdev);
-               bo->is_primary = false;
+               if (bo->is_primary) {
+                       qxl_io_destroy_primary(qdev);
+                       bo->is_primary = false;
+               }
        }
 }
 
@@ -698,14 +702,15 @@ static void qxl_plane_cleanup_fb(struct drm_plane *plane,
        struct drm_gem_object *obj;
        struct qxl_bo *user_bo;
 
-       if (!plane->state->fb) {
-               /* we never executed prepare_fb, so there's nothing to
+       if (!old_state->fb) {
+               /*
+                * we never executed prepare_fb, so there's nothing to
                 * unpin.
                 */
                return;
        }
 
-       obj = to_qxl_framebuffer(plane->state->fb)->obj;
+       obj = to_qxl_framebuffer(old_state->fb)->obj;
        user_bo = gem_to_qxl_bo(obj);
        qxl_bo_unpin(user_bo);
 }
index 997131d58c7f639f8dbb765e51433a35a7da27ba..ffc10cadcf34ccb79a7d09ae253b4a2dffda7cfe 100644 (file)
@@ -1663,7 +1663,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
        radeon_agp_suspend(rdev);
 
        pci_save_state(dev->pdev);
-       if (freeze && rdev->family >= CHIP_CEDAR) {
+       if (freeze && rdev->family >= CHIP_CEDAR && !(rdev->flags & RADEON_IS_IGP)) {
                rdev->asic->asic_reset(rdev, true);
                pci_restore_state(dev->pdev);
        } else if (suspend) {
index 06f05302ee75e33b2a05f9c468b1a3bbd60619be..882d85db90539ae1b00e194a0cae43bf8fb4918d 100644 (file)
@@ -26,7 +26,7 @@ config DRM_SUN4I_HDMI_CEC
        bool "Allwinner A10 HDMI CEC Support"
        depends on DRM_SUN4I_HDMI
        select CEC_CORE
-       depends on CEC_PIN
+       select CEC_PIN
        help
          Choose this option if you have an Allwinner SoC with an HDMI
          controller and want to use CEC.
index 1457750988da4930792eceb3e543f211d66f84f7..a1f8cba251a245af7227d853da784518d19d405d 100644 (file)
@@ -15,7 +15,7 @@
 #include <drm/drm_connector.h>
 #include <drm/drm_encoder.h>
 
-#include <media/cec.h>
+#include <media/cec-pin.h>
 
 #define SUN4I_HDMI_CTRL_REG            0x004
 #define SUN4I_HDMI_CTRL_ENABLE                 BIT(31)
index e9b7cdad5c4c1beac73bca274321cc57280006bd..5a1ab4046e926fefe4cb6d73d2c3ebe031eb45bb 100644 (file)
@@ -63,6 +63,6 @@ DEFINE_EVENT(register_access, sor_readl,
 
 /* This part must be outside protection */
 #undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/tegra
 #define TRACE_INCLUDE_FILE trace
 #include <trace/define_trace.h>
index 70ad19c4c73e77da961ffccd9958887dcead8027..88bdafb297f5fe9f722c7a8edee27554ea3b6862 100644 (file)
@@ -432,8 +432,10 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
        atomic_set(&qp->qp_sec->error_list_count, 0);
        init_completion(&qp->qp_sec->error_complete);
        ret = security_ib_alloc_security(&qp->qp_sec->security);
-       if (ret)
+       if (ret) {
                kfree(qp->qp_sec);
+               qp->qp_sec = NULL;
+       }
 
        return ret;
 }
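
Clearing qp->qp_sec after kfree above keeps later teardown paths from freeing or dereferencing the stale pointer. A compact user-space model of that error-path rule (the hook and sizes are invented for illustration):

#include <errno.h>
#include <stdlib.h>

struct qp { void *sec; };

static int init_security(struct qp *qp, int (*hook)(void *sec))
{
        qp->sec = malloc(64);
        if (!qp->sec)
                return -ENOMEM;

        int ret = hook(qp->sec);        /* may fail, like the alloc above */
        if (ret) {
                free(qp->sec);
                qp->sec = NULL;         /* the essence of the fix */
        }
        return ret;
}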
index 4ab30d832ac5b8a5ac4994b9c4f0b3f3802e019c..52a2cf2d83aaf483944ad9720e55121f2adb6484 100644 (file)
@@ -3869,15 +3869,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
        resp.raw_packet_caps = attr.raw_packet_caps;
        resp.response_length += sizeof(resp.raw_packet_caps);
 
-       if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps))
+       if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
                goto end;
 
-       resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size;
-       resp.xrq_caps.max_num_tags      = attr.xrq_caps.max_num_tags;
-       resp.xrq_caps.max_ops           = attr.xrq_caps.max_ops;
-       resp.xrq_caps.max_sge           = attr.xrq_caps.max_sge;
-       resp.xrq_caps.flags             = attr.xrq_caps.flags;
-       resp.response_length += sizeof(resp.xrq_caps);
+       resp.tm_caps.max_rndv_hdr_size  = attr.tm_caps.max_rndv_hdr_size;
+       resp.tm_caps.max_num_tags       = attr.tm_caps.max_num_tags;
+       resp.tm_caps.max_ops            = attr.tm_caps.max_ops;
+       resp.tm_caps.max_sge            = attr.tm_caps.max_sge;
+       resp.tm_caps.flags              = attr.tm_caps.flags;
+       resp.response_length += sizeof(resp.tm_caps);
 end:
        err = ib_copy_to_udata(ucore, &resp, resp.response_length);
        return err;
index b2ed4b9cda6eef62f81b5ac22b71b4cc3a5a2d10..0be42787759fa78c73d0e0d9776209c2362a3ddc 100644 (file)
@@ -1066,6 +1066,8 @@ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
 static int thermal_init(struct hfi1_devdata *dd);
 
 static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+                                           int msecs);
 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
                                  int msecs);
 static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
@@ -8238,6 +8240,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
        u64 regs[CCE_NUM_INT_CSRS];
        u32 bit;
        int i;
+       irqreturn_t handled = IRQ_NONE;
 
        this_cpu_inc(*dd->int_counter);
 
@@ -8258,9 +8261,10 @@ static irqreturn_t general_interrupt(int irq, void *data)
        for_each_set_bit(bit, (unsigned long *)&regs[0],
                         CCE_NUM_INT_CSRS * 64) {
                is_interrupt(dd, bit);
+               handled = IRQ_HANDLED;
        }
 
-       return IRQ_HANDLED;
+       return handled;
 }
 
 static irqreturn_t sdma_interrupt(int irq, void *data)
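
Returning IRQ_NONE when no status bit was pending lets the core's spurious-interrupt accounting detect and disable a misbehaving line. A user-space sketch of the handler shape introduced above (the enum mimics the kernel's irqreturn_t):

enum irqreturn { IRQ_NONE = 0, IRQ_HANDLED = 1 };

/* Report "handled" only if at least one pending source was actually
 * serviced, mirroring the general_interrupt() change above. */
static enum irqreturn demo_handler(unsigned long pending,
                                   void (*service)(int bit))
{
        enum irqreturn handled = IRQ_NONE;
        int bit;

        for (bit = 0; bit < 64; bit++) {
                if (pending & (1UL << bit)) {
                        service(bit);
                        handled = IRQ_HANDLED;
                }
        }
        return handled;
}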
@@ -9413,7 +9417,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
        write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
 }
 
-void reset_qsfp(struct hfi1_pportdata *ppd)
+int reset_qsfp(struct hfi1_pportdata *ppd)
 {
        struct hfi1_devdata *dd = ppd->dd;
        u64 mask, qsfp_mask;
@@ -9443,6 +9447,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
         * for alarms and warnings
         */
        set_qsfp_int_n(ppd, 1);
+
+       /*
+        * After the reset, AOC transmitters are enabled by default. They need
+        * to be turned off to complete the QSFP setup before they can be
+        * enabled again.
+        */
+       return set_qsfp_tx(ppd, 0);
 }
 
 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
@@ -10305,6 +10316,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
 {
        struct hfi1_devdata *dd = ppd->dd;
        u32 previous_state;
+       int offline_state_ret;
        int ret;
 
        update_lcb_cache(dd);
@@ -10326,28 +10338,11 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
                ppd->offline_disabled_reason =
                HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);
 
-       /*
-        * Wait for offline transition. It can take a while for
-        * the link to go down.
-        */
-       ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
-       if (ret < 0)
-               return ret;
-
-       /*
-        * Now in charge of LCB - must be after the physical state is
-        * offline.quiet and before host_link_state is changed.
-        */
-       set_host_lcb_access(dd);
-       write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
-
-       /* make sure the logical state is also down */
-       ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
-       if (ret)
-               force_logical_link_state_down(ppd);
-
-       ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+       offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
+       if (offline_state_ret < 0)
+               return offline_state_ret;
 
+       /* Disable AOC transmitters */
        if (ppd->port_type == PORT_TYPE_QSFP &&
            ppd->qsfp_info.limiting_active &&
            qsfp_mod_present(ppd)) {
@@ -10364,6 +10359,30 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
                }
        }
 
+       /*
+        * Wait for the offline.Quiet transition if it hasn't happened yet. It
+        * can take a while for the link to go down.
+        */
+       if (offline_state_ret != PLS_OFFLINE_QUIET) {
+               ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
+               if (ret < 0)
+                       return ret;
+       }
+
+       /*
+        * Now in charge of LCB - must be after the physical state is
+        * offline.quiet and before host_link_state is changed.
+        */
+       set_host_lcb_access(dd);
+       write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
+
+       /* make sure the logical state is also down */
+       ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
+       if (ret)
+               force_logical_link_state_down(ppd);
+
+       ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
+
        /*
         * The LNI has a mandatory wait time after the physical state
         * moves to Offline.Quiet.  The wait time may be different
@@ -10396,6 +10415,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
                        & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
                /* went down while attempting link up */
                check_lni_states(ppd);
+
+               /* The QSFP doesn't need to be reset on LNI failure */
+               ppd->qsfp_info.reset_needed = 0;
        }
 
        /* the active link width (downgrade) is 0 on link down */
@@ -12804,6 +12826,39 @@ static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
        return 0;
 }
 
+/*
+ * wait_phys_link_offline_substates - wait for any offline substate
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for the physical link state to reach
+ * any offline substate.
+ * Returns the read physical state if an offline substate is reached,
+ * otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
+                                           int msecs)
+{
+       u32 read_state;
+       unsigned long timeout;
+
+       timeout = jiffies + msecs_to_jiffies(msecs);
+       while (1) {
+               read_state = read_physical_state(ppd->dd);
+               if ((read_state & 0xF0) == PLS_OFFLINE)
+                       break;
+               if (time_after(jiffies, timeout)) {
+                       dd_dev_err(ppd->dd,
+                                  "timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
+                                  read_state, msecs);
+                       return -ETIMEDOUT;
+               }
+               usleep_range(1950, 2050); /* sleep 2ms-ish */
+       }
+
+       log_state_transition(ppd, read_state);
+       return read_state;
+}
+
 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
 (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
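
The new helper above is a poll-until-state-or-deadline loop. A user-space analogue under assumed names (0x90 stands in for PLS_OFFLINE, the offline major state in this driver):

#include <errno.h>
#include <time.h>

static long long now_ms(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
}

/* Poll read_state() every ~2 ms until any offline substate shows up
 * or the deadline passes, returning the state or -ETIMEDOUT. */
static int wait_for_offline(unsigned int (*read_state)(void), int msecs)
{
        long long deadline = now_ms() + msecs;
        struct timespec two_ms = { 0, 2 * 1000 * 1000 };

        for (;;) {
                unsigned int state = read_state();

                if ((state & 0xF0) == 0x90)     /* PLS_OFFLINE */
                        return (int)state;
                if (now_ms() > deadline)
                        return -ETIMEDOUT;
                nanosleep(&two_ms, NULL);
        }
}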
 
index b8345a60a0fbc452215dba3c679daa6785fb4e4a..50b8645d0b876dbf6d58f56d677e3cfcc7984e55 100644 (file)
 #define PLS_OFFLINE_READY_TO_QUIET_LT     0x92
 #define PLS_OFFLINE_REPORT_FAILURE                0x93
 #define PLS_OFFLINE_READY_TO_QUIET_BCC    0x94
+#define PLS_OFFLINE_QUIET_DURATION        0x95
 #define PLS_POLLING                               0x20
 #define PLS_POLLING_QUIET                         0x20
 #define PLS_POLLING_ACTIVE                        0x21
@@ -722,7 +723,7 @@ void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
 void handle_start_link(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
-void reset_qsfp(struct hfi1_pportdata *ppd);
+int reset_qsfp(struct hfi1_pportdata *ppd);
 void qsfp_event(struct work_struct *work);
 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
 int send_idle_sma(struct hfi1_devdata *dd, u64 message);
index d46b171079010d2cf7e42dd7557f0abc2bef1348..1613af1c58d9ddaafca831b1282a6462fdc10b66 100644 (file)
@@ -204,7 +204,10 @@ done_asic:
        return ret;
 }
 
-/* magic character sequence that trails an image */
+/* magic character sequence that begins an image */
+#define IMAGE_START_MAGIC "APO="
+
+/* magic character sequence that might trail an image */
 #define IMAGE_TRAIL_MAGIC "egamiAPO"
 
 /* EPROM file types */
@@ -250,6 +253,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
 {
        void *buffer;
        void *p;
+       u32 length;
        int ret;
 
        buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@@ -262,15 +266,21 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
                return ret;
        }
 
-       /* scan for image magic that may trail the actual data */
-       p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
-       if (!p) {
+       /* config partition is valid only if it starts with IMAGE_START_MAGIC */
+       if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
                kfree(buffer);
                return -ENOENT;
        }
 
+       /* scan for image magic that may trail the actual data */
+       p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
+       if (p)
+               length = p - buffer;
+       else
+               length = P1_SIZE;
+
        *data = buffer;
-       *size = p - buffer;
+       *size = length;
        return 0;
 }
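
After the fix above, the parsing rule is: the start magic is mandatory, the trailer optional, and the payload runs to the trailer when present or to the partition end otherwise. A stand-alone sketch (memmem() is a GNU extension; the magic constants are copied from the patch):

#define _GNU_SOURCE             /* for memmem() */
#include <string.h>

#define START_MAGIC "APO="
#define TRAIL_MAGIC "egamiAPO"

/* Return the payload length, or -1 if the start magic is missing.
 * Without a trailer the whole buffer counts as payload. */
static long payload_length(const char *buf, size_t size)
{
        const char *p;

        if (size < strlen(START_MAGIC) ||
            memcmp(buf, START_MAGIC, strlen(START_MAGIC)))
                return -1;

        p = memmem(buf, size, TRAIL_MAGIC, strlen(TRAIL_MAGIC));
        return p ? (long)(p - buf) : (long)size;
}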
 
index 2bc89260235a1db1c744931cde7ec07fd3202495..d9a1e989313641b06f32ffdd4677ce8aa7e32802 100644 (file)
@@ -930,15 +930,8 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
        switch (ret) {
        case 0:
                ret = setup_base_ctxt(fd, uctxt);
-               if (uctxt->subctxt_cnt) {
-                       /*
-                        * Base context is done (successfully or not), notify
-                        * anybody using a sub-context that is waiting for
-                        * this completion.
-                        */
-                       clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
-                       wake_up(&uctxt->wait);
-               }
+               if (ret)
+                       deallocate_ctxt(uctxt);
                break;
        case 1:
                ret = complete_subctxt(fd);
@@ -1305,25 +1298,25 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
        /* Now allocate the RcvHdr queue and eager buffers. */
        ret = hfi1_create_rcvhdrq(dd, uctxt);
        if (ret)
-               return ret;
+               goto done;
 
        ret = hfi1_setup_eagerbufs(uctxt);
        if (ret)
-               goto setup_failed;
+               goto done;
 
        /* If sub-contexts are enabled, do the appropriate setup */
        if (uctxt->subctxt_cnt)
                ret = setup_subctxt(uctxt);
        if (ret)
-               goto setup_failed;
+               goto done;
 
        ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
        if (ret)
-               goto setup_failed;
+               goto done;
 
        ret = init_user_ctxt(fd, uctxt);
        if (ret)
-               goto setup_failed;
+               goto done;
 
        user_init(uctxt);
 
@@ -1331,12 +1324,22 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
        fd->uctxt = uctxt;
        hfi1_rcd_get(uctxt);
 
-       return 0;
+done:
+       if (uctxt->subctxt_cnt) {
+               /*
+                * On error, set the failed bit so sub-contexts will clean up
+                * correctly.
+                */
+               if (ret)
+                       set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
 
-setup_failed:
-       /* Set the failed bit so sub-context init can do the right thing */
-       set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
-       deallocate_ctxt(uctxt);
+               /*
+                * Base context is done (successfully or not), notify anybody
+                * using a sub-context that is waiting for this completion.
+                */
+               clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
+               wake_up(&uctxt->wait);
+       }
 
        return ret;
 }
index 82447b7cdda1e958d7e7c42c3447ec4a72c1fb4a..09e50fd2a08f07bf7b2d42d3d4b4a1a00644b2ce 100644 (file)
@@ -68,7 +68,7 @@
 /*
  * Code to adjust PCIe capabilities.
  */
-static int tune_pcie_caps(struct hfi1_devdata *);
+static void tune_pcie_caps(struct hfi1_devdata *);
 
 /*
  * Do all the common PCIe setup and initialization.
@@ -351,7 +351,7 @@ int pcie_speeds(struct hfi1_devdata *dd)
  */
 int request_msix(struct hfi1_devdata *dd, u32 msireq)
 {
-       int nvec, ret;
+       int nvec;
 
        nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
                                     PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
@@ -360,12 +360,7 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
                return nvec;
        }
 
-       ret = tune_pcie_caps(dd);
-       if (ret) {
-               dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret);
-               pci_free_irq_vectors(dd->pcidev);
-               return ret;
-       }
+       tune_pcie_caps(dd);
 
        /* check for legacy IRQ */
        if (nvec == 1 && !dd->pcidev->msix_enabled)
@@ -502,7 +497,7 @@ uint aspm_mode = ASPM_MODE_DISABLED;
 module_param_named(aspm, aspm_mode, uint, S_IRUGO);
 MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
 
-static int tune_pcie_caps(struct hfi1_devdata *dd)
+static void tune_pcie_caps(struct hfi1_devdata *dd)
 {
        struct pci_dev *parent;
        u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -513,22 +508,14 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
         * Turn on extended tags in DevCtl in case the BIOS has turned it off
         * to improve WFR SDMA bandwidth
         */
-       ret = pcie_capability_read_word(dd->pcidev,
-                                       PCI_EXP_DEVCTL, &ectl);
-       if (ret) {
-               dd_dev_err(dd, "Unable to read from PCI config\n");
-               return ret;
-       }
-
-       if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
+       ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl);
+       if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) {
                dd_dev_info(dd, "Enabling PCIe extended tags\n");
                ectl |= PCI_EXP_DEVCTL_EXT_TAG;
                ret = pcie_capability_write_word(dd->pcidev,
                                                 PCI_EXP_DEVCTL, ectl);
-               if (ret) {
-                       dd_dev_err(dd, "Unable to write to PCI config\n");
-                       return ret;
-               }
+               if (ret)
+                       dd_dev_info(dd, "Unable to write to PCI config\n");
        }
        /* Find out supported and configured values for parent (root) */
        parent = dd->pcidev->bus->self;
@@ -536,15 +523,22 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
         * The driver cannot perform the tuning if it does not have
         * access to the upstream component.
         */
-       if (!parent)
-               return -EINVAL;
+       if (!parent) {
+               dd_dev_info(dd, "Parent not found\n");
+               return;
+       }
        if (!pci_is_root_bus(parent->bus)) {
                dd_dev_info(dd, "Parent not root\n");
-               return -EINVAL;
+               return;
+       }
+       if (!pci_is_pcie(parent)) {
+               dd_dev_info(dd, "Parent is not PCI Express capable\n");
+               return;
+       }
+       if (!pci_is_pcie(dd->pcidev)) {
+               dd_dev_info(dd, "PCI device is not PCI Express capable\n");
+               return;
        }
-
-       if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev))
-               return -EINVAL;
        rc_mpss = parent->pcie_mpss;
        rc_mps = ffs(pcie_get_mps(parent)) - 8;
        /* Find out supported and configured values for endpoint (us) */
@@ -590,8 +584,6 @@ static int tune_pcie_caps(struct hfi1_devdata *dd)
                ep_mrrs = max_mrrs;
                pcie_set_readrq(dd->pcidev, ep_mrrs);
        }
-
-       return 0;
 }
 
 /* End of PCIe capability tuning */
index a8af96d2b1b0ae0dc5e98ae08e3b5c9b5faa50e9..d486355880cb0da37e23755e8ed49197fed90fc8 100644 (file)
@@ -790,7 +790,9 @@ static int tune_active_qsfp(struct hfi1_pportdata *ppd, u32 *ptr_tx_preset,
         * reuse of stale settings established in our previous pass through.
         */
        if (ppd->qsfp_info.reset_needed) {
-               reset_qsfp(ppd);
+               ret = reset_qsfp(ppd);
+               if (ret)
+                       return ret;
                refresh_qsfp_cache(ppd, &ppd->qsfp_info);
        } else {
                ppd->qsfp_info.reset_needed = 1;
index 05fb4bdff6a0a53f175cbe5599e58c586acf4571..d6fbad8f34aa71848c7291122ad847b6944a27a6 100644 (file)
@@ -778,13 +778,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        }
 
        if (MLX5_CAP_GEN(mdev, tag_matching)) {
-               props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
-               props->xrq_caps.max_num_tags =
+               props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
+               props->tm_caps.max_num_tags =
                        (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
-               props->xrq_caps.flags = IB_TM_CAP_RC;
-               props->xrq_caps.max_ops =
+               props->tm_caps.flags = IB_TM_CAP_RC;
+               props->tm_caps.max_ops =
                        1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
-               props->xrq_caps.max_sge = MLX5_TM_MAX_SGE;
+               props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
        }
 
        if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
index 914f212e7ef60652a655682a2d668c9cec19b64a..f3dbd75a0a968eade1c316e0fec9179e0012730a 100644 (file)
@@ -50,13 +50,9 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 {
        unsigned long tmp;
        unsigned long m;
-       int i, k;
-       u64 base = 0;
-       int p = 0;
-       int skip;
-       int mask;
-       u64 len;
-       u64 pfn;
+       u64 base = ~0, p = 0;
+       u64 len, pfn;
+       int i = 0;
        struct scatterlist *sg;
        int entry;
        unsigned long page_shift = umem->page_shift;
@@ -76,33 +72,24 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
        m = find_first_bit(&tmp, BITS_PER_LONG);
        if (max_page_shift)
                m = min_t(unsigned long, max_page_shift - page_shift, m);
-       skip = 1 << m;
-       mask = skip - 1;
-       i = 0;
+
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                len = sg_dma_len(sg) >> page_shift;
                pfn = sg_dma_address(sg) >> page_shift;
-               for (k = 0; k < len; k++) {
-                       if (!(i & mask)) {
-                               tmp = (unsigned long)pfn;
-                               m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
-                               skip = 1 << m;
-                               mask = skip - 1;
-                               base = pfn;
-                               p = 0;
-                       } else {
-                               if (base + p != pfn) {
-                                       tmp = (unsigned long)p;
-                                       m = find_first_bit(&tmp, BITS_PER_LONG);
-                                       skip = 1 << m;
-                                       mask = skip - 1;
-                                       base = pfn;
-                                       p = 0;
-                               }
-                       }
-                       p++;
-                       i++;
+               if (base + p != pfn) {
+                       /* If either the offset or the new
+                        * base is unaligned, update m
+                        */
+                       tmp = (unsigned long)(pfn | p);
+                       if (!IS_ALIGNED(tmp, 1 << m))
+                               m = find_first_bit(&tmp, BITS_PER_LONG);
+
+                       base = pfn;
+                       p = 0;
                }
+
+               p += len;
+               i += len;
        }
 
        if (i) {
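
The rewritten loop above computes, in a single pass over the DMA runs, the largest page order m compatible with every run's start address and length. A stand-alone model of that reduction (GCC/Clang __builtin_ctzl stands in for find_first_bit):

struct run { unsigned long pfn, len; };        /* start page + length */

/* Shrink the candidate order m whenever a run does not continue the
 * previous one and its (pfn | offset) breaks the current alignment. */
static unsigned int common_order(const struct run *runs, int n,
                                 unsigned int m)
{
        unsigned long base = ~0UL, p = 0;
        int i;

        for (i = 0; i < n; i++) {
                if (base + p != runs[i].pfn) {
                        unsigned long tmp = runs[i].pfn | p;

                        if (tmp & ((1UL << m) - 1))     /* unaligned */
                                m = __builtin_ctzl(tmp);
                        base = runs[i].pfn;
                        p = 0;
                }
                p += runs[i].len;
        }
        return m;
}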
index 0e2789d9bb4d0575561aab569e227125cbc38882..37bbc543847a528f73f613df066fdbb4e36484c1 100644 (file)
@@ -47,7 +47,8 @@ enum {
 
 #define MLX5_UMR_ALIGN 2048
 
-static int clean_mr(struct mlx5_ib_mr *mr);
+static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
+static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 
@@ -1270,8 +1271,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
                err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
                                         update_xlt_flags);
+
                if (err) {
-                       mlx5_ib_dereg_mr(&mr->ibmr);
+                       dereg_mr(dev, mr);
                        return ERR_PTR(err);
                }
        }
@@ -1356,7 +1358,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
                                  &npages, &page_shift, &ncont, &order);
                if (err < 0) {
-                       clean_mr(mr);
+                       clean_mr(dev, mr);
                        return err;
                }
        }
@@ -1410,7 +1412,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                if (err) {
                        mlx5_ib_warn(dev, "Failed to rereg UMR\n");
                        ib_umem_release(mr->umem);
-                       clean_mr(mr);
+                       clean_mr(dev, mr);
                        return err;
                }
        }
@@ -1469,9 +1471,8 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
        }
 }
 
-static int clean_mr(struct mlx5_ib_mr *mr)
+static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-       struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
        int allocated_from_cache = mr->allocated_from_cache;
        int err;
 
@@ -1507,10 +1508,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
        return 0;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-       struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-       struct mlx5_ib_mr *mr = to_mmr(ibmr);
        int npages = mr->npages;
        struct ib_umem *umem = mr->umem;
 
@@ -1539,7 +1538,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
        }
 #endif
 
-       clean_mr(mr);
+       clean_mr(dev, mr);
 
        if (umem) {
                ib_umem_release(umem);
@@ -1549,6 +1548,14 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
        return 0;
 }
 
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+       struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+
+       return dereg_mr(dev, mr);
+}
+
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg)
index f0dc5f4aa177e26d622574ec638368fcb7d988cf..442b9bdc0f03bc70d955700e51703b14676b1661 100644 (file)
@@ -3232,7 +3232,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                            mr->ibmr.iova);
                        set_wqe_32bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
-                                           mr->ibmr.length);
+                                           lower_32_bits(mr->ibmr.length));
                        set_wqe_32bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
                        set_wqe_32bit_value(wqe->wqe_words,
@@ -3274,7 +3274,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                            mr->npages * 8);
 
                        nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
-                                 "length: %d, rkey: %0x, pgl_paddr: %llx, "
+                                 "length: %lld, rkey: %0x, pgl_paddr: %llx, "
                                  "page_list_len: %u, wqe_misc: %x\n",
                                  (unsigned long long) mr->ibmr.iova,
                                  mr->ibmr.length,
index 2e075377242e2baccc54cda5859d5b3ba7e768d0..6cd61638b44142029b85d0f057b8d04c6b2138c6 100644 (file)
@@ -1000,19 +1000,6 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
                 */
                priv->dev->broadcast[8] = priv->pkey >> 8;
                priv->dev->broadcast[9] = priv->pkey & 0xff;
-
-               /*
-                * Update the broadcast address in the priv->broadcast object,
-                * in case it already exists, otherwise no one will do that.
-                */
-               if (priv->broadcast) {
-                       spin_lock_irq(&priv->lock);
-                       memcpy(priv->broadcast->mcmember.mgid.raw,
-                              priv->dev->broadcast + 4,
-                       sizeof(union ib_gid));
-                       spin_unlock_irq(&priv->lock);
-               }
-
                return 0;
        }
 
index bac95b509a9b2edc23af7b5e8505fc03acbb80c0..dcc77014018db037cd24540dfe8ddf06d9b70fef 100644 (file)
@@ -2180,6 +2180,7 @@ static struct net_device *ipoib_add_port(const char *format,
 {
        struct ipoib_dev_priv *priv;
        struct ib_port_attr attr;
+       struct rdma_netdev *rn;
        int result = -ENOMEM;
 
        priv = ipoib_intf_alloc(hca, port, format);
@@ -2279,7 +2280,8 @@ register_failed:
        ipoib_dev_cleanup(priv->dev);
 
 device_init_failed:
-       free_netdev(priv->dev);
+       rn = netdev_priv(priv->dev);
+       rn->free_rdma_netdev(priv->dev);
        kfree(priv);
 
 alloc_mem_failed:
@@ -2328,7 +2330,7 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
                return;
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
-               struct rdma_netdev *rn = netdev_priv(priv->dev);
+               struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
 
                ib_unregister_event_handler(&priv->event_handler);
                flush_workqueue(ipoib_workqueue);
@@ -2350,10 +2352,15 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
                unregister_netdev(priv->dev);
                mutex_unlock(&priv->sysfs_mutex);
 
-               rn->free_rdma_netdev(priv->dev);
+               parent_rn->free_rdma_netdev(priv->dev);
+
+               list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+                       struct rdma_netdev *child_rn;
 
-               list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list)
+                       child_rn = netdev_priv(cpriv->dev);
+                       child_rn->free_rdma_netdev(cpriv->dev);
                        kfree(cpriv);
+               }
 
                kfree(priv);
        }
index 9927cd6b7082b1dc24cbfa38431e6156646bf15f..55a9b71ed05a7ff8ff0ac9f7eb84af1d4f6476a0 100644 (file)
@@ -141,14 +141,17 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
                return restart_syscall();
        }
 
-       priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
-       if (!priv) {
+       if (!down_write_trylock(&ppriv->vlan_rwsem)) {
                rtnl_unlock();
                mutex_unlock(&ppriv->sysfs_mutex);
-               return -ENOMEM;
+               return restart_syscall();
        }
 
-       down_write(&ppriv->vlan_rwsem);
+       priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
+       if (!priv) {
+               result = -ENOMEM;
+               goto out;
+       }
 
        /*
         * First ensure this isn't a duplicate. We check the parent device and
@@ -175,8 +178,11 @@ out:
        rtnl_unlock();
        mutex_unlock(&ppriv->sysfs_mutex);
 
-       if (result) {
-               free_netdev(priv->dev);
+       if (result && priv) {
+               struct rdma_netdev *rn;
+
+               rn = netdev_priv(priv->dev);
+               rn->free_rdma_netdev(priv->dev);
                kfree(priv);
        }
 
@@ -204,7 +210,12 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
                return restart_syscall();
        }
 
-       down_write(&ppriv->vlan_rwsem);
+       if (!down_write_trylock(&ppriv->vlan_rwsem)) {
+               rtnl_unlock();
+               mutex_unlock(&ppriv->sysfs_mutex);
+               return restart_syscall();
+       }
+
        list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
                if (priv->pkey == pkey &&
                    priv->child_type == IPOIB_LEGACY_CHILD) {
@@ -224,7 +235,10 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
        mutex_unlock(&ppriv->sysfs_mutex);
 
        if (dev) {
-               free_netdev(dev);
+               struct rdma_netdev *rn;
+
+               rn = netdev_priv(dev);
+               rn->free_rdma_netdev(priv->dev);
                kfree(priv);
                return 0;
        }
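
Both hunks above swap a blocking down_write for a trylock so the code never sleeps on vlan_rwsem while rtnl and sysfs_mutex are held; on contention everything is dropped and the syscall restarts. The shape of that pattern, sketched with pthreads:

#include <errno.h>
#include <pthread.h>

/* Avoid lock inversion: never block on the inner lock while the
 * outer lock is held; back off completely and let the caller retry
 * from scratch instead. */
static int locked_op(pthread_mutex_t *outer, pthread_rwlock_t *inner,
                     int (*op)(void))
{
        int ret;

        pthread_mutex_lock(outer);
        if (pthread_rwlock_trywrlock(inner) != 0) {
                pthread_mutex_unlock(outer);
                return -EAGAIN;         /* caller restarts the operation */
        }
        ret = op();
        pthread_rwlock_unlock(inner);
        pthread_mutex_unlock(outer);
        return ret;
}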
index 9c3e9ab53a415710b0e8bef65647efc4716dc239..322209d5ff5829b6dfd5f4de54f4d11fe141c63d 100644 (file)
@@ -154,7 +154,7 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
 {
        int i;
 
-       iser_err("page vec npages %d data length %d\n",
+       iser_err("page vec npages %d data length %lld\n",
                 page_vec->npages, page_vec->fake_mr.length);
        for (i = 0; i < page_vec->npages; i++)
                iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
index 382de42b835939e167604053060e9944e19131ba..6fe2d03460730cabe0aa78060114844ad1a2a3cf 100644 (file)
@@ -874,7 +874,7 @@ static bool copy_device_table(void)
                hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
                entry = (((u64) hi) << 32) + lo;
                if (last_entry && last_entry != entry) {
-                       pr_err("IOMMU:%d should use the same dev table as others!/n",
+                       pr_err("IOMMU:%d should use the same dev table as others!\n",
                                iommu->index);
                        return false;
                }
@@ -882,7 +882,7 @@ static bool copy_device_table(void)
 
                old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
                if (old_devtb_size != dev_table_size) {
-                       pr_err("The device table size of IOMMU:%d is not expected!/n",
+                       pr_err("The device table size of IOMMU:%d is not expected!\n",
                                iommu->index);
                        return false;
                }
@@ -890,7 +890,7 @@ static bool copy_device_table(void)
 
        old_devtb_phys = entry & PAGE_MASK;
        if (old_devtb_phys >= 0x100000000ULL) {
-               pr_err("The address of old device table is above 4G, not trustworthy!/n");
+               pr_err("The address of old device table is above 4G, not trustworthy!\n");
                return false;
        }
        old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
@@ -901,7 +901,7 @@ static bool copy_device_table(void)
        old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
                                get_order(dev_table_size));
        if (old_dev_tbl_cpy == NULL) {
-               pr_err("Failed to allocate memory for copying old device table!/n");
+               pr_err("Failed to allocate memory for copying old device table!\n");
                return false;
        }
 
index d665d0dc16e8f787813a6106d15bd83afacc4f34..6961fc393f0b25828f5981fad35ea744c7768b41 100644 (file)
@@ -245,7 +245,7 @@ static void __arm_v7s_free_table(void *table, int lvl,
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
                               struct io_pgtable_cfg *cfg)
 {
-       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
+       if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
                return;
 
        dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
index bd515be5b380e32ac224b55d5b9a2dfff26a15ef..16d33ac19db0f77837c30f44de044a3a46b9c558 100644 (file)
@@ -371,7 +371,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
        int ret;
 
        spin_lock_irqsave(&dom->pgtlock, flags);
-       ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
+       ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
+                           size, prot);
        spin_unlock_irqrestore(&dom->pgtlock, flags);
 
        return ret;
index 40159ac12ac8caeaa56c55eae063339f94103e03..c90976d7e53ccc596b65a0864ef169f1aa1fafd8 100644 (file)
@@ -175,14 +175,13 @@ static void gic_mask_irq(struct irq_data *d)
 
 static void gic_unmask_irq(struct irq_data *d)
 {
-       struct cpumask *affinity = irq_data_get_affinity_mask(d);
        unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq);
        unsigned int cpu;
 
        write_gic_smask(intr);
 
        gic_clear_pcpu_masks(intr);
-       cpu = cpumask_first_and(affinity, cpu_online_mask);
+       cpu = cpumask_first(irq_data_get_effective_affinity_mask(d));
        set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
 }
 
@@ -420,13 +419,17 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq,
                                     irq_hw_number_t hw, unsigned int cpu)
 {
        int intr = GIC_HWIRQ_TO_SHARED(hw);
+       struct irq_data *data;
        unsigned long flags;
 
+       data = irq_get_irq_data(virq);
+
        spin_lock_irqsave(&gic_lock, flags);
        write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin);
        write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu)));
        gic_clear_pcpu_masks(intr);
        set_bit(intr, per_cpu_ptr(pcpu_masks, cpu));
+       irq_data_update_effective_affinity(data, cpumask_of(cpu));
        spin_unlock_irqrestore(&gic_lock, flags);
 
        return 0;
@@ -645,7 +648,7 @@ static int __init gic_of_init(struct device_node *node,
 
        /* Find the first available CPU vector. */
        i = 0;
-       reserved = (C_SW0 | C_SW1) >> __fls(C_SW0);
+       reserved = (C_SW0 | C_SW1) >> __ffs(C_SW0);
        while (!of_property_read_u32_index(node, "mti,reserved-cpu-vectors",
                                           i++, &cpu_vec))
                reserved |= BIT(cpu_vec);
@@ -684,11 +687,11 @@ static int __init gic_of_init(struct device_node *node,
 
        gicconfig = read_gic_config();
        gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS;
-       gic_shared_intrs >>= __fls(GIC_CONFIG_NUMINTERRUPTS);
+       gic_shared_intrs >>= __ffs(GIC_CONFIG_NUMINTERRUPTS);
        gic_shared_intrs = (gic_shared_intrs + 1) * 8;
 
        gic_vpes = gicconfig & GIC_CONFIG_PVPS;
-       gic_vpes >>= __fls(GIC_CONFIG_PVPS);
+       gic_vpes >>= __ffs(GIC_CONFIG_PVPS);
        gic_vpes = gic_vpes + 1;
 
        if (cpu_has_veic) {
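
The __fls to __ffs swap above matters because extracting a register field means shifting right by the position of the mask's lowest set bit, not its highest. A tiny stand-alone illustration (GCC/Clang builtin assumed):

#include <stdio.h>

/* Extract a field: mask, then shift by the mask's lowest set bit. */
#define FIELD_GET(mask, reg) (((reg) & (mask)) >> __builtin_ctz(mask))

int main(void)
{
        unsigned int reg  = 0x00012345;
        unsigned int mask = 0x0000ff00;          /* bits 8..15 */

        printf("0x%x\n", FIELD_GET(mask, reg));  /* prints 0x23 */
        return 0;
}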
index bbbbe08982332a53fdad499cd5643f85b97e5898..9a257f9693009c7ec3a163dc83e7c0b2e5839dde 100644 (file)
 #define AS_PEAK_mA_TO_REG(a) \
        ((min_t(u32, AS_PEAK_mA_MAX, a) - 1250) / 250)
 
+/* LED numbers for Devicetree */
+#define AS_LED_FLASH                           0
+#define AS_LED_INDICATOR                       1
+
 enum as_mode {
        AS_MODE_EXT_TORCH = 0 << AS_CONTROL_MODE_SETTING_SHIFT,
        AS_MODE_INDICATOR = 1 << AS_CONTROL_MODE_SETTING_SHIFT,
@@ -491,10 +495,29 @@ static int as3645a_parse_node(struct as3645a *flash,
                              struct device_node *node)
 {
        struct as3645a_config *cfg = &flash->cfg;
+       struct device_node *child;
        const char *name;
        int rval;
 
-       flash->flash_node = of_get_child_by_name(node, "flash");
+       for_each_child_of_node(node, child) {
+               u32 id = 0;
+
+               of_property_read_u32(child, "reg", &id);
+
+               switch (id) {
+               case AS_LED_FLASH:
+                       flash->flash_node = of_node_get(child);
+                       break;
+               case AS_LED_INDICATOR:
+                       flash->indicator_node = of_node_get(child);
+                       break;
+               default:
+                       dev_warn(&flash->client->dev,
+                                "unknown LED %u encountered, ignoring\n", id);
+                       break;
+               }
+       }
+
        if (!flash->flash_node) {
                dev_err(&flash->client->dev, "can't find flash node\n");
                return -ENODEV;
@@ -534,11 +557,10 @@ static int as3645a_parse_node(struct as3645a *flash,
        of_property_read_u32(flash->flash_node, "voltage-reference",
                             &cfg->voltage_reference);
 
-       of_property_read_u32(flash->flash_node, "peak-current-limit",
+       of_property_read_u32(flash->flash_node, "ams,input-max-microamp",
                             &cfg->peak);
        cfg->peak = AS_PEAK_mA_TO_REG(cfg->peak);
 
-       flash->indicator_node = of_get_child_by_name(node, "indicator");
        if (!flash->indicator_node) {
                dev_warn(&flash->client->dev,
                         "can't find indicator node\n");
@@ -721,6 +743,7 @@ static int as3645a_remove(struct i2c_client *client)
        as3645a_set_control(flash, AS_MODE_EXT_TORCH, false);
 
        v4l2_flash_release(flash->vf);
+       v4l2_flash_release(flash->vfind);
 
        led_classdev_flash_unregister(&flash->fled);
        led_classdev_unregister(&flash->iled_cdev);
index 5bfe285ea9d1c815ae8014064a29e4a08566d374..1ac58c5651b7f8086ccba297547ff13823804706 100644 (file)
@@ -3238,7 +3238,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
        if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
                return DM_MAPIO_REQUEUE;
 
-       mddev->pers->make_request(mddev, bio);
+       md_handle_request(mddev, bio);
 
        return DM_MAPIO_SUBMITTED;
 }
index 08fcaebc61bdb52b92f8ba37ccd4d2b3dac41779..0ff1bbf6c90e5cebc782268313ce39f8c66f7270 100644 (file)
@@ -266,6 +266,37 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
+void md_handle_request(struct mddev *mddev, struct bio *bio)
+{
+check_suspended:
+       rcu_read_lock();
+       if (mddev->suspended) {
+               DEFINE_WAIT(__wait);
+               for (;;) {
+                       prepare_to_wait(&mddev->sb_wait, &__wait,
+                                       TASK_UNINTERRUPTIBLE);
+                       if (!mddev->suspended)
+                               break;
+                       rcu_read_unlock();
+                       schedule();
+                       rcu_read_lock();
+               }
+               finish_wait(&mddev->sb_wait, &__wait);
+       }
+       atomic_inc(&mddev->active_io);
+       rcu_read_unlock();
+
+       if (!mddev->pers->make_request(mddev, bio)) {
+               atomic_dec(&mddev->active_io);
+               wake_up(&mddev->sb_wait);
+               goto check_suspended;
+       }
+
+       if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
+               wake_up(&mddev->sb_wait);
+}
+EXPORT_SYMBOL(md_handle_request);
+
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
        const int rw = bio_data_dir(bio);
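
md_handle_request above pairs the suspended flag with an active_io count so mddev_suspend() can drain in-flight bios before proceeding. A simplified pthread model of that handshake (the kernel version additionally retries when ->make_request rejects the bio; names are ours):

#include <pthread.h>
#include <stdbool.h>

struct mddev_model {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        bool            suspended;
        int             active_io;
};

static void handle_request(struct mddev_model *d, void (*make_request)(void))
{
        pthread_mutex_lock(&d->lock);
        while (d->suspended)                    /* park until resume */
                pthread_cond_wait(&d->cond, &d->lock);
        d->active_io++;                         /* now visible to suspend */
        pthread_mutex_unlock(&d->lock);

        make_request();

        pthread_mutex_lock(&d->lock);
        if (--d->active_io == 0)
                pthread_cond_broadcast(&d->cond);  /* unblock suspend */
        pthread_mutex_unlock(&d->lock);
}

static void suspend(struct mddev_model *d)
{
        pthread_mutex_lock(&d->lock);
        d->suspended = true;
        while (d->active_io > 0)                /* drain in-flight I/O */
                pthread_cond_wait(&d->cond, &d->lock);
        pthread_mutex_unlock(&d->lock);
}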
@@ -285,23 +316,6 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
                bio_endio(bio);
                return BLK_QC_T_NONE;
        }
-check_suspended:
-       rcu_read_lock();
-       if (mddev->suspended) {
-               DEFINE_WAIT(__wait);
-               for (;;) {
-                       prepare_to_wait(&mddev->sb_wait, &__wait,
-                                       TASK_UNINTERRUPTIBLE);
-                       if (!mddev->suspended)
-                               break;
-                       rcu_read_unlock();
-                       schedule();
-                       rcu_read_lock();
-               }
-               finish_wait(&mddev->sb_wait, &__wait);
-       }
-       atomic_inc(&mddev->active_io);
-       rcu_read_unlock();
 
        /*
         * save the sectors now since our bio can
@@ -310,20 +324,14 @@ check_suspended:
        sectors = bio_sectors(bio);
        /* bio could be mergeable after passing to underlayer */
        bio->bi_opf &= ~REQ_NOMERGE;
-       if (!mddev->pers->make_request(mddev, bio)) {
-               atomic_dec(&mddev->active_io);
-               wake_up(&mddev->sb_wait);
-               goto check_suspended;
-       }
+
+       md_handle_request(mddev, bio);
 
        cpu = part_stat_lock();
        part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
        part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
        part_stat_unlock();
 
-       if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
-               wake_up(&mddev->sb_wait);
-
        return BLK_QC_T_NONE;
 }
 
@@ -439,16 +447,22 @@ static void md_submit_flush_data(struct work_struct *ws)
        struct mddev *mddev = container_of(ws, struct mddev, flush_work);
        struct bio *bio = mddev->flush_bio;
 
+       /*
+        * must reset flush_bio before calling into md_handle_request to avoid
+        * a deadlock: bios that already passed the md_handle_request suspend
+        * check could be waiting on this flush to finish, while the
+        * md_handle_request call below could in turn be waiting on those bios
+        * because of that same suspend check
+        */
+       mddev->flush_bio = NULL;
+       wake_up(&mddev->sb_wait);
+
        if (bio->bi_iter.bi_size == 0)
                /* an empty barrier - all done */
                bio_endio(bio);
        else {
                bio->bi_opf &= ~REQ_PREFLUSH;
-               mddev->pers->make_request(mddev, bio);
+               md_handle_request(mddev, bio);
        }
-
-       mddev->flush_bio = NULL;
-       wake_up(&mddev->sb_wait);
 }
 
 void md_flush_request(struct mddev *mddev, struct bio *bio)
index 561d22b9a9a8acc9479cabc389948753ffe703eb..d8287d3cd1bf81b048e90166d91afe76566202b3 100644 (file)
@@ -692,6 +692,7 @@ extern void md_stop_writes(struct mddev *mddev);
 extern int md_rdev_init(struct md_rdev *rdev);
 extern void md_rdev_clear(struct md_rdev *rdev);
 
+extern void md_handle_request(struct mddev *mddev, struct bio *bio);
 extern void mddev_suspend(struct mddev *mddev);
 extern void mddev_resume(struct mddev *mddev);
 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
index 076409455b603582f589ea8fcc68173d421ca33a..928e24a071338ab6e1fe7668c8caa42b2803ccfe 100644 (file)
@@ -6575,14 +6575,17 @@ static ssize_t
 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 {
        struct r5conf *conf;
-       unsigned long new;
+       unsigned int new;
        int err;
        struct r5worker_group *new_groups, *old_groups;
        int group_cnt, worker_cnt_per_group;
 
        if (len >= PAGE_SIZE)
                return -EINVAL;
-       if (kstrtoul(page, 10, &new))
+       if (kstrtouint(page, 10, &new))
+               return -EINVAL;
+       /* 8192 should be big enough */
+       if (new > 8192)
                return -EINVAL;
 
        err = mddev_lock(mddev);
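
The raid5 change above swaps kstrtoul for kstrtouint and adds an explicit upper bound before anything is allocated. The general shape of that bounded sysfs-style parsing, as a user-space sketch (names are ours):

#include <errno.h>
#include <stdlib.h>

/* Parse an unsigned decimal with an explicit sanity cap, mirroring
 * the kstrtouint + bound check added above. */
static int parse_thread_cnt(const char *s, unsigned int *out)
{
        char *end;
        unsigned long v;

        errno = 0;
        v = strtoul(s, &end, 10);
        if (errno || end == s || (*end != '\0' && *end != '\n'))
                return -EINVAL;
        if (v > 8192)                   /* 8192 should be big enough */
                return -EINVAL;
        *out = (unsigned int)v;
        return 0;
}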
index bbaddf18a1b3c4022592f631878441ad19a1fca2..d0ccc6729fd29e734996b28538cd6e8e1745bf2b 100644 (file)
@@ -392,6 +392,7 @@ static const struct sdhci_pci_fixes sdhci_intel_pch_sdio = {
 
 enum {
        INTEL_DSM_FNS           =  0,
+       INTEL_DSM_V18_SWITCH    =  3,
        INTEL_DSM_DRV_STRENGTH  =  9,
        INTEL_DSM_D3_RETUNE     = 10,
 };
@@ -557,6 +558,19 @@ static void intel_hs400_enhanced_strobe(struct mmc_host *mmc,
        sdhci_writel(host, val, INTEL_HS400_ES_REG);
 }
 
+static void sdhci_intel_voltage_switch(struct sdhci_host *host)
+{
+       struct sdhci_pci_slot *slot = sdhci_priv(host);
+       struct intel_host *intel_host = sdhci_pci_priv(slot);
+       struct device *dev = &slot->chip->pdev->dev;
+       u32 result = 0;
+       int err;
+
+       err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result);
+       pr_debug("%s: %s DSM error %d result %u\n",
+                mmc_hostname(host->mmc), __func__, err, result);
+}
+
 static const struct sdhci_ops sdhci_intel_byt_ops = {
        .set_clock              = sdhci_set_clock,
        .set_power              = sdhci_intel_set_power,
@@ -565,6 +579,7 @@ static const struct sdhci_ops sdhci_intel_byt_ops = {
        .reset                  = sdhci_reset,
        .set_uhs_signaling      = sdhci_set_uhs_signaling,
        .hw_reset               = sdhci_pci_hw_reset,
+       .voltage_switch         = sdhci_intel_voltage_switch,
 };
 
 static void byt_read_dsm(struct sdhci_pci_slot *slot)
index 12cf8288d6635eafef4677630d669c1a60238b45..a7293e186e03fc44ccb271405e99fbfaef06d770 100644 (file)
@@ -129,50 +129,6 @@ static int tmio_mmc_next_sg(struct tmio_mmc_host *host)
 
 #define CMDREQ_TIMEOUT 5000
 
-#ifdef CONFIG_MMC_DEBUG
-
-#define STATUS_TO_TEXT(a, status, i) \
-       do { \
-               if ((status) & TMIO_STAT_##a) { \
-                       if ((i)++) \
-                               printk(KERN_DEBUG " | "); \
-                       printk(KERN_DEBUG #a); \
-               } \
-       } while (0)
-
-static void pr_debug_status(u32 status)
-{
-       int i = 0;
-
-       pr_debug("status: %08x = ", status);
-       STATUS_TO_TEXT(CARD_REMOVE, status, i);
-       STATUS_TO_TEXT(CARD_INSERT, status, i);
-       STATUS_TO_TEXT(SIGSTATE, status, i);
-       STATUS_TO_TEXT(WRPROTECT, status, i);
-       STATUS_TO_TEXT(CARD_REMOVE_A, status, i);
-       STATUS_TO_TEXT(CARD_INSERT_A, status, i);
-       STATUS_TO_TEXT(SIGSTATE_A, status, i);
-       STATUS_TO_TEXT(CMD_IDX_ERR, status, i);
-       STATUS_TO_TEXT(STOPBIT_ERR, status, i);
-       STATUS_TO_TEXT(ILL_FUNC, status, i);
-       STATUS_TO_TEXT(CMD_BUSY, status, i);
-       STATUS_TO_TEXT(CMDRESPEND, status, i);
-       STATUS_TO_TEXT(DATAEND, status, i);
-       STATUS_TO_TEXT(CRCFAIL, status, i);
-       STATUS_TO_TEXT(DATATIMEOUT, status, i);
-       STATUS_TO_TEXT(CMDTIMEOUT, status, i);
-       STATUS_TO_TEXT(RXOVERFLOW, status, i);
-       STATUS_TO_TEXT(TXUNDERRUN, status, i);
-       STATUS_TO_TEXT(RXRDY, status, i);
-       STATUS_TO_TEXT(TXRQ, status, i);
-       STATUS_TO_TEXT(ILL_ACCESS, status, i);
-       printk("\n");
-}
-
-#else
-#define pr_debug_status(s)  do { } while (0)
-#endif
-
 static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
 {
        struct tmio_mmc_host *host = mmc_priv(mmc);
@@ -762,9 +718,6 @@ irqreturn_t tmio_mmc_irq(int irq, void *devid)
        status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS);
        ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask;
 
-       pr_debug_status(status);
-       pr_debug_status(ireg);
-
        /* Clear the status except the interrupt status */
        sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);
 
index 5736b0c90b339b6bc3e8e1ae3189341910b958f2..a308e707392d595902b77a03e2078ffe8da97d9e 100644 (file)
@@ -581,6 +581,14 @@ static struct mtd_part *allocate_partition(struct mtd_info *parent,
                slave->mtd.erasesize = parent->erasesize;
        }
 
+       /*
+        * Slave erasesize might differ from the master one if the master
+        * exposes several regions with different erasesize. Adjust
+        * wr_alignment accordingly.
+        */
+       if (!(slave->mtd.flags & MTD_NO_ERASE))
+               wr_alignment = slave->mtd.erasesize;
+
        tmp = slave->offset;
        remainder = do_div(tmp, wr_alignment);
        if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
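Note: without this hunk wr_alignment would stay at the master's erase size even when allocate_partition() has just given the slave a region-specific erasesize, so the checks below could test the partition offset against the wrong erase block. do_div() is used because the offset is 64-bit: it divides in place and returns the 32-bit remainder, e.g. (locals hypothetical):

    u64 tmp = slave_offset;                      /* 64-bit partition offset */
    u32 remainder = do_div(tmp, wr_alignment);   /* tmp now holds the quotient */

    if (remainder)
            pr_warn("partition start not aligned to its erase block\n");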
index 146af82183145d9d5164665aeaccdf789c4cca98..8268636675efc8b3d81959f0f1911a145c42fcc1 100644 (file)
@@ -363,7 +363,7 @@ atmel_pmecc_create_user(struct atmel_pmecc *pmecc,
        size += (req->ecc.strength + 1) * sizeof(u16);
        /* Reserve space for mu, dmu and delta. */
        size = ALIGN(size, sizeof(s32));
-       size += (req->ecc.strength + 1) * sizeof(s32);
+       size += (req->ecc.strength + 1) * sizeof(s32) * 3;
 
        user = kzalloc(size, GFP_KERNEL);
        if (!user)
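Note: as the comment in the hunk says, mu, dmu and delta are three scratch tables used by the error-locator computation, each holding (strength + 1) s32 entries, but the old line reserved room for only one of them. For strength 8 the tables need 3 * 9 * 4 = 108 bytes, not 36, so the previous kzalloc() came up 72 bytes short and the decoder wrote past the end of the buffer.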
index 1eecfa301f7fb2efbfd025f70d39b7d159a42dde..8e075ea2743ef41b4ed08065f502527cbe804c82 100644 (file)
@@ -686,7 +686,7 @@ static ssize_t driver_override_store(struct device *dev,
                                     const char *buf, size_t count)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
-       char *driver_override, *old = pdev->driver_override, *cp;
+       char *driver_override, *old, *cp;
 
        /* We need to keep extra room for a newline */
        if (count >= (PAGE_SIZE - 1))
@@ -700,12 +700,15 @@ static ssize_t driver_override_store(struct device *dev,
        if (cp)
                *cp = '\0';
 
+       device_lock(dev);
+       old = pdev->driver_override;
        if (strlen(driver_override)) {
                pdev->driver_override = driver_override;
        } else {
                kfree(driver_override);
                pdev->driver_override = NULL;
        }
+       device_unlock(dev);
 
        kfree(old);
 
@@ -716,8 +719,12 @@ static ssize_t driver_override_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
        struct pci_dev *pdev = to_pci_dev(dev);
+       ssize_t len;
 
-       return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
+       device_lock(dev);
+       len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
+       device_unlock(dev);
+       return len;
 }
 static DEVICE_ATTR_RW(driver_override);
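Note: both sysfs methods previously accessed pdev->driver_override without any lock, so a store racing with another store (or with show) could kfree() the string while it was still being read; the old pointer was even snapshotted before the handler decided what to publish. The fix brackets the pointer swap and the reader with device_lock(); the free can then safely happen outside the critical section:

    device_lock(dev);                 /* serializes with show() and other stores */
    old = pdev->driver_override;      /* snapshot the current string under lock  */
    pdev->driver_override = new_str;  /* publish the replacement (hypothetical)  */
    device_unlock(dev);

    kfree(old);   /* safe: no reader can still observe the old pointer */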
 
index 85de30f93a9cc45c1fe0d634c7f687947e1a39bb..56a8195096a229c6975d3f78746ecbc4c6169660 100644 (file)
@@ -254,10 +254,12 @@ static int bl_update_status(struct backlight_device *b)
 {
        struct acpi_device *device = bl_get_data(b);
 
-       if (b->props.power == FB_BLANK_POWERDOWN)
-               call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
-       else
-               call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+       if (fext) {
+               if (b->props.power == FB_BLANK_POWERDOWN)
+                       call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3);
+               else
+                       call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0);
+       }
 
        return set_lcd_level(device, b->props.brightness);
 }
index a64285ab0728f14c870db3d72b19190d516d44f3..af3e4d3f9735fdc3430eea0ea05cc1a78e2fa306 100644 (file)
@@ -699,13 +699,13 @@ static void _aac_probe_container1(void * context, struct fib * fibptr)
        int status;
 
        dresp = (struct aac_mount *) fib_data(fibptr);
-       if (!(fibptr->dev->supplement_adapter_info.supported_options2 &
-           AAC_OPTION_VARIABLE_BLOCK_SIZE))
+       if (!aac_supports_2T(fibptr->dev)) {
                dresp->mnt[0].capacityhigh = 0;
-       if ((le32_to_cpu(dresp->status) != ST_OK) ||
-           (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
-               _aac_probe_container2(context, fibptr);
-               return;
+               if ((le32_to_cpu(dresp->status) == ST_OK) &&
+                       (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
+                       _aac_probe_container2(context, fibptr);
+                       return;
+               }
        }
        scsicmd = (struct scsi_cmnd *) context;
 
index 92fabf2b0c24c4c24f79ae8626d80ef09853816d..403a639574e5ea10c5c8500141204ebc513bd7c1 100644 (file)
@@ -2701,6 +2701,11 @@ static inline int aac_is_src(struct aac_dev *dev)
        return 0;
 }
 
+static inline int aac_supports_2T(struct aac_dev *dev)
+{
+       return (dev->adapter_info.options & AAC_OPT_NEW_COMM_64);
+}
+
 char * get_container_type(unsigned type);
 extern int numacb;
 extern char aac_driver_version[];
index 87cc4a93e637e6db517c12b0c01bb212eb2e8445..62beb259646692c26ea9a05a72cfe26019d24eed 100644 (file)
@@ -906,12 +906,14 @@ static int aac_eh_dev_reset(struct scsi_cmnd *cmd)
 
        bus = aac_logical_to_phys(scmd_channel(cmd));
        cid = scmd_id(cmd);
-       info = &aac->hba_map[bus][cid];
-       if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
-           info->devtype != AAC_DEVTYPE_NATIVE_RAW)
+
+       if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS)
                return FAILED;
 
-       if (info->reset_state > 0)
+       info = &aac->hba_map[bus][cid];
+
+       if (info->devtype != AAC_DEVTYPE_NATIVE_RAW &&
+           info->reset_state > 0)
                return FAILED;
 
        pr_err("%s: Host adapter reset request. SCSI hang ?\n",
@@ -962,12 +964,14 @@ static int aac_eh_target_reset(struct scsi_cmnd *cmd)
 
        bus = aac_logical_to_phys(scmd_channel(cmd));
        cid = scmd_id(cmd);
-       info = &aac->hba_map[bus][cid];
-       if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
-           info->devtype != AAC_DEVTYPE_NATIVE_RAW)
+
+       if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS)
                return FAILED;
 
-       if (info->reset_state > 0)
+       info = &aac->hba_map[bus][cid];
+
+       if (info->devtype != AAC_DEVTYPE_NATIVE_RAW &&
+           info->reset_state > 0)
                return FAILED;
 
        pr_err("%s: Host adapter reset request. SCSI hang ?\n",
index 48c2b2b34b7222ae656ab4f0c9113d59a0d58d24..0c9361c87ec8de8b853f6ccaa6132663a4b982bd 100644 (file)
@@ -740,6 +740,8 @@ static void aac_send_iop_reset(struct aac_dev *dev)
        aac_set_intx_mode(dev);
 
        src_writel(dev, MUnit.IDR, IOP_SRC_RESET_MASK);
+
+       msleep(5000);
 }
 
 static void aac_send_hardware_soft_reset(struct aac_dev *dev)
index 7e7ae786121b6e8458f7a7aa8954141c77e223a5..100bc4c8798d76852adb9224edc763f70f0741ff 100644 (file)
@@ -6131,6 +6131,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                                "Extents and RPI headers enabled.\n");
                }
                mempool_free(mboxq, phba->mbox_mem_pool);
+               rc = -EIO;
                goto out_free_bsmbx;
        }
 
index 38942050b2656bd104abb465a79b29c4e7039d43..dab876c6547392c0ccb75047c4cb767640407834 100644 (file)
@@ -580,7 +580,8 @@ int scsi_check_sense(struct scsi_cmnd *scmd)
                if (sshdr.asc == 0x20 || /* Invalid command operation code */
                    sshdr.asc == 0x21 || /* Logical block address out of range */
                    sshdr.asc == 0x24 || /* Invalid field in cdb */
-                   sshdr.asc == 0x26) { /* Parameter value invalid */
+                   sshdr.asc == 0x26 || /* Parameter value invalid */
+                   sshdr.asc == 0x27) { /* Write protected */
                        set_host_byte(scmd, DID_TARGET_FAILURE);
                }
                return SUCCESS;
index ba9d70f8a6a17f9b25008b3f86b03932c9218eb2..cbd4495d0ff9dedb92852d029845418e1a173072 100644 (file)
@@ -2739,7 +2739,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 
        list_for_each_entry(rport, &fc_host->rports, peers) {
 
-               if ((rport->port_state == FC_PORTSTATE_BLOCKED) &&
+               if ((rport->port_state == FC_PORTSTATE_BLOCKED ||
+                    rport->port_state == FC_PORTSTATE_NOTPRESENT) &&
                        (rport->channel == channel)) {
 
                        switch (fc_host->tgtid_bind_type) {
@@ -2876,7 +2877,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
                        memcpy(&rport->port_name, &ids->port_name,
                                sizeof(rport->port_name));
                        rport->port_id = ids->port_id;
-                       rport->roles = ids->roles;
                        rport->port_state = FC_PORTSTATE_ONLINE;
                        rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
 
@@ -2885,15 +2885,7 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
                                                fci->f->dd_fcrport_size);
                        spin_unlock_irqrestore(shost->host_lock, flags);
 
-                       if (ids->roles & FC_PORT_ROLE_FCP_TARGET) {
-                               scsi_target_unblock(&rport->dev, SDEV_RUNNING);
-
-                               /* initiate a scan of the target */
-                               spin_lock_irqsave(shost->host_lock, flags);
-                               rport->flags |= FC_RPORT_SCAN_PENDING;
-                               scsi_queue_work(shost, &rport->scan_work);
-                               spin_unlock_irqrestore(shost->host_lock, flags);
-                       }
+                       fc_remote_port_rolechg(rport, ids->roles);
                        return rport;
                }
        }
index 8934f19bce8ea815d696189e7bb075c43cffaa5c..0190aeff5f7fde124da9e5d621a8e20d1f0b3eae 100644 (file)
@@ -3689,7 +3689,7 @@ iscsi_if_rx(struct sk_buff *skb)
                uint32_t group;
 
                nlh = nlmsg_hdr(skb);
-               if (nlh->nlmsg_len < sizeof(*nlh) ||
+               if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) ||
                    skb->len < nlh->nlmsg_len) {
                        break;
                }
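Note: nlmsg_len counts the netlink header plus payload, so the old test (nlmsg_len < sizeof(*nlh)) only proved the header was present while the handler went on to read a struct iscsi_uevent from the payload. Requiring sizeof(*nlh) + sizeof(*ev) guarantees the fixed-size event lies entirely within the message, and the unchanged skb->len < nlmsg_len test guarantees the message lies entirely within the skb:

    |<------------------ nlh->nlmsg_len ------------------>|
    | struct nlmsghdr | struct iscsi_uevent | trailing data |
    |<--------------------------- skb->len ------------------------->|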
index 5fbfd9cfb6d63e61c1c89e6a9deb071ce6de2d32..5b3d57fc82d39bc8fdbcfb89582589a2be854357 100644 (file)
@@ -169,6 +169,9 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
 static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
 {
        struct pci_bar_info *bar = data;
+       unsigned int pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+       const struct resource *res = dev->resource;
+       u32 mask;
 
        if (unlikely(!bar)) {
                pr_warn(DRV_NAME ": driver data not found for %s\n",
@@ -179,7 +182,13 @@ static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
        /* A write to obtain the length must happen as a 32-bit write.
         * This does not (yet) support writing individual bytes
         */
-       if (value == ~0)
+       if (res[pos].flags & IORESOURCE_IO)
+               mask = ~PCI_BASE_ADDRESS_IO_MASK;
+       else if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64))
+               mask = 0;
+       else
+               mask = ~PCI_BASE_ADDRESS_MEM_MASK;
+       if ((value | mask) == ~0U)
                bar->which = 1;
        else {
                u32 tmpval;
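Note: a guest sizes a BAR by writing all-ones and reading back a size mask, but the low bits of a BAR are read-only type bits (2 for I/O BARs, 4 for memory BARs), so the value arriving here can legitimately be e.g. 0xfffffffc instead of 0xffffffff. OR-ing the type-specific read-only mask back in before comparing against ~0U accepts those writes as sizing requests too; the upper half of a 64-bit memory BAR carries no flag bits, hence mask = 0 for that case.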
index b51d23f5cafa9eff1cfc9b1936817095a1b3b5e0..280384bf34f13b20b7a0817a36fe49a53d618285 100644 (file)
@@ -107,7 +107,8 @@ static void end_compressed_bio_read(struct bio *bio)
        struct inode *inode;
        struct page *page;
        unsigned long index;
-       int ret;
+       unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
+       int ret = 0;
 
        if (bio->bi_status)
                cb->errors = 1;
@@ -118,6 +119,21 @@ static void end_compressed_bio_read(struct bio *bio)
        if (!refcount_dec_and_test(&cb->pending_bios))
                goto out;
 
+       /*
+        * Record the correct mirror_num in cb->orig_bio so that
+        * read-repair can work properly.
+        */
+       ASSERT(btrfs_io_bio(cb->orig_bio));
+       btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
+       cb->mirror_num = mirror;
+
+       /*
+        * Some IO in this cb have failed, just skip checksum as there
+        * is no way it could be correct.
+        */
+       if (cb->errors == 1)
+               goto csum_failed;
+
        inode = cb->inode;
        ret = check_compressed_csum(BTRFS_I(inode), cb,
                                    (u64)bio->bi_iter.bi_sector << 9);
index 5a8933da39a75f504c5bb0329a2b62508be40042..899ddaeeacec8dcf1bf280e6c3b30c30eaa8c8c2 100644 (file)
@@ -709,7 +709,6 @@ struct btrfs_delayed_root;
 #define BTRFS_FS_OPEN                          5
 #define BTRFS_FS_QUOTA_ENABLED                 6
 #define BTRFS_FS_QUOTA_ENABLING                        7
-#define BTRFS_FS_QUOTA_DISABLING               8
 #define BTRFS_FS_UPDATE_UUID_TREE_GEN          9
 #define BTRFS_FS_CREATING_FREE_SPACE_TREE      10
 #define BTRFS_FS_BTREE_ERR                     11
index 487bbe4fb3c6e7a92156aec7060dd84e3c2525a0..dfdab849037b70887e9a61d5b1044a313658d437 100644 (file)
@@ -3643,7 +3643,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
        u64 flags;
 
        do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
-       backup_super_roots(fs_info);
+
+       /*
+        * max_mirrors == 0 indicates we're from commit_transaction,
+        * not from fsync where the tree roots in fs_info have not
+        * been consistent on disk.
+        */
+       if (max_mirrors == 0)
+               backup_super_roots(fs_info);
 
        sb = fs_info->super_for_commit;
        dev_item = &sb->dev_item;
index 3e5bb0cdd3cdd46dbc92273d370d9593963820e7..12ab19a4b93e16167d3c58f3a4991ed979fe46e5 100644 (file)
@@ -3471,8 +3471,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        unsigned int write_flags = 0;
        unsigned long nr_written = 0;
 
-       if (wbc->sync_mode == WB_SYNC_ALL)
-               write_flags = REQ_SYNC;
+       write_flags = wbc_to_write_flags(wbc);
 
        trace___extent_writepage(page, inode, wbc);
 
@@ -3718,7 +3717,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
        unsigned long start, end;
-       unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META;
+       unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
        int ret = 0;
 
        clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
@@ -4063,9 +4062,6 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
        if (epd->bio) {
                int ret;
 
-               bio_set_op_attrs(epd->bio, REQ_OP_WRITE,
-                                epd->sync_io ? REQ_SYNC : 0);
-
                ret = submit_one_bio(epd->bio, 0, epd->bio_flags);
                BUG_ON(ret < 0); /* -ENOMEM */
                epd->bio = NULL;
index 128f3e58634f4a6821b4cf18547db075cfbe03d5..d94e3f68b9b134f8e3027dec70f1952c42e8ce52 100644 (file)
@@ -135,6 +135,18 @@ static inline void btrfs_cleanup_ordered_extents(struct inode *inode,
                                                 const u64 offset,
                                                 const u64 bytes)
 {
+       unsigned long index = offset >> PAGE_SHIFT;
+       unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT;
+       struct page *page;
+
+       while (index <= end_index) {
+               page = find_get_page(inode->i_mapping, index);
+               index++;
+               if (!page)
+                       continue;
+               ClearPagePrivate2(page);
+               put_page(page);
+       }
        return __endio_write_update_ordered(inode, offset + PAGE_SIZE,
                                            bytes - PAGE_SIZE, false);
 }
@@ -8357,11 +8369,8 @@ static void btrfs_endio_direct_read(struct bio *bio)
        struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        blk_status_t err = bio->bi_status;
 
-       if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) {
+       if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED)
                err = btrfs_subio_endio_read(inode, io_bio, err);
-               if (!err)
-                       bio->bi_status = 0;
-       }
 
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
                      dip->logical_offset + dip->bytes - 1);
@@ -8369,7 +8378,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
 
        kfree(dip);
 
-       dio_bio->bi_status = bio->bi_status;
+       dio_bio->bi_status = err;
        dio_end_io(dio_bio);
 
        if (io_bio->end_io)
@@ -8387,6 +8396,7 @@ static void __endio_write_update_ordered(struct inode *inode,
        btrfs_work_func_t func;
        u64 ordered_offset = offset;
        u64 ordered_bytes = bytes;
+       u64 last_offset;
        int ret;
 
        if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
@@ -8398,6 +8408,7 @@ static void __endio_write_update_ordered(struct inode *inode,
        }
 
 again:
+       last_offset = ordered_offset;
        ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                   &ordered_offset,
                                                   ordered_bytes,
@@ -8408,6 +8419,12 @@ again:
        btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL);
        btrfs_queue_work(wq, &ordered->work);
 out_test:
+       /*
+        * If btrfs_dec_test_ordered_pending does not find any ordered extent
+        * in the range, we can exit.
+        */
+       if (ordered_offset == last_offset)
+               return;
        /*
         * our bio might span multiple ordered extents.  If we haven't
         * completed the accounting for the whole dio, go back and try again
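Note: btrfs_dec_test_first_ordered_pending() advances ordered_offset as it completes ordered extents, but leaves it untouched when nothing remains; the old loop re-entered whenever the cursor was still inside the range, so a range with no remaining ordered extents spun forever. Snapshotting last_offset at the top of each pass and returning when the cursor has not moved is the usual forward-progress guard, roughly (helper hypothetical):

    u64 cursor = start, last;

    do {
            last = cursor;
            complete_next_ordered(&cursor);   /* may not advance */
    } while (cursor != last && cursor < end);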
index d6715c2bcdc472d567bab9fd617e808fbcbd480f..6c7a49faf4e0636e67c9f818893975dc48d7436a 100644 (file)
@@ -2773,9 +2773,9 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
        }
        mutex_unlock(&fs_devices->device_list_mutex);
 
-       fi_args->nodesize = fs_info->super_copy->nodesize;
-       fi_args->sectorsize = fs_info->super_copy->sectorsize;
-       fi_args->clone_alignment = fs_info->super_copy->sectorsize;
+       fi_args->nodesize = fs_info->nodesize;
+       fi_args->sectorsize = fs_info->sectorsize;
+       fi_args->clone_alignment = fs_info->sectorsize;
 
        if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
                ret = -EFAULT;
@@ -3032,7 +3032,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
 out:
        if (ret)
                btrfs_cmp_data_free(cmp);
-       return 0;
+       return ret;
 }
 
 static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp)
@@ -4061,6 +4061,10 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
                ret = PTR_ERR(new_root);
                goto out;
        }
+       if (!is_fstree(new_root->objectid)) {
+               ret = -ENOENT;
+               goto out;
+       }
 
        path = btrfs_alloc_path();
        if (!path) {
index 5c8b61c86e61f9ed5445f5022a3b20f1134068bf..e172d4843eae2d8eb6f0d29dce38fb7f693f4ed0 100644 (file)
@@ -807,7 +807,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
        }
        ret = 0;
 out:
-       set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags);
        btrfs_free_path(path);
        return ret;
 }
@@ -953,7 +952,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        if (!fs_info->quota_root)
                goto out;
        clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-       set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags);
        btrfs_qgroup_wait_for_completion(fs_info, false);
        spin_lock(&fs_info->qgroup_lock);
        quota_root = fs_info->quota_root;
@@ -1307,6 +1305,8 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                }
        }
        ret = del_qgroup_item(trans, quota_root, qgroupid);
+       if (ret && ret != -ENOENT)
+               goto out;
 
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
@@ -2086,8 +2086,6 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
 
        if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags))
                set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-       if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags))
-               clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 
        spin_lock(&fs_info->qgroup_lock);
        while (!list_empty(&fs_info->dirty_qgroups)) {
index 3a49a3c2fca4549a8e6fd19cf4ec9de119aa0a5d..9841faef08ea72d0b50aa3ad3c881bd27bd7fc76 100644 (file)
@@ -2400,11 +2400,11 @@ void free_reloc_roots(struct list_head *list)
        while (!list_empty(list)) {
                reloc_root = list_entry(list->next, struct btrfs_root,
                                        root_list);
+               __del_reloc_root(reloc_root);
                free_extent_buffer(reloc_root->node);
                free_extent_buffer(reloc_root->commit_root);
                reloc_root->node = NULL;
                reloc_root->commit_root = NULL;
-               __del_reloc_root(reloc_root);
        }
 }
 
index 32b043ef8ac9a3e25f492e99cd4fe4ef347205cf..8fd195cfe81b97b92412b95ac645188eb9a5a693 100644 (file)
@@ -2630,7 +2630,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
        } else {
                btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o",
                                (int)(mode & S_IFMT));
-               ret = -ENOTSUPP;
+               ret = -EOPNOTSUPP;
                goto out;
        }
 
index ad7f4bab640be36d9f43b8b2f3371a7bd871738f..c800d067fcbf0c8eec90c479eefe5df13e2dc945 100644 (file)
@@ -4181,6 +4181,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        struct extent_map *em, *n;
        struct list_head extents;
        struct extent_map_tree *tree = &inode->extent_tree;
+       u64 logged_start, logged_end;
        u64 test_gen;
        int ret = 0;
        int num = 0;
@@ -4190,10 +4191,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        down_write(&inode->dio_sem);
        write_lock(&tree->lock);
        test_gen = root->fs_info->last_trans_committed;
+       logged_start = start;
+       logged_end = end;
 
        list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
                list_del_init(&em->list);
-
                /*
                 * Just an arbitrary number, this can be really CPU intensive
                 * once we start getting a lot of extents, and really once we
@@ -4208,6 +4210,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 
                if (em->generation <= test_gen)
                        continue;
+
+               if (em->start < logged_start)
+                       logged_start = em->start;
+               if ((em->start + em->len - 1) > logged_end)
+                       logged_end = em->start + em->len - 1;
+
                /* Need a ref to keep it from getting evicted from cache */
                refcount_inc(&em->refs);
                set_bit(EXTENT_FLAG_LOGGING, &em->flags);
@@ -4216,7 +4224,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        }
 
        list_sort(NULL, &extents, extent_cmp);
-       btrfs_get_logged_extents(inode, logged_list, start, end);
+       btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
        /*
         * Some ordered extents started by fsync might have completed
         * before we could collect them into the list logged_list, which
index 0e8f16c305df192b412b86be4a69acb1a2236243..b39737568c223c208d92b2f4ab73ac0263ad3ff4 100644 (file)
@@ -6166,7 +6166,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
        map_length = length;
 
        btrfs_bio_counter_inc_blocked(fs_info);
-       ret = __btrfs_map_block(fs_info, bio_op(bio), logical,
+       ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
                                &map_length, &bbio, mirror_num, 1);
        if (ret) {
                btrfs_bio_counter_dec(fs_info);
index 8194d30bdca08e9cfa244e3a1df1df219ae1b806..be61cf742b5ed1d0d201ff415e9355fa2684508c 100644 (file)
@@ -1009,6 +1009,13 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        WARN_ON_ONCE(ret);
        ret = 0;
 
+       if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+           !inode->i_sb->s_dio_done_wq) {
+               ret = sb_init_dio_done_wq(inode->i_sb);
+               if (ret < 0)
+                       goto out_free_dio;
+       }
+
        inode_dio_begin(inode);
 
        blk_start_plug(&plug);
@@ -1031,13 +1038,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
        if (ret < 0)
                iomap_dio_set_error(dio, ret);
 
-       if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
-                       !inode->i_sb->s_dio_done_wq) {
-               ret = sb_init_dio_done_wq(inode->i_sb);
-               if (ret < 0)
-                       iomap_dio_set_error(dio, ret);
-       }
-
        if (!atomic_dec_and_test(&dio->ref)) {
                if (!is_sync_kiocb(iocb))
                        return -EIOCBQUEUED;
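Note: ordering fix -- the deferred-completion workqueue must exist before any bio is submitted, because once IO is in flight a completion may need to punt to that workqueue; allocating it after the submission loop, as the removed hunk did, races with the very completions it is meant to service. Hoisting sb_init_dio_done_wq() ahead of inode_dio_begin() closes the window, and a setup failure can now simply unwind via out_free_dio instead of poisoning an in-flight dio.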
index db692f554158854d4fa683d116827594d6ffd978..447a24d77b894ef733412ba201cadcaa9a226f7e 100644 (file)
@@ -514,9 +514,11 @@ static int isofs_show_options(struct seq_file *m, struct dentry *root)
        if (sbi->s_fmode != ISOFS_INVALID_MODE)
                seq_printf(m, ",fmode=%o", sbi->s_fmode);
 
+#ifdef CONFIG_JOLIET
        if (sbi->s_nls_iocharset &&
            strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0)
                seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset);
+#endif
        return 0;
 }
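Note: the s_nls_iocharset member of the isofs superblock info only exists when CONFIG_JOLIET is enabled (the field itself sits behind the same #ifdef in fs/isofs/isofs.h), so the unguarded seq_printf() broke CONFIG_JOLIET=n builds; the added guard simply mirrors the field's own.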
 
index 525157ca25cbe6a706ecb05c631b2b6d0bcb59a9..77a8eacbe032f22d5bd9514738341a55dcc46b34 100644 (file)
@@ -119,30 +119,25 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
  * simple bit tests.
  */
 static const char * const task_state_array[] = {
-       "R (running)",          /*   0 */
-       "S (sleeping)",         /*   1 */
-       "D (disk sleep)",       /*   2 */
-       "T (stopped)",          /*   4 */
-       "t (tracing stop)",     /*   8 */
-       "X (dead)",             /*  16 */
-       "Z (zombie)",           /*  32 */
+
+       /* states in TASK_REPORT: */
+       "R (running)",          /* 0x00 */
+       "S (sleeping)",         /* 0x01 */
+       "D (disk sleep)",       /* 0x02 */
+       "T (stopped)",          /* 0x04 */
+       "t (tracing stop)",     /* 0x08 */
+       "X (dead)",             /* 0x10 */
+       "Z (zombie)",           /* 0x20 */
+       "P (parked)",           /* 0x40 */
+
+       /* states beyond TASK_REPORT: */
+       "I (idle)",             /* 0x80 */
 };
 
 static inline const char *get_task_state(struct task_struct *tsk)
 {
-       unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
-
-       /*
-        * Parked tasks do not run; they sit in __kthread_parkme().
-        * Without this check, we would report them as running, which is
-        * clearly wrong, so we report them as sleeping instead.
-        */
-       if (tsk->state == TASK_PARKED)
-               state = TASK_INTERRUPTIBLE;
-
-       BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
-
-       return task_state_array[fls(state)];
+       BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array));
+       return task_state_array[__get_task_state(tsk)];
 }
 
 static inline int get_task_umask(struct task_struct *tsk)
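Note: the array is now indexed by the fls() of the reportable state bits -- see the __get_task_state() helper added to include/linux/sched.h later in this diff -- which is why every entry carries its hex comment. A few sample mappings under that scheme:

    /*
     * state bits                       fls()    task_state_array[]
     * TASK_RUNNING         = 0x00  ->    0  ->  "R (running)"
     * TASK_UNINTERRUPTIBLE = 0x02  ->    2  ->  "D (disk sleep)"
     * TASK_PARKED          = 0x40  ->    7  ->  "P (parked)"
     * TASK_REPORT_IDLE     = 0x80  ->    8  ->  "I (idle)"
     */

This also removes the old special cases: parked tasks used to be reported as sleeping, and idle kthreads showed up as 'D'.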
index 8381db9db6d9bcc3acfa645bf9f73cbb5933c564..50b0556a124f2508531ce9e6fc61721aeb179cc8 100644 (file)
@@ -1980,7 +1980,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
                ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
                                      &warn_to[cnt]);
                if (ret) {
+                       spin_lock(&transfer_to[cnt]->dq_dqb_lock);
                        dquot_decr_inodes(transfer_to[cnt], inode_usage);
+                       spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
                        goto over_quota;
                }
        }
index c0187cda2c1ed3ff65b449a74aa71c05ca29eb58..a73e5b34db4181272bc943c4e0ea406797ff0311 100644 (file)
@@ -328,12 +328,16 @@ static int v2_write_dquot(struct dquot *dquot)
        if (!dquot->dq_off) {
                alloc = true;
                down_write(&dqopt->dqio_sem);
+       } else {
+               down_read(&dqopt->dqio_sem);
        }
        ret = qtree_write_dquot(
                        sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv,
                        dquot);
        if (alloc)
                up_write(&dqopt->dqio_sem);
+       else
+               up_read(&dqopt->dqio_sem);
        return ret;
 }
 
index a2b9a47235c5ba48b4db2bf81d03c1579b562f79..f0d4b16873e89aa23522c9ece34b8f1900638176 100644 (file)
@@ -112,7 +112,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
                 * In the generic case the entire file is data, so as long as
                 * offset isn't at the end of the file then the offset is data.
                 */
-               if (offset >= eof)
+               if ((unsigned long long)offset >= eof)
                        return -ENXIO;
                break;
        case SEEK_HOLE:
@@ -120,7 +120,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int whence,
                 * There is a virtual hole at the end of the file, so as long as
                 * offset isn't i_size or larger, return i_size.
                 */
-               if (offset >= eof)
+               if ((unsigned long long)offset >= eof)
                        return -ENXIO;
                offset = eof;
                break;
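Note: offset is a signed loff_t while eof is the (non-negative) end of file, so a negative offset passed to SEEK_DATA or SEEK_HOLE compared below eof and slipped through. The cast folds negatives into huge unsigned values: for lseek(fd, -1, SEEK_DATA), (unsigned long long)-1 == 0xffffffffffffffff >= eof, so both cases now fail the range check and return -ENXIO.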
index b008ff3250eba4f3884e18841c4c316b7e7cbf50..df3e600835e8de81e69ac295b06fae6fecefd7a4 100644 (file)
@@ -156,7 +156,8 @@ __xfs_ag_resv_free(
        trace_xfs_ag_resv_free(pag, type, 0);
 
        resv = xfs_perag_resv(pag, type);
-       pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+       if (pag->pag_agno == 0)
+               pag->pag_mount->m_ag_max_usable += resv->ar_asked;
        /*
         * AGFL blocks are always considered "free", so whatever
         * was reserved at mount time must be given back at umount.
@@ -216,7 +217,14 @@ __xfs_ag_resv_init(
                return error;
        }
 
-       mp->m_ag_max_usable -= ask;
+       /*
+        * Reduce the maximum per-AG allocation length by however much we're
+        * trying to reserve for an AG.  Since this is a filesystem-wide
+        * counter, we only make the adjustment for AG 0.  This assumes that
+        * there aren't any AGs hungrier for per-AG reservation than AG 0.
+        */
+       if (pag->pag_agno == 0)
+               mp->m_ag_max_usable -= ask;
 
        resv = xfs_perag_resv(pag, type);
        resv->ar_asked = ask;
index 459f4b4f08fe5a1eba6fd725cb3a8866a5876a11..044a363119bead470113a35227a8ee6923ebab65 100644 (file)
@@ -49,7 +49,6 @@
 #include "xfs_rmap.h"
 #include "xfs_ag_resv.h"
 #include "xfs_refcount.h"
-#include "xfs_rmap_btree.h"
 #include "xfs_icache.h"
 
 
@@ -192,12 +191,8 @@ xfs_bmap_worst_indlen(
        int             maxrecs;        /* maximum record count at this level */
        xfs_mount_t     *mp;            /* mount structure */
        xfs_filblks_t   rval;           /* return value */
-       xfs_filblks_t   orig_len;
 
        mp = ip->i_mount;
-
-       /* Calculate the worst-case size of the bmbt. */
-       orig_len = len;
        maxrecs = mp->m_bmap_dmxr[0];
        for (level = 0, rval = 0;
             level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
@@ -205,20 +200,12 @@ xfs_bmap_worst_indlen(
                len += maxrecs - 1;
                do_div(len, maxrecs);
                rval += len;
-               if (len == 1) {
-                       rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
+               if (len == 1)
+                       return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
                                level - 1;
-                       break;
-               }
                if (level == 0)
                        maxrecs = mp->m_bmap_dmxr[1];
        }
-
-       /* Calculate the worst-case size of the rmapbt. */
-       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-               rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) +
-                               mp->m_rmap_maxlevels;
-
        return rval;
 }
 
index 29172609f2a31b756cd40da7b42f288fe8b0915b..f18e5932aec4bfe5f1e1b3fb0d69d1b32c540b16 100644 (file)
@@ -343,7 +343,8 @@ xfs_end_io(
                error = xfs_reflink_end_cow(ip, offset, size);
                break;
        case XFS_IO_UNWRITTEN:
-               error = xfs_iomap_write_unwritten(ip, offset, size);
+               /* writeback should never update isize */
+               error = xfs_iomap_write_unwritten(ip, offset, size, false);
                break;
        default:
                ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
index cd9a5400ba4fef2a75d5c24f3f626f5ad0211fae..bc6c6e10a9699f789b998035fde33973796f46fa 100644 (file)
@@ -1459,7 +1459,19 @@ xfs_shift_file_space(
                return error;
 
        /*
-        * The extent shiting code works on extent granularity. So, if
+        * Clean out anything hanging around in the cow fork now that
+        * we've flushed all the dirty data out to disk to avoid having
+        * CoW extents at the wrong offsets.
+        */
+       if (xfs_is_reflink_inode(ip)) {
+               error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
+                               true);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * The extent shifting code works on extent granularity. So, if
         * stop_fsb is not the starting block of extent, we need to split
         * the extent at stop_fsb.
         */
index da14658da3103475940555600581f0bf12217d55..2f97c12ca75e4d5554b94b7a195b4dca53f90174 100644 (file)
@@ -1258,8 +1258,6 @@ xfs_buf_ioapply_map(
        int             size;
        int             offset;
 
-       total_nr_pages = bp->b_page_count;
-
        /* skip the pages in the buffer before the start offset */
        page_index = 0;
        offset = *buf_offset;
index bd786a9ac2c38879cf6e94ad660cd1bda19873c5..eaf86f55b7f2156c351aa26fd0190871ef1772bd 100644 (file)
@@ -347,7 +347,7 @@ xfs_verifier_error(
 {
        struct xfs_mount *mp = bp->b_target->bt_mount;
 
-       xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
+       xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx",
                  bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
                  __return_address, bp->b_ops->name, bp->b_bn);
 
index ebdd0bd2b2616a7e9052b4da8378721b144dc0af..309e26c9dddb49715e81cb38b3aed00e33214463 100644 (file)
@@ -58,7 +58,7 @@ xfs_zero_range(
        xfs_off_t               count,
        bool                    *did_zero)
 {
-       return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
+       return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops);
 }
 
 int
@@ -377,8 +377,6 @@ restart:
         */
        spin_lock(&ip->i_flags_lock);
        if (iocb->ki_pos > i_size_read(inode)) {
-               bool    zero = false;
-
                spin_unlock(&ip->i_flags_lock);
                if (!drained_dio) {
                        if (*iolock == XFS_IOLOCK_SHARED) {
@@ -399,7 +397,7 @@ restart:
                        drained_dio = true;
                        goto restart;
                }
-               error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
+               error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL);
                if (error)
                        return error;
        } else
@@ -436,7 +434,6 @@ xfs_dio_write_end_io(
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  offset = iocb->ki_pos;
-       bool                    update_size = false;
        int                     error = 0;
 
        trace_xfs_end_io_direct_write(ip, offset, size);
@@ -447,6 +444,21 @@ xfs_dio_write_end_io(
        if (size <= 0)
                return size;
 
+       if (flags & IOMAP_DIO_COW) {
+               error = xfs_reflink_end_cow(ip, offset, size);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * Unwritten conversion updates the in-core isize after extent
+        * conversion but before updating the on-disk size. Updating isize any
+        * earlier allows a racing dio read to find unwritten extents before
+        * they are converted.
+        */
+       if (flags & IOMAP_DIO_UNWRITTEN)
+               return xfs_iomap_write_unwritten(ip, offset, size, true);
+
        /*
         * We need to update the in-core inode size here so that we don't end up
         * with the on-disk inode size being outside the in-core inode size. We
@@ -461,20 +473,11 @@ xfs_dio_write_end_io(
        spin_lock(&ip->i_flags_lock);
        if (offset + size > i_size_read(inode)) {
                i_size_write(inode, offset + size);
-               update_size = true;
-       }
-       spin_unlock(&ip->i_flags_lock);
-
-       if (flags & IOMAP_DIO_COW) {
-               error = xfs_reflink_end_cow(ip, offset, size);
-               if (error)
-                       return error;
-       }
-
-       if (flags & IOMAP_DIO_UNWRITTEN)
-               error = xfs_iomap_write_unwritten(ip, offset, size);
-       else if (update_size)
+               spin_unlock(&ip->i_flags_lock);
                error = xfs_setfilesize(ip, offset, size);
+       } else {
+               spin_unlock(&ip->i_flags_lock);
+       }
 
        return error;
 }
index 5599dda4727af6736fa1ae15429456b2e5fc6a04..4ec5b7f4540137622856883ab5dafb73d95f89ab 100644 (file)
@@ -1624,10 +1624,12 @@ xfs_itruncate_extents(
                goto out;
 
        /*
-        * Clear the reflink flag if we truncated everything.
+        * Clear the reflink flag if there are no data fork blocks and
+        * there are no extents staged in the cow fork.
         */
-       if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
-               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+       if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
+               if (ip->i_d.di_nblocks == 0)
+                       ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
                xfs_inode_clear_cowblocks_tag(ip);
        }
 
index 6d0f74ec31e89c35769b55f3402003c2548ae980..a705f34b58fad089659a019dcdd1caecffd01aa3 100644 (file)
@@ -745,7 +745,7 @@ xfs_iflush_done(
                 */
                iip = INODE_ITEM(blip);
                if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
-                   lip->li_flags & XFS_LI_FAILED)
+                   (blip->li_flags & XFS_LI_FAILED))
                        need_ail++;
 
                blip = next;
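Note: the loop variable is blip -- it walks every inode log item attached to the buffer -- but the old failure test read lip, the single item the callback was invoked for, so XFS_LI_FAILED was checked on the wrong item for every other member of the list and failed items could miss their AIL processing. The fix reads blip->li_flags and adds the parentheses that keep the && / & mix readable.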
index 5049e8ab6e302e05b4c0d66fcf3eaa1ba9e2721b..aa75389be8cfa81d06695f80701c2093b65289b0 100644 (file)
@@ -1088,6 +1088,7 @@ xfs_ioctl_setattr_dax_invalidate(
        int                     *join_flags)
 {
        struct inode            *inode = VFS_I(ip);
+       struct super_block      *sb = inode->i_sb;
        int                     error;
 
        *join_flags = 0;
@@ -1100,7 +1101,7 @@ xfs_ioctl_setattr_dax_invalidate(
        if (fa->fsx_xflags & FS_XFLAG_DAX) {
                if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
                        return -EINVAL;
-               if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+               if (bdev_dax_supported(sb, sb->s_blocksize) < 0)
                        return -EINVAL;
        }
 
index a1909bc064e9e70c90ef6ced3017935951aefc2e..f179bdf1644dc346b2f7f064543eca0c3f808c6a 100644 (file)
@@ -829,7 +829,8 @@ int
 xfs_iomap_write_unwritten(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
-       xfs_off_t       count)
+       xfs_off_t       count,
+       bool            update_isize)
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
@@ -840,6 +841,7 @@ xfs_iomap_write_unwritten(
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
        struct xfs_defer_ops dfops;
+       struct inode    *inode = VFS_I(ip);
        xfs_fsize_t     i_size;
        uint            resblks;
        int             error;
@@ -899,7 +901,8 @@ xfs_iomap_write_unwritten(
                i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
                if (i_size > offset + count)
                        i_size = offset + count;
-
+               if (update_isize && i_size > i_size_read(inode))
+                       i_size_write(inode, i_size);
                i_size = xfs_new_eof(ip, i_size);
                if (i_size) {
                        ip->i_d.di_size = i_size;
index 00db3ecea0840c2587f6c0de2f2828bcaaab6fbe..ee535065c5d0e3795158e9cf1ce7dafad5f3a729 100644 (file)
@@ -27,7 +27,7 @@ int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
                        struct xfs_bmbt_irec *, int);
 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t,
                        struct xfs_bmbt_irec *);
-int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
+int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 
 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
                struct xfs_bmbt_irec *);
index 2f2dc3c09ad008a75fa0ffba6fd5d7aed392c162..4246876df7b759cdd63ddd43b5eb4612af0b0c93 100644 (file)
@@ -274,7 +274,7 @@ xfs_fs_commit_blocks(
                                        (end - 1) >> PAGE_SHIFT);
                WARN_ON_ONCE(error);
 
-               error = xfs_iomap_write_unwritten(ip, start, length);
+               error = xfs_iomap_write_unwritten(ip, start, length, false);
                if (error)
                        goto out_drop_iolock;
        }
index c996f4ae4a5f293199ede0d0137329cc0919418b..584cf2d573babe6b8331b856e617c4e6f08b5a53 100644 (file)
@@ -1654,6 +1654,16 @@ xfs_fs_fill_super(
                "DAX and reflink have not been tested together!");
        }
 
+       if (mp->m_flags & XFS_MOUNT_DISCARD) {
+               struct request_queue *q = bdev_get_queue(sb->s_bdev);
+
+               if (!blk_queue_discard(q)) {
+                       xfs_warn(mp, "mounting with \"discard\" option, but "
+                                       "the device does not support discard");
+                       mp->m_flags &= ~XFS_MOUNT_DISCARD;
+               }
+       }
+
        if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
                if (mp->m_sb.sb_rblocks) {
                        xfs_alert(mp,
index f24bfb2b9a2dcca0ac61034e86fe3bc489da7947..6d508767e14455d52470fe806f5f923637561edc 100644 (file)
@@ -3,8 +3,27 @@
 
 #include <linux/types.h>
 
+/*
+ * CPU-up                      CPU-down
+ *
+ * BP          AP              BP              AP
+ *
+ * OFFLINE                     OFFLINE
+ *   |                           ^
+ *   v                           |
+ * BRINGUP_CPU->AP_OFFLINE     BRINGUP_CPU  <- AP_IDLE_DEAD (idle thread/play_dead)
+ *               |                             AP_OFFLINE
+ *               v (IRQ-off)     ,---------------^
+ *             AP_ONLNE          | (stop_machine)
+ *             AP_ONLINE        | (stop_machine)
+ *               |                               ^
+ *               v                               |
+ *              AP_ACTIVE                      AP_ACTIVE
+ */
+
 enum cpuhp_state {
-       CPUHP_OFFLINE,
+       CPUHP_INVALID = -1,
+       CPUHP_OFFLINE = 0,
        CPUHP_CREATE_THREADS,
        CPUHP_PERF_PREPARE,
        CPUHP_PERF_X86_PREPARE,
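Note: CPUHP_INVALID = -1 introduces a sentinel that can never match a real hotplug state; kernel/cpu.c (later in this diff) initializes the new per-cpu st->fail field to it, so error injection only triggers while fail holds a genuine state number. Spelling out CPUHP_OFFLINE = 0 keeps the rest of the enum's values unchanged.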
index a7f2ac689d2917e2902e78604e8b6b33df1ee333..41b8c575785916f7691a7686a5c5d4e7d99003fa 100644 (file)
@@ -167,11 +167,11 @@ struct iommu_resv_region {
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @map_sg: map a scatter-gather list of physically contiguous memory chunks
+ *          to an iommu domain
  * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
  * @tlb_range_add: Add a given iova range to the flush queue for this domain
  * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
  *            queue
- * to an iommu domain
  * @iova_to_phys: translate iova to physical address
  * @add_device: add device to iommu grouping
  * @remove_device: remove device from iommu grouping
index 044114185120633b8ca5c678b22570c3c2f8e264..e315e16b6ff8a0aaade2552ee404d35e27da5570 100644 (file)
@@ -187,6 +187,7 @@ struct key {
 #define KEY_FLAG_BUILTIN       8       /* set if key is built in to the kernel */
 #define KEY_FLAG_ROOT_CAN_INVAL        9       /* set if key can be invalidated by root without permission */
 #define KEY_FLAG_KEEP          10      /* set if key should not be removed */
+#define KEY_FLAG_UID_KEYRING   11      /* set if key is a user or user session keyring */
 
        /* the key type and key description string
         * - the desc is used to match a key against search criteria
@@ -243,6 +244,7 @@ extern struct key *key_alloc(struct key_type *type,
 #define KEY_ALLOC_NOT_IN_QUOTA         0x0002  /* not in quota */
 #define KEY_ALLOC_BUILT_IN             0x0004  /* Key is built into kernel */
 #define KEY_ALLOC_BYPASS_RESTRICTION   0x0008  /* Override the check on restricted keyrings */
+#define KEY_ALLOC_UID_KEYRING          0x0010  /* allocating a user or user session keyring */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
index f68c58a93dd045b9b58de9882088621253ec7758..f4f8ee5a7362e982c0084d619c8ed1996a38f770 100644 (file)
@@ -1685,6 +1685,8 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; }
 
 #define dev_is_pci(d) (false)
 #define dev_is_pf(d) (false)
+static inline bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags)
+{ return false; }
 #endif /* CONFIG_PCI */
 
 /* Include architecture-dependent settings and functions */
index 92fb8dd5a9e4884bfde2225bb0c6d933ed13c660..26a7df4e558c249c40b979ead73425989f8e8c7f 100644 (file)
@@ -65,25 +65,23 @@ struct task_group;
  */
 
 /* Used in tsk->state: */
-#define TASK_RUNNING                   0
-#define TASK_INTERRUPTIBLE             1
-#define TASK_UNINTERRUPTIBLE           2
-#define __TASK_STOPPED                 4
-#define __TASK_TRACED                  8
+#define TASK_RUNNING                   0x0000
+#define TASK_INTERRUPTIBLE             0x0001
+#define TASK_UNINTERRUPTIBLE           0x0002
+#define __TASK_STOPPED                 0x0004
+#define __TASK_TRACED                  0x0008
 /* Used in tsk->exit_state: */
-#define EXIT_DEAD                      16
-#define EXIT_ZOMBIE                    32
+#define EXIT_DEAD                      0x0010
+#define EXIT_ZOMBIE                    0x0020
 #define EXIT_TRACE                     (EXIT_ZOMBIE | EXIT_DEAD)
 /* Used in tsk->state again: */
-#define TASK_DEAD                      64
-#define TASK_WAKEKILL                  128
-#define TASK_WAKING                    256
-#define TASK_PARKED                    512
-#define TASK_NOLOAD                    1024
-#define TASK_NEW                       2048
-#define TASK_STATE_MAX                 4096
-
-#define TASK_STATE_TO_CHAR_STR         "RSDTtXZxKWPNn"
+#define TASK_PARKED                    0x0040
+#define TASK_DEAD                      0x0080
+#define TASK_WAKEKILL                  0x0100
+#define TASK_WAKING                    0x0200
+#define TASK_NOLOAD                    0x0400
+#define TASK_NEW                       0x0800
+#define TASK_STATE_MAX                 0x1000
 
 /* Convenience macros for the sake of set_current_state: */
 #define TASK_KILLABLE                  (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
@@ -99,7 +97,8 @@ struct task_group;
 /* get_task_state(): */
 #define TASK_REPORT                    (TASK_RUNNING | TASK_INTERRUPTIBLE | \
                                         TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
-                                        __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
+                                        __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
+                                        TASK_PARKED)
 
 #define task_is_traced(task)           ((task->state & __TASK_TRACED) != 0)
 
@@ -1243,17 +1242,34 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
        return task_pgrp_nr_ns(tsk, &init_pid_ns);
 }
 
-static inline char task_state_to_char(struct task_struct *task)
+#define TASK_REPORT_IDLE       (TASK_REPORT + 1)
+#define TASK_REPORT_MAX                (TASK_REPORT_IDLE << 1)
+
+static inline unsigned int __get_task_state(struct task_struct *tsk)
+{
+       unsigned int tsk_state = READ_ONCE(tsk->state);
+       unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
+
+       BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
+
+       if (tsk_state == TASK_IDLE)
+               state = TASK_REPORT_IDLE;
+
+       return fls(state);
+}
+
+static inline char __task_state_to_char(unsigned int state)
 {
-       const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-       unsigned long state = task->state;
+       static const char state_char[] = "RSDTtXZPI";
 
-       state = state ? __ffs(state) + 1 : 0;
+       BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1);
 
-       /* Make sure the string lines up properly with the number of task states: */
-       BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1);
+       return state_char[state];
+}
 
-       return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?';
+static inline char task_state_to_char(struct task_struct *tsk)
+{
+       return __task_state_to_char(__get_task_state(tsk));
 }
 
 /**
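Note: with only reportable states left, "RSDTtXZPI" lines up with indices 0-8 from __get_task_state(), and the BUILD_BUG_ON keeps the string and TASK_REPORT_MAX in lock-step. One visible change: an idle kthread (TASK_IDLE = TASK_UNINTERRUPTIBLE | TASK_NOLOAD) now shows 'I' in ps/top instead of masquerading as a 'D' uninterruptible sleeper.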
index e6789b8757d5021439c332751e52726bde83bec1..6383c528b1484fed5731b3074e1d1accf414ff31 100644 (file)
@@ -168,6 +168,20 @@ static inline void init_timer_on_stack_key(struct timer_list *timer,
 #define setup_pinned_deferrable_timer_on_stack(timer, fn, data)                \
        __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED)
 
+#define TIMER_DATA_TYPE                unsigned long
+#define TIMER_FUNC_TYPE                void (*)(TIMER_DATA_TYPE)
+
+static inline void timer_setup(struct timer_list *timer,
+                              void (*callback)(struct timer_list *),
+                              unsigned int flags)
+{
+       __setup_timer(timer, (TIMER_FUNC_TYPE)callback,
+                     (TIMER_DATA_TYPE)timer, flags);
+}
+
+#define from_timer(var, callback_timer, timer_fieldname) \
+       container_of(callback_timer, typeof(*var), timer_fieldname)
+
 /**
  * timer_pending - is a timer pending?
  * @timer: the timer in question
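Note: this is the transitional shim for the tree-wide timer conversion: timer_setup() smuggles the timer's own address through the legacy unsigned long data argument, so a callback written against the new struct timer_list * prototype can recover its containing object with from_timer(), which is container_of() underneath. A minimal usage sketch, struct and field names hypothetical:

    struct foo {
            int ticks;
            struct timer_list timer;
    };

    static void foo_timer_fn(struct timer_list *t)
    {
            /* map the timer_list back to the structure embedding it */
            struct foo *foo = from_timer(foo, t, timer);

            foo->ticks++;
            mod_timer(&foo->timer, jiffies + HZ);   /* re-arm one second out */
    }

    /* in foo's init path: */
    timer_setup(&foo->timer, foo_timer_fn, 0);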
index bdb1279a415b39f0597a02a6eb36aa6bd5e7ded5..e8608b2dc844fb27c0e7e5ceb359fe357ecf6553 100644 (file)
@@ -285,7 +285,7 @@ enum ib_tm_cap_flags {
        IB_TM_CAP_RC                = 1 << 0,
 };
 
-struct ib_xrq_caps {
+struct ib_tm_caps {
        /* Max size of RNDV header */
        u32 max_rndv_hdr_size;
        /* Max number of entries in tag matching list */
@@ -358,7 +358,7 @@ struct ib_device_attr {
        struct ib_rss_caps      rss_caps;
        u32                     max_wq_type_rq;
        u32                     raw_packet_caps; /* Use ib_raw_packet_caps enum */
-       struct ib_xrq_caps      xrq_caps;
+       struct ib_tm_caps       tm_caps;
 };
 
 enum ib_mtu {
@@ -1739,7 +1739,7 @@ struct ib_mr {
        u32                lkey;
        u32                rkey;
        u64                iova;
-       u32                length;
+       u64                length;
        unsigned int       page_size;
        bool               need_inval;
        union {
index ae1409ffe99a00817f02a2cc51771840712fc1c3..3c8b7f6256701b6c6eeb05b2bae9387fa95df692 100644 (file)
@@ -114,7 +114,10 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
         * Preemption ignores task state, therefore preempted tasks are always
         * RUNNING (we will not have dequeued if state != RUNNING).
         */
-       return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state;
+       if (preempt)
+               return TASK_STATE_MAX;
+
+       return __get_task_state(p);
 }
 #endif /* CREATE_TRACE_POINTS */
 
@@ -152,12 +155,14 @@ TRACE_EVENT(sched_switch,
 
        TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
                __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-               __entry->prev_state & (TASK_STATE_MAX-1) ?
-                 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
-                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
-                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
-                               { 128, "K" }, { 256, "W" }, { 512, "P" },
-                               { 1024, "N" }) : "R",
+
+               (__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
+                 __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
+                               { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" },
+                               { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" },
+                               { 0x40, "P" }, { 0x80, "I" }) :
+                 "R",
+
                __entry->prev_state & TASK_STATE_MAX ? "+" : "",
                __entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
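Note: a subtlety when reading traces from this tree: __trace_sched_switch_state() now records the index returned by __get_task_state() (0-8), while the TP_printk still decodes prev_state as a bitmask against the 0x01-0x80 table, so a stopped task (index 3) prints as "S|D" rather than "T". The '+' preemption marker is unaffected, since preemption is recorded as TASK_STATE_MAX (0x1000), which no index can reach; later kernels resolve the mismatch by converting the index back to a single state bit before printing.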
index 9a0b6479fe0c626a05cc86da382181c81b1db73f..d4e0b53bfc75ccdf03585c7b402389a21aba4dca 100644 (file)
@@ -261,7 +261,7 @@ struct ib_uverbs_ex_query_device_resp {
        struct ib_uverbs_rss_caps rss_caps;
        __u32  max_wq_type_rq;
        __u32 raw_packet_caps;
-       struct ib_uverbs_tm_caps xrq_caps;
+       struct ib_uverbs_tm_caps tm_caps;
 };
 
 struct ib_uverbs_query_port {
index acf5308fad51f17706197ca6550ce7ae3c8b5378..8de11a29e4955d846c0d1d90a2c0b3a207e21a3d 100644 (file)
  * @bringup:   Single callback bringup or teardown selector
  * @cb_state:  The state for a single callback (install/uninstall)
  * @result:    Result of the operation
- * @done:      Signal completion to the issuer of the task
+ * @done_up:   Signal completion to the issuer of the task for cpu-up
+ * @done_down: Signal completion to the issuer of the task for cpu-down
  */
 struct cpuhp_cpu_state {
        enum cpuhp_state        state;
        enum cpuhp_state        target;
+       enum cpuhp_state        fail;
 #ifdef CONFIG_SMP
        struct task_struct      *thread;
        bool                    should_run;
@@ -58,18 +60,39 @@ struct cpuhp_cpu_state {
        bool                    single;
        bool                    bringup;
        struct hlist_node       *node;
+       struct hlist_node       *last;
        enum cpuhp_state        cb_state;
        int                     result;
-       struct completion       done;
+       struct completion       done_up;
+       struct completion       done_down;
 #endif
 };
 
-static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
+static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
+       .fail = CPUHP_INVALID,
+};
 
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
-static struct lockdep_map cpuhp_state_lock_map =
-       STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+static struct lockdep_map cpuhp_state_up_map =
+       STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
+static struct lockdep_map cpuhp_state_down_map =
+       STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
+
+static inline void cpuhp_lock_acquire(bool bringup)
+{
+       lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+
+static inline void cpuhp_lock_release(bool bringup)
+{
+       lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+#else
+
+static inline void cpuhp_lock_acquire(bool bringup) { }
+static inline void cpuhp_lock_release(bool bringup) { }
+
 #endif
 
 /**
@@ -123,13 +146,16 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
 /**
  * cpuhp_invoke_callback - Invoke the callbacks for a given state
  * @cpu:       The cpu for which the callback should be invoked
- * @step:      The step in the state machine
+ * @state:     The state to do callbacks for
  * @bringup:   True if the bringup callback should be invoked
+ * @node:      For multi-instance, do a single entry callback for install/remove
+ * @lastp:     For multi-instance rollback, remember how far we got
  *
  * Called from cpu hotplug and from the state register machinery.
  */
 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
-                                bool bringup, struct hlist_node *node)
+                                bool bringup, struct hlist_node *node,
+                                struct hlist_node **lastp)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        struct cpuhp_step *step = cpuhp_get_step(state);
@@ -137,7 +163,17 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
        int (*cb)(unsigned int cpu);
        int ret, cnt;
 
+       if (st->fail == state) {
+               st->fail = CPUHP_INVALID;
+
+               if (!(bringup ? step->startup.single : step->teardown.single))
+                       return 0;
+
+               return -EAGAIN;
+       }
+
        if (!step->multi_instance) {
+               WARN_ON_ONCE(lastp && *lastp);
                cb = bringup ? step->startup.single : step->teardown.single;
                if (!cb)
                        return 0;
@@ -152,6 +188,7 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
 
        /* Single invocation for instance add/remove */
        if (node) {
+               WARN_ON_ONCE(lastp && *lastp);
                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
                ret = cbm(cpu, node);
                trace_cpuhp_exit(cpu, st->state, state, ret);
@@ -161,13 +198,23 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
        /* State transition. Invoke on all instances */
        cnt = 0;
        hlist_for_each(node, &step->list) {
+               if (lastp && node == *lastp)
+                       break;
+
                trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
                ret = cbm(cpu, node);
                trace_cpuhp_exit(cpu, st->state, state, ret);
-               if (ret)
-                       goto err;
+               if (ret) {
+                       if (!lastp)
+                               goto err;
+
+                       *lastp = node;
+                       return ret;
+               }
                cnt++;
        }
+       if (lastp)
+               *lastp = NULL;
        return 0;
 err:
        /* Rollback the instances if one failed */
@@ -178,12 +225,39 @@ err:
        hlist_for_each(node, &step->list) {
                if (!cnt--)
                        break;
-               cbm(cpu, node);
+
+               trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+               ret = cbm(cpu, node);
+               trace_cpuhp_exit(cpu, st->state, state, ret);
+               /*
+                * Rollback must not fail.
+                */
+               WARN_ON_ONCE(ret);
        }
        return ret;
 }
 
 #ifdef CONFIG_SMP
+static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+       struct completion *done = bringup ? &st->done_up : &st->done_down;
+       wait_for_completion(done);
+}
+
+static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
+{
+       struct completion *done = bringup ? &st->done_up : &st->done_down;
+       complete(done);
+}
+
+/*
+ * The former STARTING/DYING states run with IRQs disabled and must not fail.
+ */
+static bool cpuhp_is_atomic_state(enum cpuhp_state state)
+{
+       return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
+}
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
@@ -271,14 +345,79 @@ void cpu_hotplug_enable(void)
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
+static inline enum cpuhp_state
+cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = st->state;
+
+       st->rollback = false;
+       st->last = NULL;
+
+       st->target = target;
+       st->single = false;
+       st->bringup = st->state < target;
+
+       return prev_state;
+}
+
+static inline void
+cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
+{
+       st->rollback = true;
+
+       /*
+        * If we have st->last we need to undo partial multi_instance of this
+        * state first. Otherwise start undo at the previous state.
+        */
+       if (!st->last) {
+               if (st->bringup)
+                       st->state--;
+               else
+                       st->state++;
+       }
+
+       st->target = prev_state;
+       st->bringup = !st->bringup;
+}
+
+/* Regular hotplug invocation of the AP hotplug thread */
+static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
+{
+       if (!st->single && st->state == st->target)
+               return;
+
+       st->result = 0;
+       /*
+        * Make sure the above stores are visible before should_run becomes
+        * true. Paired with the mb() above in cpuhp_thread_fun()
+        */
+       smp_mb();
+       st->should_run = true;
+       wake_up_process(st->thread);
+       wait_for_ap_thread(st, st->bringup);
+}
+
+static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state;
+       int ret;
+
+       prev_state = cpuhp_set_state(st, target);
+       __cpuhp_kick_ap(st);
+       if ((ret = st->result)) {
+               cpuhp_reset_state(st, prev_state);
+               __cpuhp_kick_ap(st);
+       }
+
+       return ret;
+}
 
 static int bringup_wait_for_ap(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
        /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
-       wait_for_completion(&st->done);
+       wait_for_ap_thread(st, true);
        if (WARN_ON_ONCE((!cpu_online(cpu))))
                return -ECANCELED;
 
@@ -286,12 +425,10 @@ static int bringup_wait_for_ap(unsigned int cpu)
        stop_machine_unpark(cpu);
        kthread_unpark(st->thread);
 
-       /* Should we go further up ? */
-       if (st->target > CPUHP_AP_ONLINE_IDLE) {
-               __cpuhp_kick_ap_work(st);
-               wait_for_completion(&st->done);
-       }
-       return st->result;
+       if (st->target <= CPUHP_AP_ONLINE_IDLE)
+               return 0;
+
+       return cpuhp_kick_ap(st, st->target);
 }
 
 static int bringup_cpu(unsigned int cpu)
@@ -317,32 +454,6 @@ static int bringup_cpu(unsigned int cpu)
 /*
  * Hotplug state machine related functions
  */
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       for (st->state++; st->state < st->target; st->state++) {
-               struct cpuhp_step *step = cpuhp_get_step(st->state);
-
-               if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, true, NULL);
-       }
-}
-
-static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-                               enum cpuhp_state target)
-{
-       enum cpuhp_state prev_state = st->state;
-       int ret = 0;
-
-       for (; st->state > target; st->state--) {
-               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
-               if (ret) {
-                       st->target = prev_state;
-                       undo_cpu_down(cpu, st);
-                       break;
-               }
-       }
-       return ret;
-}
 
 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
@@ -350,7 +461,7 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
                struct cpuhp_step *step = cpuhp_get_step(st->state);
 
                if (!step->skip_onerr)
-                       cpuhp_invoke_callback(cpu, st->state, false, NULL);
+                       cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
        }
 }
 
@@ -362,7 +473,7 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 
        while (st->state < target) {
                st->state++;
-               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
+               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
                if (ret) {
                        st->target = prev_state;
                        undo_cpu_up(cpu, st);
@@ -379,7 +490,8 @@ static void cpuhp_create(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
-       init_completion(&st->done);
+       init_completion(&st->done_up);
+       init_completion(&st->done_down);
 }
 
 static int cpuhp_should_run(unsigned int cpu)
@@ -389,69 +501,90 @@ static int cpuhp_should_run(unsigned int cpu)
        return st->should_run;
 }
 
-/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
-static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
-
-       return cpuhp_down_callbacks(cpu, st, target);
-}
-
-/* Execute the online startup callbacks. Used to be CPU_ONLINE */
-static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
-{
-       return cpuhp_up_callbacks(cpu, st, st->target);
-}
-
 /*
  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
  * callbacks when a state gets [un]installed at runtime.
+ *
+ * Each invocation of this function by the smpboot thread does a single AP
+ * state callback.
+ *
+ * It has 3 modes of operation:
+ *  - single: runs st->cb_state
+ *  - up:     runs ++st->state, while st->state < st->target
+ *  - down:   runs st->state--, while st->state > st->target
+ *
+ * When complete or on error, should_run is cleared and the completion is fired.
  */
 static void cpuhp_thread_fun(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
-       int ret = 0;
+       bool bringup = st->bringup;
+       enum cpuhp_state state;
 
        /*
-        * Paired with the mb() in cpuhp_kick_ap_work and
-        * cpuhp_invoke_ap_callback, so the work set is consistent visible.
+        * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
+        * that if we see ->should_run we also see the rest of the state.
         */
        smp_mb();
-       if (!st->should_run)
+
+       if (WARN_ON_ONCE(!st->should_run))
                return;
 
-       st->should_run = false;
+       cpuhp_lock_acquire(bringup);
 
-       lock_map_acquire(&cpuhp_state_lock_map);
-       /* Single callback invocation for [un]install ? */
        if (st->single) {
-               if (st->cb_state < CPUHP_AP_ONLINE) {
-                       local_irq_disable();
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                   st->bringup, st->node);
-                       local_irq_enable();
+               state = st->cb_state;
+               st->should_run = false;
+       } else {
+               if (bringup) {
+                       st->state++;
+                       state = st->state;
+                       st->should_run = (st->state < st->target);
+                       WARN_ON_ONCE(st->state > st->target);
                } else {
-                       ret = cpuhp_invoke_callback(cpu, st->cb_state,
-                                                   st->bringup, st->node);
+                       state = st->state;
+                       st->state--;
+                       st->should_run = (st->state > st->target);
+                       WARN_ON_ONCE(st->state < st->target);
                }
-       } else if (st->rollback) {
-               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+       }
+
+       WARN_ON_ONCE(!cpuhp_is_ap_state(state));
 
-               undo_cpu_down(cpu, st);
-               st->rollback = false;
+       if (st->rollback) {
+               struct cpuhp_step *step = cpuhp_get_step(state);
+               if (step->skip_onerr)
+                       goto next;
+       }
+
+       if (cpuhp_is_atomic_state(state)) {
+               local_irq_disable();
+               st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+               local_irq_enable();
+
+               /*
+                * STARTING/DYING must not fail!
+                */
+               WARN_ON_ONCE(st->result);
        } else {
-               /* Cannot happen .... */
-               BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
-
-               /* Regular hotplug work */
-               if (st->state < st->target)
-                       ret = cpuhp_ap_online(cpu, st);
-               else if (st->state > st->target)
-                       ret = cpuhp_ap_offline(cpu, st);
+               st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
+       }
+
+       if (st->result) {
+               /*
+                * If we fail on a rollback, we're up a creek without a
+                * paddle, no way forward, no way back. We lose, thanks for
+                * playing.
+                */
+               WARN_ON_ONCE(st->rollback);
+               st->should_run = false;
        }
-       lock_map_release(&cpuhp_state_lock_map);
-       st->result = ret;
-       complete(&st->done);
+
+next:
+       cpuhp_lock_release(bringup);
+
+       if (!st->should_run)
+               complete_ap_thread(st, bringup);
 }
 
 /* Invoke a single callback on a remote cpu */
@@ -460,62 +593,64 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
                         struct hlist_node *node)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+       int ret;
 
        if (!cpu_online(cpu))
                return 0;
 
-       lock_map_acquire(&cpuhp_state_lock_map);
-       lock_map_release(&cpuhp_state_lock_map);
+       cpuhp_lock_acquire(false);
+       cpuhp_lock_release(false);
+
+       cpuhp_lock_acquire(true);
+       cpuhp_lock_release(true);
 
        /*
         * If we are up and running, use the hotplug thread. For early calls
         * we invoke the thread function directly.
         */
        if (!st->thread)
-               return cpuhp_invoke_callback(cpu, state, bringup, node);
+               return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 
+       st->rollback = false;
+       st->last = NULL;
+
+       st->node = node;
+       st->bringup = bringup;
        st->cb_state = state;
        st->single = true;
-       st->bringup = bringup;
-       st->node = node;
 
-       /*
-        * Make sure the above stores are visible before should_run becomes
-        * true. Paired with the mb() above in cpuhp_thread_fun()
-        */
-       smp_mb();
-       st->should_run = true;
-       wake_up_process(st->thread);
-       wait_for_completion(&st->done);
-       return st->result;
-}
+       __cpuhp_kick_ap(st);
 
-/* Regular hotplug invocation of the AP hotplug thread */
-static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
-{
-       st->result = 0;
-       st->single = false;
        /*
-        * Make sure the above stores are visible before should_run becomes
-        * true. Paired with the mb() above in cpuhp_thread_fun()
+        * If we failed and did a partial, do a rollback.
         */
-       smp_mb();
-       st->should_run = true;
-       wake_up_process(st->thread);
+       if ((ret = st->result) && st->last) {
+               st->rollback = true;
+               st->bringup = !bringup;
+
+               __cpuhp_kick_ap(st);
+       }
+
+       return ret;
 }
 
 static int cpuhp_kick_ap_work(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-       enum cpuhp_state state = st->state;
+       enum cpuhp_state prev_state = st->state;
+       int ret;
+
+       cpuhp_lock_acquire(false);
+       cpuhp_lock_release(false);
+
+       cpuhp_lock_acquire(true);
+       cpuhp_lock_release(true);
+
+       trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
+       ret = cpuhp_kick_ap(st, st->target);
+       trace_cpuhp_exit(cpu, st->state, prev_state, ret);
 
-       trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
-       lock_map_acquire(&cpuhp_state_lock_map);
-       lock_map_release(&cpuhp_state_lock_map);
-       __cpuhp_kick_ap_work(st);
-       wait_for_completion(&st->done);
-       trace_cpuhp_exit(cpu, st->state, state, st->result);
-       return st->result;
+       return ret;
 }
 
 static struct smp_hotplug_thread cpuhp_threads = {
@@ -581,6 +716,7 @@ static int take_cpu_down(void *_param)
        struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
        enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
        int err, cpu = smp_processor_id();
+       int ret;
 
        /* Ensure this CPU doesn't handle any more interrupts. */
        err = __cpu_disable();
@@ -594,8 +730,13 @@ static int take_cpu_down(void *_param)
        WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
        st->state--;
        /* Invoke the former CPU_DYING callbacks */
-       for (; st->state > target; st->state--)
-               cpuhp_invoke_callback(cpu, st->state, false, NULL);
+       for (; st->state > target; st->state--) {
+               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+               /*
+                * DYING must not fail!
+                */
+               WARN_ON_ONCE(ret);
+       }
 
        /* Give up timekeeping duties */
        tick_handover_do_timer();
@@ -639,7 +780,7 @@ static int takedown_cpu(unsigned int cpu)
         *
         * Wait for the stop thread to go away.
         */
-       wait_for_completion(&st->done);
+       wait_for_ap_thread(st, false);
        BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
 
        /* Interrupts are moved away from the dying cpu, reenable alloc/free */
@@ -658,7 +799,7 @@ static void cpuhp_complete_idle_dead(void *arg)
 {
        struct cpuhp_cpu_state *st = arg;
 
-       complete(&st->done);
+       complete_ap_thread(st, false);
 }
 
 void cpuhp_report_idle_dead(void)
@@ -676,11 +817,32 @@ void cpuhp_report_idle_dead(void)
                                 cpuhp_complete_idle_dead, st, 0);
 }
 
-#else
-#define takedown_cpu           NULL
-#endif
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
+{
+       for (st->state++; st->state < st->target; st->state++) {
+               struct cpuhp_step *step = cpuhp_get_step(st->state);
 
-#ifdef CONFIG_HOTPLUG_CPU
+               if (!step->skip_onerr)
+                       cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+       }
+}
+
+static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
+                               enum cpuhp_state target)
+{
+       enum cpuhp_state prev_state = st->state;
+       int ret = 0;
+
+       for (; st->state > target; st->state--) {
+               ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
+               if (ret) {
+                       st->target = prev_state;
+                       undo_cpu_down(cpu, st);
+                       break;
+               }
+       }
+       return ret;
+}
 
 /* Requires cpu_add_remove_lock to be held */
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
@@ -699,13 +861,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       prev_state = st->state;
-       st->target = target;
+       prev_state = cpuhp_set_state(st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread.
         */
        if (st->state > CPUHP_TEARDOWN_CPU) {
+               st->target = max((int)target, CPUHP_TEARDOWN_CPU);
                ret = cpuhp_kick_ap_work(cpu);
                /*
                 * The AP side has done the error rollback already. Just
@@ -720,6 +882,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
                 */
                if (st->state > CPUHP_TEARDOWN_CPU)
                        goto out;
+
+               st->target = target;
        }
        /*
         * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
@@ -727,9 +891,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
         */
        ret = cpuhp_down_callbacks(cpu, st, target);
        if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
-               st->target = prev_state;
-               st->rollback = true;
-               cpuhp_kick_ap_work(cpu);
+               cpuhp_reset_state(st, prev_state);
+               __cpuhp_kick_ap(st);
        }
 
 out:
@@ -754,11 +917,15 @@ out:
        cpu_maps_update_done();
        return err;
 }
+
 int cpu_down(unsigned int cpu)
 {
        return do_cpu_down(cpu, CPUHP_OFFLINE);
 }
 EXPORT_SYMBOL(cpu_down);
+
+#else
+#define takedown_cpu           NULL
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /**
@@ -772,11 +939,16 @@ void notify_cpu_starting(unsigned int cpu)
 {
        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
        enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
+       int ret;
 
        rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
        while (st->state < target) {
                st->state++;
-               cpuhp_invoke_callback(cpu, st->state, true, NULL);
+               ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
+               /*
+                * STARTING must not fail!
+                */
+               WARN_ON_ONCE(ret);
        }
 }
 
@@ -794,7 +966,7 @@ void cpuhp_online_idle(enum cpuhp_state state)
                return;
 
        st->state = CPUHP_AP_ONLINE_IDLE;
-       complete(&st->done);
+       complete_ap_thread(st, true);
 }
 
 /* Requires cpu_add_remove_lock to be held */
@@ -829,7 +1001,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 
        cpuhp_tasks_frozen = tasks_frozen;
 
-       st->target = target;
+       cpuhp_set_state(st, target);
        /*
         * If the current CPU state is in the range of the AP hotplug thread,
         * then we need to kick the thread once more.
@@ -1296,6 +1468,10 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
        struct cpuhp_step *sp = cpuhp_get_step(state);
        int ret;
 
+       /*
+        * If there's nothing to do, we're done.
+        * Relies on the union for multi_instance.
+        */
        if ((bringup && !sp->startup.single) ||
            (!bringup && !sp->teardown.single))
                return 0;
@@ -1307,9 +1483,9 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
        if (cpuhp_is_ap_state(state))
                ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
        else
-               ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+               ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #else
-       ret = cpuhp_invoke_callback(cpu, state, bringup, node);
+       ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
 #endif
        BUG_ON(ret && !bringup);
        return ret;
@@ -1641,9 +1817,55 @@ static ssize_t show_cpuhp_target(struct device *dev,
 }
 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
 
+static ssize_t write_cpuhp_fail(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+       struct cpuhp_step *sp;
+       int fail, ret;
+
+       ret = kstrtoint(buf, 10, &fail);
+       if (ret)
+               return ret;
+
+       /*
+        * Cannot fail STARTING/DYING callbacks.
+        */
+       if (cpuhp_is_atomic_state(fail))
+               return -EINVAL;
+
+       /*
+        * Cannot fail anything that doesn't have callbacks.
+        */
+       mutex_lock(&cpuhp_state_mutex);
+       sp = cpuhp_get_step(fail);
+       if (!sp->startup.single && !sp->teardown.single)
+               ret = -EINVAL;
+       mutex_unlock(&cpuhp_state_mutex);
+       if (ret)
+               return ret;
+
+       st->fail = fail;
+
+       return count;
+}
+
+static ssize_t show_cpuhp_fail(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
+
+       return sprintf(buf, "%d\n", st->fail);
+}
+
+static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
+
 static struct attribute *cpuhp_cpu_attrs[] = {
        &dev_attr_state.attr,
        &dev_attr_target.attr,
+       &dev_attr_fail.attr,
        NULL
 };
 
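The new per-CPU "fail" attribute makes the injection point usable from userspace: writing a state number arms st->fail, and the next hotplug operation through that state fails with -EAGAIN, exercising the rollback paths above. A hypothetical test sketch (the sysfs paths are inferred from the attribute names and the usual cpuN/hotplug directory; state number 42 is arbitrary):

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    static int write_str(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0)
                    return -1;
            if (write(fd, val, strlen(val)) < 0) {
                    close(fd);
                    return -1;
            }
            return close(fd);
    }

    int main(void)
    {
            /* Arm a failure at hotplug state 42 on CPU 1 ... */
            write_str("/sys/devices/system/cpu/cpu1/hotplug/fail", "42");
            /* ... then watch the offline attempt roll back. */
            write_str("/sys/devices/system/cpu/cpu1/online", "0");
            return 0;
    }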
index af71a84e12eea343c6047184599fedf7f5592e65..f684d8e5fa2be2fd10e4d5e2f1e65a5c2afeb629 100644 (file)
@@ -412,6 +412,19 @@ err:
        return NULL;
 }
 
+static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb)
+{
+       if (rb->aux_overwrite)
+               return false;
+
+       if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
+               rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
+               return true;
+       }
+
+       return false;
+}
+
 /*
  * Commit the data written by hardware into the ring buffer by adjusting
  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
@@ -451,10 +464,8 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
        }
 
        rb->user_page->aux_head = rb->aux_head;
-       if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
+       if (rb_need_aux_wakeup(rb))
                wakeup = true;
-               rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
-       }
 
        if (wakeup) {
                if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
@@ -484,9 +495,8 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
        rb->aux_head += size;
 
        rb->user_page->aux_head = rb->aux_head;
-       if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
+       if (rb_need_aux_wakeup(rb)) {
                perf_output_wakeup(handle);
-               rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
                handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
        }
 
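rb_need_aux_wakeup() centralizes the watermark test that both call sites above previously open-coded. A standalone model of the helper with illustrative numbers (all names are local to this sketch):

    #include <stdbool.h>
    #include <stdio.h>

    #define ROUNDDOWN(x, y) ((x) - ((x) % (y)))

    static unsigned long aux_wakeup = 8192, aux_watermark = 4096;

    static bool need_aux_wakeup(unsigned long aux_head)
    {
            if (aux_head - aux_wakeup >= aux_watermark) {
                    aux_wakeup = ROUNDDOWN(aux_head, aux_watermark);
                    return true;
            }
            return false;
    }

    int main(void)
    {
            bool w = need_aux_wakeup(13000);   /* 13000 - 8192 >= 4096 */
            printf("%d %lu\n", w, aux_wakeup); /* 1 12288 */
            w = need_aux_wakeup(15000);        /* 15000 - 12288 < 4096 */
            printf("%d %lu\n", w, aux_wakeup); /* 0 12288 */
            return 0;
    }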
index 3481ababd06aa6cdd2aedd1fee0e56026acddc3b..f2cd53e92147c35c43d773b1eb688098fe4b7db3 100644 (file)
@@ -1600,12 +1600,10 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
        struct waitid_info info = {.status = 0};
        long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL);
        int signo = 0;
+
        if (err > 0) {
                signo = SIGCHLD;
                err = 0;
-       }
-
-       if (!err) {
                if (ru && copy_to_user(ru, &r, sizeof(struct rusage)))
                        return -EFAULT;
        }
@@ -1723,16 +1721,15 @@ COMPAT_SYSCALL_DEFINE5(waitid,
        if (err > 0) {
                signo = SIGCHLD;
                err = 0;
-       }
-
-       if (!err && uru) {
-               /* kernel_waitid() overwrites everything in ru */
-               if (COMPAT_USE_64BIT_TIME)
-                       err = copy_to_user(uru, &ru, sizeof(ru));
-               else
-                       err = put_compat_rusage(&ru, uru);
-               if (err)
-                       return -EFAULT;
+               if (uru) {
+                       /* kernel_waitid() overwrites everything in ru */
+                       if (COMPAT_USE_64BIT_TIME)
+                               err = copy_to_user(uru, &ru, sizeof(ru));
+                       else
+                               err = put_compat_rusage(&ru, uru);
+                       if (err)
+                               return -EFAULT;
+               }
        }
 
        if (!infop)
index 3d38eaf0549209ddb72e83780df6167a4423e8a2..0518a0bfc746bab4f257f2fa81ef7361f99ff113 100644 (file)
@@ -821,8 +821,6 @@ static void get_pi_state(struct futex_pi_state *pi_state)
 /*
  * Drops a reference to the pi_state object and frees or caches it
  * when the last reference is gone.
- *
- * Must be called with the hb lock held.
  */
 static void put_pi_state(struct futex_pi_state *pi_state)
 {
@@ -837,16 +835,22 @@ static void put_pi_state(struct futex_pi_state *pi_state)
         * and has cleaned up the pi_state already
         */
        if (pi_state->owner) {
-               raw_spin_lock_irq(&pi_state->owner->pi_lock);
-               list_del_init(&pi_state->list);
-               raw_spin_unlock_irq(&pi_state->owner->pi_lock);
+               struct task_struct *owner;
 
-               rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner);
+               raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+               owner = pi_state->owner;
+               if (owner) {
+                       raw_spin_lock(&owner->pi_lock);
+                       list_del_init(&pi_state->list);
+                       raw_spin_unlock(&owner->pi_lock);
+               }
+               rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
+               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
        }
 
        if (current->pi_state_cache)
                kfree(pi_state);
        else {
                /*
                 * pi_state->list is already empty.
                 * clear pi_state->owner.
@@ -907,13 +911,14 @@ void exit_pi_state_list(struct task_struct *curr)
                raw_spin_unlock_irq(&curr->pi_lock);
 
                spin_lock(&hb->lock);
-
-               raw_spin_lock_irq(&curr->pi_lock);
+               raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+               raw_spin_lock(&curr->pi_lock);
                /*
                 * We dropped the pi-lock, so re-check whether this
                 * task still owns the PI-state:
                 */
                if (head->next != next) {
+                       raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
                        spin_unlock(&hb->lock);
                        continue;
                }
@@ -922,9 +927,10 @@ void exit_pi_state_list(struct task_struct *curr)
                WARN_ON(list_empty(&pi_state->list));
                list_del_init(&pi_state->list);
                pi_state->owner = NULL;
-               raw_spin_unlock_irq(&curr->pi_lock);
+               raw_spin_unlock(&curr->pi_lock);
 
                get_pi_state(pi_state);
+               raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
                spin_unlock(&hb->lock);
 
                rt_mutex_futex_unlock(&pi_state->pi_mutex);
@@ -1208,6 +1214,10 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
 
        WARN_ON(!list_empty(&pi_state->list));
        list_add(&pi_state->list, &p->pi_state_list);
+       /*
+        * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+        * because there is no concurrency as the object is not published yet.
+        */
        pi_state->owner = p;
        raw_spin_unlock_irq(&p->pi_lock);
 
@@ -2878,6 +2888,7 @@ retry:
                raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
                spin_unlock(&hb->lock);
 
+               /* drops pi_state->pi_mutex.wait_lock */
                ret = wake_futex_pi(uaddr, uval, pi_state);
 
                put_pi_state(pi_state);
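The serialization fix establishes a fixed nesting order: pi_state->pi_mutex.wait_lock (IRQ-disabling) is taken first, the owner's pi_lock nests inside it, and pi_state->owner is only trusted after being re-read under wait_lock. A sketch of the pattern put_pi_state() now follows (condensed, not the literal kernel code):

    raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
    owner = pi_state->owner;                /* re-read under wait_lock */
    if (owner) {
            raw_spin_lock(&owner->pi_lock); /* nests inside wait_lock */
            list_del_init(&pi_state->list);
            raw_spin_unlock(&owner->pi_lock);
    }
    raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);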
index f7086b78ad6e1e1bcb3995537b024d7ed057ef0e..5270a54b9fa4dbf6a4cbdf3e990edc6a0156a915 100644 (file)
@@ -322,7 +322,6 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
                /* Calc pointer to the next generic chip */
                tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
        }
-       d->name = name;
        return 0;
 }
 EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
index e84b7056bb083349b8326ee5dae71284a4fc22cb..ac4644e92b499949b1a11652ddde3e3bccba8072 100644 (file)
@@ -945,7 +945,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
        struct irq_desc *desc;
        struct irq_domain *domain;
        struct radix_tree_iter iter;
-       void **slot;
+       void __rcu **slot;
        int i;
 
        seq_printf(m, " %-16s  %-6s  %-10s  %-10s  %s\n",
@@ -1453,7 +1453,7 @@ out_free_desc:
 /* The irq_data was moved, fix the revmap to refer to the new location */
 static void irq_domain_fix_revmap(struct irq_data *d)
 {
-       void **slot;
+       void __rcu **slot;
 
        if (d->hwirq < d->domain->revmap_size)
                return; /* Not using radix tree. */
index 573dc52b0806054bc485c1290d0ed58ccfbb5c87..d00132b5c325b389f6646de064527df64e2d873e 100644 (file)
@@ -1643,6 +1643,10 @@ const void *free_irq(unsigned int irq, void *dev_id)
 #endif
 
        action = __free_irq(irq, dev_id);
+
+       if (!action)
+               return NULL;
+
        devname = action->name;
        kfree(action);
        return devname;
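With the NULL check in place, calling free_irq() for an irq/dev_id pair that has no registered action returns NULL instead of dereferencing a NULL action. A minimal caller sketch (irq and dev_id are hypothetical):

    const void *devname = free_irq(irq, dev_id);

    if (!devname)
            pr_warn("irq %u: no handler was registered for this dev_id\n", irq);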
index 02f660666ab8976ea51215427050d1fe3807326f..1fefe6dcafd7403a9b172e11ebd0f41d35332211 100644 (file)
@@ -612,6 +612,33 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
        unsigned long flags;
        DEFINE_WAKE_Q(wake_q);
 
+       /*
+        * __rwsem_down_write_failed_common(sem)
+        *   rwsem_optimistic_spin(sem)
+        *     osq_unlock(sem->osq)
+        *   ...
+        *   atomic_long_add_return(&sem->count)
+        *
+        *      - VS -
+        *
+        *              __up_write()
+        *                if (atomic_long_sub_return_release(&sem->count) < 0)
+        *                  rwsem_wake(sem)
+        *                    osq_is_locked(&sem->osq)
+        *
+        * And __up_write() must observe !osq_is_locked() when it observes the
+        * atomic_long_add_return() in order to not miss a wakeup.
+        *
+        * This boils down to:
+        *
+        * [S.rel] X = 1                [RmW] r0 = (Y += 0)
+        *         MB                         RMB
+        * [RmW]   Y += 1               [L]   r1 = X
+        *
+        * exists (r0=1 /\ r1=0)
+        */
+       smp_rmb();
+
        /*
         * If a spinner is present, it is not necessary to do the wakeup.
         * Try to do wakeup only if the trylock succeeds to minimize
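One way to map the litmus test in the comment above onto the rwsem fields (a reading of the comment, not normative):

    /*
     * X = sem->osq    store-release in osq_unlock()
     * Y = sem->count  RmW: atomic_long_add_return() on the spinner side,
     *                 atomic_long_sub_return_release() in __up_write()
     *
     * The smp_rmb() added here is the RMB in the right-hand column: once
     * __up_write()'s RmW on sem->count has observed the spinner's update
     * (r0=1), the osq_is_locked() load must also observe the osq release,
     * so r1=0, i.e. a missed wakeup, cannot happen.
     */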
index 18a6966567daf30517a9f832e4a88b20ae705401..d17c5da523a0bc817a6b32413ee7e0ba3e7a6b2e 100644 (file)
@@ -5166,6 +5166,28 @@ void sched_show_task(struct task_struct *p)
        put_task_stack(p);
 }
 
+static inline bool
+state_filter_match(unsigned long state_filter, struct task_struct *p)
+{
+       /* no filter, everything matches */
+       if (!state_filter)
+               return true;
+
+       /* filter, but doesn't match */
+       if (!(p->state & state_filter))
+               return false;
+
+       /*
+        * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
+        * TASK_KILLABLE).
+        */
+       if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+               return false;
+
+       return true;
+}
+
 void show_state_filter(unsigned long state_filter)
 {
        struct task_struct *g, *p;
@@ -5188,7 +5210,7 @@ void show_state_filter(unsigned long state_filter)
                 */
                touch_nmi_watchdog();
                touch_all_softlockup_watchdogs();
-               if (!state_filter || (p->state & state_filter))
+               if (state_filter_match(state_filter, p))
                        sched_show_task(p);
        }
 
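state_filter_match() keeps the old bitmask semantics but special-cases TASK_IDLE so that SysRq-W style dumps of uninterruptible tasks are not flooded with idle kthreads. An illustrative truth table (recall TASK_IDLE = TASK_UNINTERRUPTIBLE | TASK_NOLOAD and TASK_KILLABLE = TASK_WAKEKILL | TASK_UNINTERRUPTIBLE):

    /*
     * state_filter            p->state               match?
     * 0                       anything               yes (no filter)
     * TASK_UNINTERRUPTIBLE    TASK_UNINTERRUPTIBLE   yes
     * TASK_UNINTERRUPTIBLE    TASK_KILLABLE          yes (D bit set)
     * TASK_UNINTERRUPTIBLE    TASK_IDLE              no  (idle skipped)
     */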
index 01217fb5a5de9bd007506020cffb5052ab3abba8..2f93e4a2d9f623915d0023f9b3a7d8b7d7b95cf7 100644 (file)
@@ -466,8 +466,6 @@ static char *task_group_path(struct task_group *tg)
 }
 #endif
 
-static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
-
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
index c24579dfa7a1411f05d91904fcdb1af8fdcf2ce8..bb3a38005b9cc3f3b9f80d49c40bb9428e12a20d 100644 (file)
@@ -473,14 +473,19 @@ static long seccomp_attach_filter(unsigned int flags,
        return 0;
 }
 
+void __get_seccomp_filter(struct seccomp_filter *filter)
+{
+       /* Reference count is bounded by the number of total processes. */
+       refcount_inc(&filter->usage);
+}
+
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
 void get_seccomp_filter(struct task_struct *tsk)
 {
        struct seccomp_filter *orig = tsk->seccomp.filter;
        if (!orig)
                return;
-       /* Reference count is bounded by the number of total processes. */
-       refcount_inc(&orig->usage);
+       __get_seccomp_filter(orig);
 }
 
 static inline void seccomp_filter_free(struct seccomp_filter *filter)
@@ -491,10 +496,8 @@ static inline void seccomp_filter_free(struct seccomp_filter *filter)
        }
 }
 
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
+static void __put_seccomp_filter(struct seccomp_filter *orig)
 {
-       struct seccomp_filter *orig = tsk->seccomp.filter;
        /* Clean up single-reference branches iteratively. */
        while (orig && refcount_dec_and_test(&orig->usage)) {
                struct seccomp_filter *freeme = orig;
@@ -503,6 +506,12 @@ void put_seccomp_filter(struct task_struct *tsk)
        }
 }
 
+/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
+void put_seccomp_filter(struct task_struct *tsk)
+{
+       __put_seccomp_filter(tsk->seccomp.filter);
+}
+
 static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
 {
        memset(info, 0, sizeof(*info));
@@ -1025,13 +1034,13 @@ long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
        if (!data)
                goto out;
 
-       get_seccomp_filter(task);
+       __get_seccomp_filter(filter);
        spin_unlock_irq(&task->sighand->siglock);
 
        if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
                ret = -EFAULT;
 
-       put_seccomp_filter(task);
+       __put_seccomp_filter(filter);
        return ret;
 
 out:
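Pinning the snapshotted pointer matters because task->seccomp.filter can be replaced as soon as the siglock is dropped; taking and releasing the reference through the task could therefore act on a different filter than the one being copied. The pattern the ptrace path above now follows, as a sketch:

    spin_lock_irq(&task->sighand->siglock);
    filter = task->seccomp.filter;  /* snapshot under the lock */
    __get_seccomp_filter(filter);   /* pin this exact object */
    spin_unlock_irq(&task->sighand->siglock);

    /* copy filter->prog out; task->seccomp.filter may change meanwhile */

    __put_seccomp_filter(filter);   /* drop the object we pinned */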
index 6648fbbb8157fc12703d02fa0fdc9ff85c527ac4..423554ad361020b389b8f9fc296884ae64f74493 100644 (file)
@@ -367,7 +367,8 @@ static struct ctl_table kern_table[] = {
                .data           = &sysctl_sched_time_avg,
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
        },
 #ifdef CONFIG_SCHEDSTATS
        {
index bac629af2285748bbdac64b6b5c632f2fcaed49d..c738e764e2a55cfd3303a3262d748c94a917a86d 100644 (file)
@@ -656,15 +656,6 @@ int trace_print_lat_context(struct trace_iterator *iter)
        return !trace_seq_has_overflowed(s);
 }
 
-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
-
-static int task_state_char(unsigned long state)
-{
-       int bit = state ? __ffs(state) + 1 : 0;
-
-       return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
-}
-
 /**
  * ftrace_find_event - find a registered event
  * @type: the type of event to look for
@@ -930,8 +921,8 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter,
 
        trace_assign_type(field, iter->ent);
 
-       T = task_state_char(field->next_state);
-       S = task_state_char(field->prev_state);
+       T = __task_state_to_char(field->next_state);
+       S = __task_state_to_char(field->prev_state);
        trace_find_cmdline(field->next_pid, comm);
        trace_seq_printf(&iter->seq,
                         " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
@@ -966,8 +957,8 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S)
        trace_assign_type(field, iter->ent);
 
        if (!S)
-               S = task_state_char(field->prev_state);
-       T = task_state_char(field->next_state);
+               S = __task_state_to_char(field->prev_state);
+       T = __task_state_to_char(field->next_state);
        trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n",
                         field->prev_pid,
                         field->prev_prio,
@@ -1002,8 +993,8 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S)
        trace_assign_type(field, iter->ent);
 
        if (!S)
-               S = task_state_char(field->prev_state);
-       T = task_state_char(field->next_state);
+               S = __task_state_to_char(field->prev_state);
+       T = __task_state_to_char(field->next_state);
 
        SEQ_PUT_HEX_FIELD(s, field->prev_pid);
        SEQ_PUT_HEX_FIELD(s, field->prev_prio);
index ddec53b6764617e8fe431d93bf9f970a5015b4be..0c331978b1a636e9f2736f77bd93e97415c1ca10 100644 (file)
@@ -397,10 +397,10 @@ tracing_sched_switch_trace(struct trace_array *tr,
        entry   = ring_buffer_event_data(event);
        entry->prev_pid                 = prev->pid;
        entry->prev_prio                = prev->prio;
-       entry->prev_state               = prev->state;
+       entry->prev_state               = __get_task_state(prev);
        entry->next_pid                 = next->pid;
        entry->next_prio                = next->prio;
-       entry->next_state               = next->state;
+       entry->next_state               = __get_task_state(next);
        entry->next_cpu = task_cpu(next);
 
        if (!call_filter_check_discard(call, entry, buffer, event))
@@ -425,10 +425,10 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
        entry   = ring_buffer_event_data(event);
        entry->prev_pid                 = curr->pid;
        entry->prev_prio                = curr->prio;
-       entry->prev_state               = curr->state;
+       entry->prev_state               = __get_task_state(curr);
        entry->next_pid                 = wakee->pid;
        entry->next_prio                = wakee->prio;
-       entry->next_state               = wakee->state;
+       entry->next_state               = __get_task_state(wakee);
        entry->next_cpu                 = task_cpu(wakee);
 
        if (!call_filter_check_discard(call, entry, buffer, event))
index c18115d22f00be37f68d9bbdbb04f626de6cf3c4..db82a40875e8da3c9ad39881046d27bd8f0ee596 100644 (file)
@@ -126,14 +126,4 @@ config BT_DEBUGFS
          Provide extensive information about internal Bluetooth states
          in debugfs.
 
-config BT_LEGACY_IOCTL
-       bool "Enable legacy ioctl interfaces"
-       depends on BT && BT_BREDR
-       default y
-       help
-         Enable support for legacy ioctl interfaces.  This is only needed
-         for old and deprecated applications using direct ioctl calls for
-         controller management.  Since Linux 3.4 all configuration and
-         setup is done via mgmt interface and this is no longer needed.
-
 source "drivers/bluetooth/Kconfig"
index 0bad296fe0af970f5989b8a121039b99863088b9..65d734c165bd6368b8607d0b5078d345b5e642a8 100644 (file)
@@ -878,7 +878,6 @@ static int hci_sock_release(struct socket *sock)
        return 0;
 }
 
-#ifdef CONFIG_BT_LEGACY_IOCTL
 static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg)
 {
        bdaddr_t bdaddr;
@@ -1050,7 +1049,6 @@ done:
        release_sock(sk);
        return err;
 }
-#endif
 
 static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
                         int addr_len)
@@ -1971,11 +1969,7 @@ static const struct proto_ops hci_sock_ops = {
        .getname        = hci_sock_getname,
        .sendmsg        = hci_sock_sendmsg,
        .recvmsg        = hci_sock_recvmsg,
-#ifdef CONFIG_BT_LEGACY_IOCTL
        .ioctl          = hci_sock_ioctl,
-#else
-       .ioctl          = sock_no_ioctl,
-#endif
        .poll           = datagram_poll,
        .listen         = sock_no_listen,
        .shutdown       = sock_no_shutdown,
index 5a936a6a31a3245cc6ab0f6e9804d268bc198261..df062e086bdbbda8eb9ec724754937ea42134505 100644 (file)
@@ -401,7 +401,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        if (unlikely(n != mw->mw_nents))
                goto out_mapmr_err;
 
-       dprintk("RPC:       %s: Using frmr %p to map %u segments (%u bytes)\n",
+       dprintk("RPC:       %s: Using frmr %p to map %u segments (%llu bytes)\n",
                __func__, frmr, mw->mw_nents, mr->length);
 
        key = (u8)(mr->rkey & 0x000000FF);
index 2e3a10e79ca9ed645eaedbf0f8242332e96ba400..061d0c3a420ad58084df157a143e76133f304dc2 100644 (file)
@@ -265,6 +265,8 @@ objtool_args += --no-fp
 endif
 ifdef CONFIG_GCOV_KERNEL
 objtool_args += --no-unreachable
+else
+objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable)
 endif
 
 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
index a7a23b5541f85a4994e0cc83d7d132e8a5681938..91eafada3164ec9dc77074565c156988c4101554 100644 (file)
@@ -45,10 +45,8 @@ config BIG_KEYS
        bool "Large payload keys"
        depends on KEYS
        depends on TMPFS
-       depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y)
        select CRYPTO_AES
-       select CRYPTO_ECB
-       select CRYPTO_RNG
+       select CRYPTO_GCM
        help
          This option provides support for holding large keys within the kernel
          (for example Kerberos ticket caches).  The data may be stored out to
index 6acb00f6f22cdcd223426756a49deebf53b86595..e607830b6154ce4c8dbcd5563fb0d3607f680c8f 100644 (file)
@@ -1,5 +1,6 @@
 /* Large capacity key type
  *
+ * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
 #include <linux/shmem_fs.h>
 #include <linux/err.h>
 #include <linux/scatterlist.h>
+#include <linux/random.h>
 #include <keys/user-type.h>
 #include <keys/big_key-type.h>
-#include <crypto/rng.h>
-#include <crypto/skcipher.h>
+#include <crypto/aead.h>
 
 /*
  * Layout of key payload words.
@@ -49,7 +50,12 @@ enum big_key_op {
 /*
  * Key size for big_key data encryption
  */
-#define ENC_KEY_SIZE   16
+#define ENC_KEY_SIZE 32
+
+/*
+ * Authentication tag length
+ */
+#define ENC_AUTHTAG_SIZE 16
 
 /*
  * big_key defined keys take an arbitrary string as the description and an
@@ -64,57 +70,62 @@ struct key_type key_type_big_key = {
        .destroy                = big_key_destroy,
        .describe               = big_key_describe,
        .read                   = big_key_read,
+       /* no ->update(); don't add it without changing big_key_crypt() nonce */
 };
 
 /*
- * Crypto names for big_key data encryption
+ * Crypto names for big_key data authenticated encryption
  */
-static const char big_key_rng_name[] = "stdrng";
-static const char big_key_alg_name[] = "ecb(aes)";
+static const char big_key_alg_name[] = "gcm(aes)";
 
 /*
- * Crypto algorithms for big_key data encryption
+ * Crypto algorithms for big_key data authenticated encryption
  */
-static struct crypto_rng *big_key_rng;
-static struct crypto_skcipher *big_key_skcipher;
+static struct crypto_aead *big_key_aead;
 
 /*
- * Generate random key to encrypt big_key data
+ * Since changing the key affects the entire object, we need a mutex.
  */
-static inline int big_key_gen_enckey(u8 *key)
-{
-       return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE);
-}
+static DEFINE_MUTEX(big_key_aead_lock);
 
 /*
  * Encrypt/decrypt big_key data
  */
 static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key)
 {
-       int ret = -EINVAL;
+       int ret;
        struct scatterlist sgio;
-       SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher);
-
-       if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) {
+       struct aead_request *aead_req;
+       /* We always use a zero nonce. The reason we can get away with this
+        * is that we use a different randomly generated key for every
+        * encryption. Notably, key_type_big_key doesn't define
+        * an .update function, so there's no chance we'll wind up reusing the
+        * key to encrypt updated data. Simply put: one key, one encryption.
+        */
+       u8 zero_nonce[crypto_aead_ivsize(big_key_aead)];
+
+       aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL);
+       if (!aead_req)
+               return -ENOMEM;
+
+       memset(zero_nonce, 0, sizeof(zero_nonce));
+       sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? ENC_AUTHTAG_SIZE : 0));
+       aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce);
+       aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+       aead_request_set_ad(aead_req, 0);
+
+       mutex_lock(&big_key_aead_lock);
+       if (crypto_aead_setkey(big_key_aead, key, ENC_KEY_SIZE)) {
                ret = -EAGAIN;
                goto error;
        }
-
-       skcipher_request_set_tfm(req, big_key_skcipher);
-       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
-                                     NULL, NULL);
-
-       sg_init_one(&sgio, data, datalen);
-       skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL);
-
        if (op == BIG_KEY_ENC)
-               ret = crypto_skcipher_encrypt(req);
+               ret = crypto_aead_encrypt(aead_req);
        else
-               ret = crypto_skcipher_decrypt(req);
-
-       skcipher_request_zero(req);
-
+               ret = crypto_aead_decrypt(aead_req);
 error:
+       mutex_unlock(&big_key_aead_lock);
+       aead_request_free(aead_req);
        return ret;
 }
 
@@ -146,16 +157,13 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                 *
                 * File content is stored encrypted with randomly generated key.
                 */
-               size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher));
+               size_t enclen = datalen + ENC_AUTHTAG_SIZE;
                loff_t pos = 0;
 
-               /* prepare aligned data to encrypt */
                data = kmalloc(enclen, GFP_KERNEL);
                if (!data)
                        return -ENOMEM;
-
                memcpy(data, prep->data, datalen);
-               memset(data + datalen, 0x00, enclen - datalen);
 
                /* generate random key */
                enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL);
@@ -163,13 +171,12 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                        ret = -ENOMEM;
                        goto error;
                }
-
-               ret = big_key_gen_enckey(enckey);
-               if (ret)
+               ret = get_random_bytes_wait(enckey, ENC_KEY_SIZE);
+               if (unlikely(ret))
                        goto err_enckey;
 
                /* encrypt aligned data */
-               ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey);
+               ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey);
                if (ret)
                        goto err_enckey;
 
@@ -195,7 +202,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                *path = file->f_path;
                path_get(path);
                fput(file);
-               kfree(data);
+               kzfree(data);
        } else {
                /* Just store the data in a buffer */
                void *data = kmalloc(datalen, GFP_KERNEL);
@@ -211,9 +218,9 @@ int big_key_preparse(struct key_preparsed_payload *prep)
 err_fput:
        fput(file);
 err_enckey:
-       kfree(enckey);
+       kzfree(enckey);
 error:
-       kfree(data);
+       kzfree(data);
        return ret;
 }
 
@@ -227,7 +234,7 @@ void big_key_free_preparse(struct key_preparsed_payload *prep)
 
                path_put(path);
        }
-       kfree(prep->payload.data[big_key_data]);
+       kzfree(prep->payload.data[big_key_data]);
 }
 
 /*
@@ -259,7 +266,7 @@ void big_key_destroy(struct key *key)
                path->mnt = NULL;
                path->dentry = NULL;
        }
-       kfree(key->payload.data[big_key_data]);
+       kzfree(key->payload.data[big_key_data]);
        key->payload.data[big_key_data] = NULL;
 }
 
@@ -295,7 +302,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
                struct file *file;
                u8 *data;
                u8 *enckey = (u8 *)key->payload.data[big_key_data];
-               size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher));
+               size_t enclen = datalen + ENC_AUTHTAG_SIZE;
                loff_t pos = 0;
 
                data = kmalloc(enclen, GFP_KERNEL);
@@ -328,7 +335,7 @@ long big_key_read(const struct key *key, char __user *buffer, size_t buflen)
 err_fput:
                fput(file);
 error:
-               kfree(data);
+               kzfree(data);
        } else {
                ret = datalen;
                if (copy_to_user(buffer, key->payload.data[big_key_data],
@@ -344,47 +351,31 @@ error:
  */
 static int __init big_key_init(void)
 {
-       struct crypto_skcipher *cipher;
-       struct crypto_rng *rng;
        int ret;
 
-       rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
-       if (IS_ERR(rng)) {
-               pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng));
-               return PTR_ERR(rng);
-       }
-
-       big_key_rng = rng;
-
-       /* seed RNG */
-       ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
-       if (ret) {
-               pr_err("Can't reset rng: %d\n", ret);
-               goto error_rng;
-       }
-
        /* init block cipher */
-       cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
-       if (IS_ERR(cipher)) {
-               ret = PTR_ERR(cipher);
+       big_key_aead = crypto_alloc_aead(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(big_key_aead)) {
+               ret = PTR_ERR(big_key_aead);
                pr_err("Can't alloc crypto: %d\n", ret);
-               goto error_rng;
+               return ret;
+       }
+       ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE);
+       if (ret < 0) {
+               pr_err("Can't set crypto auth tag len: %d\n", ret);
+               goto free_aead;
        }
-
-       big_key_skcipher = cipher;
 
        ret = register_key_type(&key_type_big_key);
        if (ret < 0) {
                pr_err("Can't register type: %d\n", ret);
-               goto error_cipher;
+               goto free_aead;
        }
 
        return 0;
 
-error_cipher:
-       crypto_free_skcipher(big_key_skcipher);
-error_rng:
-       crypto_free_rng(big_key_rng);
+free_aead:
+       crypto_free_aead(big_key_aead);
        return ret;
 }
 
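Moving from ecb(aes) to gcm(aes) also simplifies the buffer arithmetic: instead of zero-padding to the cipher block size, the stored blob is the plaintext plus a 16-byte authentication tag. A sizing sketch using the constants in this file:

    /*
     * enclen = datalen + ENC_AUTHTAG_SIZE
     *
     * e.g. a 1000-byte payload is stored as 1016 bytes (ciphertext plus
     * GCM tag), where the old code stored ALIGN(1000, 16) = 1008 padded
     * bytes and authenticated none of them.
     */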
index 1c02c65470384aa076e2de6885ecba356eb44342..503adbae7b0dd0b096aa7fd672fddb0d3513d115 100644 (file)
@@ -141,7 +141,7 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref,
 extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx);
 extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx);
 
-extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check);
+extern struct key *find_keyring_by_name(const char *name, bool uid_keyring);
 
 extern int install_user_keyrings(void);
 extern int install_thread_keyring_to_cred(struct cred *);
index 83da68d98b40b452a1c8b37121a6ca270387d4f6..eb914a838840df416f26af2c0cd73f87c60a5032 100644 (file)
@@ -54,10 +54,10 @@ void __key_check(const struct key *key)
 struct key_user *key_user_lookup(kuid_t uid)
 {
        struct key_user *candidate = NULL, *user;
-       struct rb_node *parent = NULL;
-       struct rb_node **p;
+       struct rb_node *parent, **p;
 
 try_again:
+       parent = NULL;
        p = &key_user_tree.rb_node;
        spin_lock(&key_user_lock);
 
@@ -302,6 +302,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                key->flags |= 1 << KEY_FLAG_IN_QUOTA;
        if (flags & KEY_ALLOC_BUILT_IN)
                key->flags |= 1 << KEY_FLAG_BUILTIN;
+       if (flags & KEY_ALLOC_UID_KEYRING)
+               key->flags |= 1 << KEY_FLAG_UID_KEYRING;
 
 #ifdef KEY_DEBUGGING
        key->magic = KEY_DEBUG_MAGIC;
index ab0b337c84b4c02e4856719398edb94d1caf101a..365ff85d7e27122db220fad6d633f8e0b352209f 100644 (file)
@@ -766,12 +766,17 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen)
 
        key = key_ref_to_ptr(key_ref);
 
+       if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
+               ret = -ENOKEY;
+               goto error2;
+       }
+
        /* see if we can read it directly */
        ret = key_permission(key_ref, KEY_NEED_READ);
        if (ret == 0)
                goto can_read_key;
        if (ret != -EACCES)
-               goto error;
+               goto error2;
 
        /* we can't; see if it's searchable from this process's keyrings
         * - we automatically take account of the fact that it may be
@@ -1406,11 +1411,9 @@ long keyctl_assume_authority(key_serial_t id)
        }
 
        ret = keyctl_change_reqkey_auth(authkey);
-       if (ret < 0)
-               goto error;
+       if (ret == 0)
+               ret = authkey->serial;
        key_put(authkey);
-
-       ret = authkey->serial;
 error:
        return ret;
 }
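
Editor's note: besides simplifying the flow, the hunk above fixes an ordering hazard; the old code dropped its reference with key_put() and only then read authkey->serial. A minimal sketch of the safe pattern (illustrative, restating the fix):

/* Capture any fields you need while the reference is still held. */
ret = keyctl_change_reqkey_auth(authkey);
if (ret == 0)
        ret = authkey->serial;  /* read before the ref is dropped */
key_put(authkey);               /* now safe to release */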
index de81793f9920787101dec77eca28ddfbe91ebbc7..4fa82a8a9c0e6cb778a08874724ea546cd602ade 100644 (file)
@@ -423,7 +423,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m)
 }
 
 struct keyring_read_iterator_context {
-       size_t                  qty;
+       size_t                  buflen;
        size_t                  count;
        key_serial_t __user     *buffer;
 };
@@ -435,9 +435,9 @@ static int keyring_read_iterator(const void *object, void *data)
        int ret;
 
        kenter("{%s,%d},,{%zu/%zu}",
-              key->type->name, key->serial, ctx->count, ctx->qty);
+              key->type->name, key->serial, ctx->count, ctx->buflen);
 
-       if (ctx->count >= ctx->qty)
+       if (ctx->count >= ctx->buflen)
                return 1;
 
        ret = put_user(key->serial, ctx->buffer);
@@ -472,16 +472,12 @@ static long keyring_read(const struct key *keyring,
                return 0;
 
        /* Calculate how much data we could return */
-       ctx.qty = nr_keys * sizeof(key_serial_t);
-
        if (!buffer || !buflen)
-               return ctx.qty;
-
-       if (buflen > ctx.qty)
-               ctx.qty = buflen;
+               return nr_keys * sizeof(key_serial_t);
 
        /* Copy the IDs of the subscribed keys into the buffer */
        ctx.buffer = (key_serial_t __user *)buffer;
+       ctx.buflen = buflen;
        ctx.count = 0;
        ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx);
        if (ret < 0) {
@@ -1101,15 +1097,15 @@ found:
 /*
  * Find a keyring with the specified name.
  *
- * All named keyrings in the current user namespace are searched, provided they
- * grant Search permission directly to the caller (unless this check is
- * skipped).  Keyrings whose usage points have reached zero or who have been
- * revoked are skipped.
+ * Only keyrings that have nonzero refcount, are not revoked, and are owned by a
+ * user in the current user namespace are considered.  If @uid_keyring is %true,
+ * the keyring additionally must have been allocated as a user or user session
+ * keyring; otherwise, it must grant Search permission directly to the caller.
  *
 * Returns a pointer to the keyring with the keyring's refcount having been
  * incremented on success.  -ENOKEY is returned if a key could not be found.
  */
-struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
+struct key *find_keyring_by_name(const char *name, bool uid_keyring)
 {
        struct key *keyring;
        int bucket;
@@ -1137,10 +1133,15 @@ struct key *find_keyring_by_name(const char *name, bool skip_perm_check)
                        if (strcmp(keyring->description, name) != 0)
                                continue;
 
-                       if (!skip_perm_check &&
-                           key_permission(make_key_ref(keyring, 0),
-                                          KEY_NEED_SEARCH) < 0)
-                               continue;
+                       if (uid_keyring) {
+                               if (!test_bit(KEY_FLAG_UID_KEYRING,
+                                             &keyring->flags))
+                                       continue;
+                       } else {
+                               if (key_permission(make_key_ref(keyring, 0),
+                                                  KEY_NEED_SEARCH) < 0)
+                                       continue;
+                       }
 
                        /* we've got a match but we might end up racing with
                         * key_cleanup() if the keyring is currently 'dead'
index bf08d02b6646ae2077b15903471bfb4575b04f5a..de834309d100206bedca18c84493af6fdb665d7a 100644 (file)
@@ -187,7 +187,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
        struct keyring_search_context ctx = {
                .index_key.type         = key->type,
                .index_key.description  = key->description,
-               .cred                   = current_cred(),
+               .cred                   = m->file->f_cred,
                .match_data.cmp         = lookup_user_key_possessed,
                .match_data.raw_data    = key,
                .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT,
@@ -207,11 +207,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
                }
        }
 
-       /* check whether the current task is allowed to view the key (assuming
-        * non-possession)
-        * - the caller holds a spinlock, and thus the RCU read lock, making our
-        *   access to __current_cred() safe
-        */
+       /* check whether the current task is allowed to view the key */
        rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW);
        if (rc < 0)
                return 0;
index 86bced9fdbdf22eb60170584d87730e1179a2744..293d3598153bf0c8611f319f4c1d7c50f364e600 100644 (file)
@@ -77,7 +77,8 @@ int install_user_keyrings(void)
                if (IS_ERR(uid_keyring)) {
                        uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID,
                                                    cred, user_keyring_perm,
-                                                   KEY_ALLOC_IN_QUOTA,
+                                                   KEY_ALLOC_UID_KEYRING |
+                                                       KEY_ALLOC_IN_QUOTA,
                                                    NULL, NULL);
                        if (IS_ERR(uid_keyring)) {
                                ret = PTR_ERR(uid_keyring);
@@ -94,7 +95,8 @@ int install_user_keyrings(void)
                        session_keyring =
                                keyring_alloc(buf, user->uid, INVALID_GID,
                                              cred, user_keyring_perm,
-                                             KEY_ALLOC_IN_QUOTA,
+                                             KEY_ALLOC_UID_KEYRING |
+                                                 KEY_ALLOC_IN_QUOTA,
                                              NULL, NULL);
                        if (IS_ERR(session_keyring)) {
                                ret = PTR_ERR(session_keyring);
index afe9d22ab3611f2dc621db0092e1b102e005be78..6ebf1af8fce963eeb0cc4c423c153318096e65a6 100644 (file)
@@ -120,6 +120,18 @@ static void request_key_auth_revoke(struct key *key)
        }
 }
 
+static void free_request_key_auth(struct request_key_auth *rka)
+{
+       if (!rka)
+               return;
+       key_put(rka->target_key);
+       key_put(rka->dest_keyring);
+       if (rka->cred)
+               put_cred(rka->cred);
+       kfree(rka->callout_info);
+       kfree(rka);
+}
+
 /*
  * Destroy an instantiation authorisation token key.
  */
@@ -129,15 +141,7 @@ static void request_key_auth_destroy(struct key *key)
 
        kenter("{%d}", key->serial);
 
-       if (rka->cred) {
-               put_cred(rka->cred);
-               rka->cred = NULL;
-       }
-
-       key_put(rka->target_key);
-       key_put(rka->dest_keyring);
-       kfree(rka->callout_info);
-       kfree(rka);
+       free_request_key_auth(rka);
 }
 
 /*
@@ -151,22 +155,18 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
        const struct cred *cred = current->cred;
        struct key *authkey = NULL;
        char desc[20];
-       int ret;
+       int ret = -ENOMEM;
 
        kenter("%d,", target->serial);
 
        /* allocate an auth record */
-       rka = kmalloc(sizeof(*rka), GFP_KERNEL);
-       if (!rka) {
-               kleave(" = -ENOMEM");
-               return ERR_PTR(-ENOMEM);
-       }
-       rka->callout_info = kmalloc(callout_len, GFP_KERNEL);
-       if (!rka->callout_info) {
-               kleave(" = -ENOMEM");
-               kfree(rka);
-               return ERR_PTR(-ENOMEM);
-       }
+       rka = kzalloc(sizeof(*rka), GFP_KERNEL);
+       if (!rka)
+               goto error;
+       rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL);
+       if (!rka->callout_info)
+               goto error_free_rka;
+       rka->callout_len = callout_len;
 
        /* see if the calling process is already servicing the key request of
         * another process */
@@ -176,8 +176,12 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 
                /* if the auth key has been revoked, then the key we're
                 * servicing is already instantiated */
-               if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags))
-                       goto auth_key_revoked;
+               if (test_bit(KEY_FLAG_REVOKED,
+                            &cred->request_key_auth->flags)) {
+                       up_read(&cred->request_key_auth->sem);
+                       ret = -EKEYREVOKED;
+                       goto error_free_rka;
+               }
 
                irka = cred->request_key_auth->payload.data[0];
                rka->cred = get_cred(irka->cred);
@@ -193,8 +197,6 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 
        rka->target_key = key_get(target);
        rka->dest_keyring = key_get(dest_keyring);
-       memcpy(rka->callout_info, callout_info, callout_len);
-       rka->callout_len = callout_len;
 
        /* allocate the auth key */
        sprintf(desc, "%x", target->serial);
@@ -205,32 +207,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
                            KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL);
        if (IS_ERR(authkey)) {
                ret = PTR_ERR(authkey);
-               goto error_alloc;
+               goto error_free_rka;
        }
 
        /* construct the auth key */
        ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
        if (ret < 0)
-               goto error_inst;
+               goto error_put_authkey;
 
        kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage));
        return authkey;
 
-auth_key_revoked:
-       up_read(&cred->request_key_auth->sem);
-       kfree(rka->callout_info);
-       kfree(rka);
-       kleave("= -EKEYREVOKED");
-       return ERR_PTR(-EKEYREVOKED);
-
-error_inst:
-       key_revoke(authkey);
+error_put_authkey:
        key_put(authkey);
-error_alloc:
-       key_put(rka->target_key);
-       key_put(rka->dest_keyring);
-       kfree(rka->callout_info);
-       kfree(rka);
+error_free_rka:
+       free_request_key_auth(rka);
+error:
        kleave("= %d", ret);
        return ERR_PTR(ret);
 }
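
Editor's note: besides consolidating the error paths into free_request_key_auth(), the hunk above replaces a kmalloc()+memcpy() pair with kmemdup(), the idiomatic one-call equivalent:

/* Equivalent forms; kmemdup() allocates and copies in one step. */
rka->callout_info = kmalloc(callout_len, GFP_KERNEL);
if (rka->callout_info)
        memcpy(rka->callout_info, callout_info, callout_len);
/* ...is the same as... */
rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL);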
index 69d09c39bbcd00858121ebf9f6bb1f7a57ff20e1..cd7359e23d869465d1cb70b637c64c85283e37ac 100644 (file)
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW            0
 #define KVM_S390_VM_TOD_HIGH           1
+#define KVM_S390_VM_TOD_EXT            2
+
+struct kvm_s390_vm_tod_clock {
+       __u8  epoch_idx;
+       __u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
index 8ea315a11fe0d4461abe7aeea2738be97a79b189..2519c6c801c917d7a30b3826c86679edf18616d5 100644 (file)
 
 #define X86_FEATURE_HW_PSTATE  ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME                ( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC       (15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF       (15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
index 5dff775af7cd6456f7177d9ce5888ae78dc6bc10..c10c9128f54e6b7296014a74e7a253a1eedaacd9 100644 (file)
 # define DISABLE_K6_MTRR       (1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR     (1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR   (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID          0
 #else
 # define DISABLE_VME           0
 # define DISABLE_K6_MTRR       0
 # define DISABLE_CYRIX_ARR     0
 # define DISABLE_CENTAUR_MCR   0
+# define DISABLE_PCID          (1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
 #define DISABLED_MASK1 0
 #define DISABLED_MASK2 0
 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 0
+#define DISABLED_MASK4 (DISABLE_PCID)
 #define DISABLED_MASK5 0
 #define DISABLED_MASK6 0
 #define DISABLED_MASK7 0
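
Editor's note: a worked example of the mask arithmetic, using values from cpufeatures.h. X86_FEATURE_PCID is (4*32 + 17), i.e. bit 17 of feature word 4, so on 32-bit builds:

/* DISABLE_PCID == 1 << (X86_FEATURE_PCID & 31) == 1 << 17; OR-ing it
 * into DISABLED_MASK4 masks the PCID feature bit out at compile time,
 * while 64-bit builds define it as 0 and leave PCID usable.
 */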
diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h
new file mode 100644 (file)
index 0000000..e4732d3
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size.  If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments.  The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments.  System call specific header files
+ * that use this encoding should include this file.  They can then
+ * provide definitions based on these with their own specific prefix,
+ * for example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT      26
+#define HUGETLB_FLAG_ENCODE_MASK       0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB       (16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB      (19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB                (20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB                (21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB                (23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB       (24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB      (28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB                (30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB                (31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB       (34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
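
Editor's note: a worked example of the encoding defined above. A 2 MB huge page is 2^21 bytes, so 21 goes into bits [26:31]:

/* Encoding: HUGETLB_FLAG_ENCODE_2MB == 21 << 26 == 0x54000000.
 * Decoding the size back out of a flags argument:
 */
unsigned int page_shift = (flags >> HUGETLB_FLAG_ENCODE_SHIFT) &
                          HUGETLB_FLAG_ENCODE_MASK;
unsigned long page_size = 1UL << page_shift;   /* 1UL << 21 == 2 MB */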
index 8c27db0c5c08ce84fc7299bd9bff2cee571b9397..203268f9231e155d72307995989feab4857defe7 100644 (file)
                                           overrides the coredump filter bits */
 #define MADV_DODUMP    17              /* Clear the MADV_DONTDUMP flag */
 
+#define MADV_WIPEONFORK 18             /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19             /* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE       0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK  0x3f
-
 #define PKEY_DISABLE_ACCESS    0x1
 #define PKEY_DISABLE_WRITE     0x2
 #define PKEY_ACCESS_MASK       (PKEY_DISABLE_ACCESS |\
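
Editor's note: a hedged userspace sketch (not part of this patch) of the new madvise flags above. The canonical use case is parent-only secrets, such as PRNG or session-key state, that a fork()ed child must not inherit:

#include <string.h>
#include <sys/mman.h>

int main(void)
{
        char key[32] = "example-session-key";   /* stand-in secret */
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
                return 1;
        memcpy(p, key, sizeof(key));
        /* Children created by fork() now see this page zero-filled;
         * MADV_KEEPONFORK would undo the marking. */
        return madvise(p, 4096, MADV_WIPEONFORK) ? 1 : 0;
}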
index 101593ab10ac795808db33848092eb62ac542560..97677cd6964db099689f96f11b9731c748bebcfa 100644 (file)
@@ -700,6 +700,7 @@ struct drm_prime_handle {
 
 struct drm_syncobj_create {
        __u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
        __u32 flags;
 };
 
@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
        __u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+       __u64 handles;
+       /* absolute timeout */
+       __s64 timeout_nsec;
+       __u32 count_handles;
+       __u32 flags;
+       __u32 first_signaled; /* only valid when not waiting all */
+       __u32 pad;
+};
+
+struct drm_syncobj_array {
+       __u64 handles;
+       __u32 count_handles;
+       __u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +859,9 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY      DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT         DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET                DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL       DRM_IOWR(0xC5, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
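
Editor's note: a hedged userspace sketch of the new wait ioctl. Here fd, handle and now_ns are assumptions (an open DRM device node, an existing __u32 syncobj handle, and the current CLOCK_MONOTONIC time in nanoseconds; the timeout is absolute):

struct drm_syncobj_wait wait = {
        .handles = (__u64)(uintptr_t)&handle,   /* pointer to an array of one */
        .count_handles = 1,
        .timeout_nsec = now_ns + 1000000000ll,  /* wait up to ~1 second */
        .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT,
};
int ret = ioctl(fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);  /* 0 once signaled */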
index 7ccbd6a2bbe07b387b43bb8b4ac5eba0f52d1cd0..6598fb76d2c27741d2c916f914b70c5f472911f9 100644 (file)
@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_CONTEXT_GETPARAM  0x34
 #define DRM_I915_GEM_CONTEXT_SETPARAM  0x35
 #define DRM_I915_PERF_OPEN             0x36
+#define DRM_I915_PERF_ADD_CONFIG       0x37
+#define DRM_I915_PERF_REMOVE_CONFIG    0x38
 
 #define DRM_IOCTL_I915_INIT            DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH           DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM    DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM    DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_PERF_OPEN       DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG      DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST         48
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures.  See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
+
 typedef struct drm_i915_getparam {
        __s32 param;
        /*
@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
        __u64 rsvd2;
 };
 
+struct drm_i915_gem_exec_fence {
+       /**
+        * User's handle for a drm_syncobj to wait on or signal.
+        */
+       __u32 handle;
+
+#define I915_EXEC_FENCE_WAIT            (1<<0)
+#define I915_EXEC_FENCE_SIGNAL          (1<<1)
+       __u32 flags;
+};
+
 struct drm_i915_gem_execbuffer2 {
        /**
         * List of gem_exec_object2 structs
@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
        __u32 DR1;
        __u32 DR4;
        __u32 num_cliprects;
-       /** This is a struct drm_clip_rect *cliprects */
+       /**
+        * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+        * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
+        * struct drm_i915_gem_exec_fence *fences.
+        */
        __u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (7<<0)
 #define I915_EXEC_DEFAULT                (0<<0)
@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
  * element).
  */
 #define I915_EXEC_BATCH_FIRST          (1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY   (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK      (0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
        DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+       /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+       char uuid[36];
+
+       __u32 n_mux_regs;
+       __u32 n_boolean_regs;
+       __u32 n_flex_regs;
+
+       __u64 __user mux_regs_ptr;
+       __u64 __user boolean_regs_ptr;
+       __u64 __user flex_regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
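
Editor's note: a hedged sketch of I915_EXEC_FENCE_ARRAY usage. execbuf is assumed to be an otherwise populated struct drm_i915_gem_execbuffer2, and wait_handle/signal_handle pre-created drm_syncobj handles (all illustrative, not from the patch):

struct drm_i915_gem_exec_fence fences[2] = {
        { .handle = wait_handle,   .flags = I915_EXEC_FENCE_WAIT   },
        { .handle = signal_handle, .flags = I915_EXEC_FENCE_SIGNAL },
};
execbuf.cliprects_ptr = (__u64)(uintptr_t)fences; /* reused as the fence array */
execbuf.num_cliprects = 2;                        /* reused as the fence count */
execbuf.flags |= I915_EXEC_FENCE_ARRAY;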
index 461811e5714092f17619902593b9ac47e8572a6d..43ab5c402f98f3c2dc15ccc49ec475530a39e401 100644 (file)
@@ -143,12 +143,6 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
-enum bpf_sockmap_flags {
-       BPF_SOCKMAP_UNSPEC,
-       BPF_SOCKMAP_STRPARSER,
-       __MAX_BPF_SOCKMAP_FLAG
-};
-
 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
  * to the given target_fd cgroup the descendent cgroup will be able to
  * override effective bpf program that was inherited from this cgroup
@@ -368,9 +362,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
+ *     @flags:
+ *       cls_bpf:
+ *          bit 0 - if set, redirect to ingress instead of egress
+ *          other bits - reserved
+ *       xdp_bpf:
+ *         all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *            xdp_bpf: XDP_REDIRECT on success or XDP_ABORTED on error
+ * int bpf_redirect_map(map, key, flags)
+ *     redirect to endpoint in map
+ *     @map: pointer to dev map
+ *     @key: index in map to lookup
+ *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORTED on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -632,7 +637,7 @@ union bpf_attr {
        FN(skb_adjust_room),            \
        FN(redirect_map),               \
        FN(sk_redirect_map),            \
-       FN(sock_map_update),
+       FN(sock_map_update),            \
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -753,20 +758,23 @@ struct bpf_sock {
        __u32 family;
        __u32 type;
        __u32 protocol;
+       __u32 mark;
+       __u32 priority;
 };
 
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
        XDP_ABORTED = 0,
        XDP_DROP,
        XDP_PASS,
        XDP_TX,
+       XDP_REDIRECT,
 };
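
Editor's note: a minimal XDP sketch, written against 2017-era samples/bpf conventions, exercising the new XDP_REDIRECT action via bpf_redirect_map(); the tx_port map name and key are illustrative:

#include <linux/bpf.h>
#include "bpf_helpers.h"        /* SEC() and helper wrappers from samples/bpf */

struct bpf_map_def SEC("maps") tx_port = {
        .type = BPF_MAP_TYPE_DEVMAP,
        .key_size = sizeof(int),
        .value_size = sizeof(int),
        .max_entries = 1,
};

SEC("xdp_redirect")
int xdp_redirect_prog(struct xdp_md *ctx)
{
        /* Redirect to the ifindex stored at key 0; the helper returns
         * XDP_REDIRECT on success and XDP_ABORTED on error. */
        return bpf_redirect_map(&tx_port, 0, 0);
}

char _license[] SEC("license") = "GPL";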
 
 /* user accessible metadata for XDP packet hook
index 6cd63c18708ae1d23dbc280ed49aed55f817a2f5..83888758741184f969f3b8fd1738e38bdc24da78 100644 (file)
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
        __u64 flags;
        __u32 slb_size;
-       __u32 pad;
+       __u16 data_keys;        /* # storage keys supported for data */
+       __u16 instr_keys;       /* # storage keys supported for instructions */
        struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
index 81d8edf11789aaae96a25da00b8a8f3ec0ed83aa..a937480d7cd345f4e1b12c2e409f58c0489e8a4e 100644 (file)
@@ -1,7 +1,8 @@
 #ifndef _UAPI_LINUX_MMAN_H
 #define _UAPI_LINUX_MMAN_H
 
-#include <uapi/asm/mman.h>
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
 
 #define MREMAP_MAYMOVE 1
 #define MREMAP_FIXED   2
 #define OVERCOMMIT_ALWAYS              1
 #define OVERCOMMIT_NEVER               2
 
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK  HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB  HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB   HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB   HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB   HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB  HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB   HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB   HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB  HUGETLB_FLAG_ENCODE_16GB
+
 #endif /* _UAPI_LINUX_MMAN_H */
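
Editor's note: a hedged userspace sketch mapping a single 2 MB huge page with the new convenience macro (assumes huge pages have been reserved in the pool, e.g. via /proc/sys/vm/nr_hugepages):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        size_t len = 2 * 1024 * 1024;
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB,
                       -1, 0);
        if (p == MAP_FAILED) {
                perror("mmap");         /* e.g. no 2 MB pages available */
                return 1;
        }
        munmap(p, len);
        return 0;
}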
index 0f22768c0d4d0c26ae86ad3e51b49bd760f4ed6a..34a579f806e390337bdee738ae507364c02e7ad7 100644 (file)
@@ -284,11 +284,16 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
        case 0x8d:
                if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
-                       /* lea disp(%rsp), reg */
                        *type = INSN_STACK;
-                       op->src.type = OP_SRC_ADD;
+                       if (!insn.displacement.value) {
+                               /* lea (%rsp), reg */
+                               op->src.type = OP_SRC_REG;
+                       } else {
+                               /* lea disp(%rsp), reg */
+                               op->src.type = OP_SRC_ADD;
+                               op->src.offset = insn.displacement.value;
+                       }
                        op->src.reg = CFI_SP;
-                       op->src.offset = insn.displacement.value;
                        op->dest.type = OP_DEST_REG;
                        op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 
index 62072822dc85d986671b6b15587229f81cfe19ec..627b7cada1442b65dbbcc600661f35caf8429cdc 100644 (file)
@@ -1,34 +1,8 @@
 tools/perf
-tools/arch/alpha/include/asm/barrier.h
-tools/arch/arm/include/asm/barrier.h
-tools/arch/arm64/include/asm/barrier.h
-tools/arch/ia64/include/asm/barrier.h
-tools/arch/mips/include/asm/barrier.h
-tools/arch/powerpc/include/asm/barrier.h
-tools/arch/s390/include/asm/barrier.h
-tools/arch/sh/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier_32.h
-tools/arch/sparc/include/asm/barrier_64.h
-tools/arch/tile/include/asm/barrier.h
-tools/arch/x86/include/asm/barrier.h
-tools/arch/x86/include/asm/cmpxchg.h
-tools/arch/x86/include/asm/cpufeatures.h
-tools/arch/x86/include/asm/disabled-features.h
-tools/arch/x86/include/asm/required-features.h
-tools/arch/x86/include/uapi/asm/svm.h
-tools/arch/x86/include/uapi/asm/vmx.h
-tools/arch/x86/include/uapi/asm/kvm.h
-tools/arch/x86/include/uapi/asm/kvm_perf.h
-tools/arch/x86/lib/memcpy_64.S
-tools/arch/x86/lib/memset_64.S
-tools/arch/s390/include/uapi/asm/kvm_perf.h
-tools/arch/s390/include/uapi/asm/sie.h
-tools/arch/xtensa/include/asm/barrier.h
+tools/arch
 tools/scripts
 tools/build
-tools/arch/x86/include/asm/atomic.h
-tools/arch/x86/include/asm/rmwcc.h
+tools/include
 tools/lib/traceevent
 tools/lib/api
 tools/lib/bpf
@@ -42,60 +16,3 @@ tools/lib/find_bit.c
 tools/lib/bitmap.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
-tools/include/asm/alternative-asm.h
-tools/include/asm/atomic.h
-tools/include/asm/barrier.h
-tools/include/asm/bug.h
-tools/include/asm-generic/atomic-gcc.h
-tools/include/asm-generic/barrier.h
-tools/include/asm-generic/bitops/arch_hweight.h
-tools/include/asm-generic/bitops/atomic.h
-tools/include/asm-generic/bitops/const_hweight.h
-tools/include/asm-generic/bitops/__ffs.h
-tools/include/asm-generic/bitops/__ffz.h
-tools/include/asm-generic/bitops/__fls.h
-tools/include/asm-generic/bitops/find.h
-tools/include/asm-generic/bitops/fls64.h
-tools/include/asm-generic/bitops/fls.h
-tools/include/asm-generic/bitops/hweight.h
-tools/include/asm-generic/bitops.h
-tools/include/linux/atomic.h
-tools/include/linux/bitops.h
-tools/include/linux/compiler.h
-tools/include/linux/compiler-gcc.h
-tools/include/linux/coresight-pmu.h
-tools/include/linux/bug.h
-tools/include/linux/filter.h
-tools/include/linux/hash.h
-tools/include/linux/kernel.h
-tools/include/linux/list.h
-tools/include/linux/log2.h
-tools/include/uapi/asm-generic/fcntl.h
-tools/include/uapi/asm-generic/ioctls.h
-tools/include/uapi/asm-generic/mman-common.h
-tools/include/uapi/asm-generic/mman.h
-tools/include/uapi/drm/drm.h
-tools/include/uapi/drm/i915_drm.h
-tools/include/uapi/linux/bpf.h
-tools/include/uapi/linux/bpf_common.h
-tools/include/uapi/linux/fcntl.h
-tools/include/uapi/linux/hw_breakpoint.h
-tools/include/uapi/linux/kvm.h
-tools/include/uapi/linux/mman.h
-tools/include/uapi/linux/perf_event.h
-tools/include/uapi/linux/sched.h
-tools/include/uapi/linux/stat.h
-tools/include/uapi/linux/vhost.h
-tools/include/uapi/sound/asound.h
-tools/include/linux/poison.h
-tools/include/linux/rbtree.h
-tools/include/linux/rbtree_augmented.h
-tools/include/linux/refcount.h
-tools/include/linux/string.h
-tools/include/linux/stringify.h
-tools/include/linux/types.h
-tools/include/linux/err.h
-tools/include/linux/bitmap.h
-tools/include/linux/time64.h
-tools/arch/*/include/uapi/asm/mman.h
-tools/arch/*/include/uapi/asm/perf_regs.h
index bd518b623d7a203e576653705866f910409fa144..5bd7b9260cc0858c36730ee367aecc56df6c91bb 100644 (file)
@@ -1,5 +1,4 @@
 libperf-y += header.o
-libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c
deleted file mode 100644 (file)
index e103f6e..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Architecture specific ELF symbol handling and relocation mapping.
- *
- * Copyright 2017 IBM Corp.
- * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-
-#include "symbol.h"
-
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
-       if (ehdr.e_type == ET_EXEC)
-               return false;
-       return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
-}
-
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-                                GElf_Shdr *shdr __maybe_unused,
-                                struct map *map)
-{
-       if (map->type == MAP__FUNCTION)
-               sym->st_value += map->start;
-}
-#endif
index 510b513e0f01fe96e110b9eab45db527b8c86104..be09d77cade02edde9973597219ac8b4ee50d3bc 100644 (file)
@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
                callchain_param.mode = CHAIN_FOLDED;
                return 0;
        }
-
-       pr_err("Invalid callchain mode: %s\n", value);
        return -1;
 }
 
@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
                callchain_param.order_set = true;
                return 0;
        }
-
-       pr_err("Invalid callchain order: %s\n", value);
        return -1;
 }
 
@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
                callchain_param.branch_callstack = 1;
                return 0;
        }
-
-       pr_err("Invalid callchain sort key: %s\n", value);
        return -1;
 }
 
@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
                callchain_param.value = CCVAL_COUNT;
                return 0;
        }
-
-       pr_err("Invalid callchain config key: %s\n", value);
        return -1;
 }
 
@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
 
                return ret;
        }
-       if (!strcmp(var, "print-type"))
-               return parse_callchain_mode(value);
-       if (!strcmp(var, "order"))
-               return parse_callchain_order(value);
-       if (!strcmp(var, "sort-key"))
-               return parse_callchain_sort_key(value);
+       if (!strcmp(var, "print-type")){
+               int ret;
+               ret = parse_callchain_mode(value);
+               if (ret == -1)
+                       pr_err("Invalid callchain mode: %s\n", value);
+               return ret;
+       }
+       if (!strcmp(var, "order")){
+               int ret;
+               ret = parse_callchain_order(value);
+               if (ret == -1)
+                       pr_err("Invalid callchain order: %s\n", value);
+               return ret;
+       }
+       if (!strcmp(var, "sort-key")){
+               int ret;
+               ret = parse_callchain_sort_key(value);
+               if (ret == -1)
+                       pr_err("Invalid callchain sort key: %s\n", value);
+               return ret;
+       }
        if (!strcmp(var, "threshold")) {
                callchain_param.min_percent = strtod(value, &endptr);
                if (value == endptr) {
index 4bb89373eb52893e7a3c7206da85af1e72afdfc3..0dccdb89572cdb455724a6a48a86d96821fc53e2 100644 (file)
@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
        return evsel;
 }
 
+static bool perf_event_can_profile_kernel(void)
+{
+       return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
 struct perf_evsel *perf_evsel__new_cycles(bool precise)
 {
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
-               .exclude_kernel = geteuid() != 0,
+               .exclude_kernel = !perf_event_can_profile_kernel(),
        };
        struct perf_evsel *evsel;
 
index 5c39f420111e5f679fa4a486383174d9c10d68b5..9cf781f0d8a2d797c88bf0bef5cab8759a6cbf96 100644 (file)
@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
                GElf_Sym *sym __maybe_unused) { }
 
-void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
-                                      struct map *map __maybe_unused)
-{
-       sym->st_value -= shdr->sh_addr - shdr->sh_offset;
-}
-
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
                  struct symsrc *runtime_ss, int kmodule)
 {
@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
                        /* Adjust symbol to map to file offset */
                        if (adjust_kernel_syms)
-                               arch__adjust_sym_map_offset(&sym, &shdr, map);
+                               sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
                        if (strcmp(section_name,
                                   (curr_dso->short_name +
index 2bd6a1f01a1cc5b5bb15ee0aae59ae921ebe2a86..aad99e7e179bbb8f95bd39cf0d659d136ed2ef6d 100644 (file)
@@ -344,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
 void arch__sym_update(struct symbol *s, GElf_Sym *sym);
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-                                GElf_Shdr *shdr __maybe_unused,
-                                struct map *map __maybe_unused);
 #endif
 
 #define SYMBOL_A 0
index 19e5db90394c2bf03ec1c08810c270cb965f65c8..6eea7cff3d4e96473644fcd7c034a8a32abfcb10 100644 (file)
@@ -15,9 +15,9 @@
 
 #include "syscalltbl.h"
 #include <stdlib.h>
+#include <linux/compiler.h>
 
 #ifdef HAVE_SYSCALL_TABLE
-#include <linux/compiler.h>
 #include <string.h>
 #include "string2.h"
 #include "util.h"
index 26ce4f7168be534de2eef4be6650de9617b03b33..ff805643b5f723a1eb5420f50f88819e529b7299 100644 (file)
@@ -52,6 +52,10 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
 BUILD := $(O)
 ifndef BUILD
   BUILD := $(KBUILD_OUTPUT)
@@ -62,32 +66,32 @@ endif
 
 export BUILD
 all:
-       for TARGET in $(TARGETS); do            \
+       @for TARGET in $(TARGETS); do           \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_tests: all
-       for TARGET in $(TARGETS); do \
+       @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
        done;
 
 hotplug:
-       for TARGET in $(TARGETS_HOTPLUG); do \
+       @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
        done;
 
 run_hotplug: hotplug
-       for TARGET in $(TARGETS_HOTPLUG); do \
+       @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
        done;
 
 clean_hotplug:
-       for TARGET in $(TARGETS_HOTPLUG); do \
+       @for TARGET in $(TARGETS_HOTPLUG); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
@@ -103,7 +107,7 @@ install:
 ifdef INSTALL_PATH
        @# Ask all targets to install their files
        mkdir -p $(INSTALL_PATH)
-       for TARGET in $(TARGETS); do \
+       @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
        done;
@@ -128,7 +132,7 @@ else
 endif
 
 clean:
-       for TARGET in $(TARGETS); do \
+       @for TARGET in $(TARGETS); do \
                BUILD_TARGET=$$BUILD/$$TARGET;  \
                make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
        done;
index 20ecbaa0d85d72b860678caf49c3e421105802b8..6c53a8906eff4c4fe6d98863050132d928de50f8 100644 (file)
@@ -12,6 +12,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
        unsigned int start, end, possible_cpus = 0;
        char buff[128];
        FILE *fp;
+       int n;
 
        fp = fopen(fcpu, "r");
        if (!fp) {
@@ -20,17 +21,17 @@ static inline unsigned int bpf_num_possible_cpus(void)
        }
 
        while (fgets(buff, sizeof(buff), fp)) {
-               if (sscanf(buff, "%u-%u", &start, &end) == 2) {
-                       possible_cpus = start == 0 ? end + 1 : 0;
-                       break;
+               n = sscanf(buff, "%u-%u", &start, &end);
+               if (n == 0) {
+                       printf("Failed to retrieve # possible CPUs!\n");
+                       exit(1);
+               } else if (n == 1) {
+                       end = start;
                }
+               possible_cpus = start == 0 ? end + 1 : 0;
+               break;
        }
-
        fclose(fp);
-       if (!possible_cpus) {
-               printf("Failed to retrieve # possible CPUs!\n");
-               exit(1);
-       }
 
        return possible_cpus;
 }
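
Editor's note: worked examples for the parsing above, reading the first line of /sys/devices/system/cpu/possible:

/*   "0-7\n" -> sscanf() == 2, start == 0, end == 7 -> possible_cpus == 8
 *   "0\n"   -> sscanf() == 1, end set to start     -> possible_cpus == 1
 * A range not starting at CPU 0 yields possible_cpus == 0.
 */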
index 6b214b7b10fb7c1c7ad6330c3769fbd1c46838d7..247b0a1899d70f55a5a02e2aac35a3136854652d 100644 (file)
@@ -2,14 +2,14 @@
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
+TEST_GEN_PROGS := step_after_suspend_test
+
 ifeq ($(ARCH),x86)
-TEST_GEN_PROGS := breakpoint_test
+TEST_GEN_PROGS += breakpoint_test
 endif
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-TEST_GEN_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS += breakpoint_test_arm64
 endif
 
-TEST_GEN_PROGS += step_after_suspend_test
-
 include ../lib.mk
 
index 2a1cb990874613c5ee9da2d371b95899d8eaa81a..a4fd4c851a5b7f27520801858d8f1f24e34ba56e 100644 (file)
@@ -1,6 +1,8 @@
 #!/bin/sh
 # description: Register/unregister many kprobe events
 
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
 # ftrace fentry skip size depends on the machine architecture.
 # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in
index 7c647f619d63faf3eff4c135c1b302565b7f78d5..f0c0369ccb7972d6deffb12534d1c64cc913a382 100644 (file)
@@ -7,14 +7,17 @@ TEST_PROGS := run.sh
 include ../lib.mk
 
 all:
-       for DIR in $(SUBDIRS); do               \
+       @for DIR in $(SUBDIRS); do              \
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+               if [ -e $$DIR/$(TEST_PROGS) ]; then     \
+                       rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;   \
+               fi;     \
        done
 
 override define RUN_TESTS
-       $(OUTPUT)/run.sh
+       @cd $(OUTPUT); ./run.sh
 endef
 
 override define INSTALL_RULE
@@ -33,7 +36,7 @@ override define EMIT_TESTS
 endef
 
 override define CLEAN
-       for DIR in $(SUBDIRS); do               \
+       @for DIR in $(SUBDIRS); do              \
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
                mkdir $$BUILD_TARGET  -p;       \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
index 849a90ffe8dd2d311b9b66bed9058fefdf28ba7d..a97e24edde39ed9d9d51441b01547856641b9606 100644 (file)
@@ -1,7 +1,9 @@
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDLIBS := $(LDLIBS) -lm
 
+ifeq (,$(filter $(ARCH),x86))
 TEST_GEN_FILES := msr aperf
+endif
 
 TEST_PROGS := run.sh
 
index 7868c106b8b1b80c41a8af739c8def48fd378bd8..d3ab48f91cd6a6ef1742653256e15418933de2cf 100755 (executable)
 
 EVALUATE_ONLY=0
 
-max_cpus=$(($(nproc)-1))
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+       echo "$0 # Skipped: Test can only run on x86 architectures."
+       exit 0
+fi
 
-# compile programs
-gcc aperf.c -Wall -D_GNU_SOURCE -o aperf  -lm
-[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1
-gcc -o msr msr.c -lm
-[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1
+max_cpus=$(($(nproc)-1))
 
 function run_test () {
 
index 693616651da5cbc27a779868e64697df80582ef3..f65886af7c0cac60e4502e8973b4a602fe67c7b6 100644 (file)
@@ -6,7 +6,14 @@ ifeq (0,$(MAKELEVEL))
 OUTPUT := $(shell pwd)
 endif
 
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
@@ -20,17 +27,28 @@ define RUN_TESTS
                test_num=`echo $$test_num+1 | bc`;      \
                echo "selftests: $$BASENAME_TEST";      \
                echo "========================================";        \
-               if [ ! -x $$BASENAME_TEST ]; then       \
+               if [ ! -x $$TEST ]; then        \
                        echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
                        echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
                else                                    \
-                       cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
+                       cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
                fi;                                     \
        done;
 endef
 
 run_tests: all
-       $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS))
+ifneq ($(KBUILD_SRC),)
+       @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+               @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+       fi
+       @if [ "X$(TEST_PROGS)" != "X" ]; then
+               $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+       else
+               $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+       fi
+else
+       $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
 
 define INSTALL_RULE
        @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then                                        \
@@ -38,10 +56,10 @@ define INSTALL_RULE
                echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";    \
                rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;           \
        fi
-       @if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then                                    \
+       @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then                                        \
                mkdir -p ${INSTALL_PATH};                                                                               \
-               echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";        \
-               rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;               \
+               echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";   \
+               rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;          \
        fi
 endef
 
@@ -53,15 +71,20 @@ else
 endif
 
 define EMIT_TESTS
-       @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+       @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
                BASENAME_TEST=`basename $$TEST`;        \
-               echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+               echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
        done;
 endef
 
 emit_tests:
        $(EMIT_TESTS)
 
+# Define RM if it isn't already defined; it is undefined in the make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
 define CLEAN
        $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 endef
@@ -69,6 +92,15 @@ endef
 clean:
        $(CLEAN)
 
+# When running make O= with the kselftest target from the top level,
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
 $(OUTPUT)/%:%.c
        $(LINK.c) $^ $(LDLIBS) -o $@
 
old mode 100644 (file)
new mode 100755 (executable)
index 79a664aeb8d76509a2f8e46aadf742571765a26a..0f5e347b068d3bb22d07ddd8308cc55fc263aebb 100644 (file)
@@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 include ../lib.mk
 
 override define RUN_TESTS
-       @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
-       @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+       $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+       $(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
 endef
 
 override define EMIT_TESTS
index 9801253e48021035b650a88e3d3d0c2affea8675..c612d6e38c6282cad7b56396e1d3e2c1c9d3a46c 100644 (file)
@@ -6,3 +6,4 @@ reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
+reuseaddr_conflict
index de1f5772b878ee1f4aee9e1452d1f0e83696af62..d86bca991f456a70206c790f733b0032bc7ad224 100644 (file)
@@ -5,9 +5,9 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_GEN_FILES =  socket
-TEST_GEN_FILES += psock_fanout psock_tpacket
-TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack msg_zerocopy
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
 include ../lib.mk
 
index 40232af5b023ee251bfc369fd73860d912809891..3ab6ec4039059cf127345652bccf6c34d5c4d273 100644 (file)
@@ -55,7 +55,7 @@
 #include <unistd.h>
 
 #ifndef SO_EE_ORIGIN_ZEROCOPY
-#define SO_EE_ORIGIN_ZEROCOPY          SO_EE_ORIGIN_UPAGE
+#define SO_EE_ORIGIN_ZEROCOPY          5
 #endif
 
 #ifndef SO_ZEROCOPY
index 4e00568d70c2c398651675ad763f646453b45b3b..90cb903c33815bcecc3a64a9f153c779476a6848 100755 (executable)
@@ -178,7 +178,7 @@ if [ "$(id -u)" -ne 0 ];then
        exit 0
 fi
 
-ip -Version 2>/dev/null >/dev/null
+ip link show 2>/dev/null >/dev/null
 if [ $? -ne 0 ];then
        echo "SKIP: Could not run test without the ip tool"
        exit 0
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644 (file)
index 0000000..7c5b126
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr, we shouldn't reset the tb
+ * when we open the ipv6 counterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+       int fd = -1;
+       int reuseaddr = 1;
+       int v6only = 1;
+       int addrlen;
+       int ret = -1;
+       struct sockaddr *addr;
+       int family = ipv6 ? AF_INET6 : AF_INET;
+
+       struct sockaddr_in6 addr6 = {
+               .sin6_family = AF_INET6,
+               .sin6_port = htons(PORT),
+               .sin6_addr = in6addr_any
+       };
+       struct sockaddr_in addr4 = {
+               .sin_family = AF_INET,
+               .sin_port = htons(PORT),
+               .sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+       };
+
+
+       if (ipv6) {
+               addr = (struct sockaddr*)&addr6;
+               addrlen = sizeof(addr6);
+       } else {
+               addr = (struct sockaddr*)&addr4;
+               addrlen = sizeof(addr4);
+       }
+
+       if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+               perror("socket");
+               goto out;
+       }
+
+       if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+                              sizeof(v6only)) < 0) {
+               perror("setsockopt IPV6_V6ONLY");
+               goto out;
+       }
+
+       if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+                      sizeof(reuseaddr)) < 0) {
+               perror("setsockopt SO_REUSEADDR");
+               goto out;
+       }
+
+       if (bind(fd, addr, addrlen) < 0) {
+               perror("bind");
+               goto out;
+       }
+
+       if (any)
+               return fd;
+
+       if (listen(fd, 1) < 0) {
+               perror("listen");
+               goto out;
+       }
+       return fd;
+out:
+       close(fd);
+       return ret;
+}
+
+int main(void)
+{
+       int listenfd;
+       int fd1, fd2;
+
+       fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+       listenfd = open_port(0, 0);
+       if (listenfd < 0)
+               error(1, errno, "Couldn't open listen socket");
+       fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+       fd1 = open_port(0, 1);
+       if (fd1 >= 0)
+               error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+       fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+       fd1 = open_port(1, 1);
+       if (fd1 < 0)
+               error(1, errno, "Couldn't open ipv6 reuseport");
+       fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+       fd2 = open_port(0, 1);
+       if (fd2 >= 0)
+               error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+       close(fd1);
+       fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+       fd1 = open_port(0, 1);
+       if (fd1 >= 0)
+               error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+       fprintf(stderr, "Success");
+       return 0;
+}
index 67c3e276430363754e8354dfb2bb7f2485a26c66..24dbf634e2dd8c869a7201e7d89efe4102fa6373 100644 (file)
@@ -6,10 +6,18 @@
  */
 
 #include <sys/types.h>
-#include <asm/siginfo.h>
-#define __have_siginfo_t 1
-#define __have_sigval_t 1
-#define __have_sigevent_t 1
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
 
 #include <errno.h>
 #include <linux/filter.h>
@@ -884,7 +892,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS)
        syscall(__NR_getpid);
 }
 
-static struct siginfo TRAP_info;
+static siginfo_t TRAP_info;
 static volatile int TRAP_nr;
 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 {
index 7d406c3973ba4944a4baee8363f59c736a98b52e..97bb150837df02422fc8a005d683cebb6191878f 100644 (file)
@@ -39,7 +39,11 @@ void my_usr1(int sig, siginfo_t *si, void *u)
        stack_t stk;
        struct stk_data *p;
 
+#if __s390x__
+       register unsigned long sp asm("%15");
+#else
        register unsigned long sp asm("sp");
+#endif
 
        if (sp < (unsigned long)sstack ||
                        sp >= (unsigned long)sstack + SIGSTKSZ) {
index 4981c6b6d050e95b77fa1540640a4b71770179f2..8e04d0afcbd7f9f820af5c550a0fa4741fc62141 100644 (file)
@@ -2,12 +2,16 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
 CFLAGS += -I../../../../usr/include/
 LDFLAGS += -pthread
 
-TEST_PROGS = sync_test
-
-all: $(TEST_PROGS)
+.PHONY: all clean
 
 include ../lib.mk
 
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
+# build rules. lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
 OBJS = sync_test.o sync.o
 
 TESTS += sync_alloc.o
@@ -18,6 +22,16 @@ TESTS += sync_stress_parallelism.o
 TESTS += sync_stress_consumer.o
 TESTS += sync_stress_merge.o
 
-sync_test: $(OBJS) $(TESTS)
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+       $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+       $(CC) -c $^ -o $@
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+       $(CC) -c $^ -o $@
 
-EXTRA_CLEAN := sync_test $(OBJS) $(TESTS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
index 9c92b7bd56410a38a0da0370bbee202f22d0778c..50da45437daab0b5785a4925ac793644cb9d02fd 100644 (file)
@@ -143,7 +143,8 @@ int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
                        printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
                                        clockstring(clock_id),
                                        flags ? "ABSTIME":"RELTIME");
-                       return 0;
+                       /* Indicate timer isn't set, so caller doesn't wait */
+                       return 1;
                }
                printf("%s - timer_create() failed\n", clockstring(clock_id));
                return -1;
@@ -213,8 +214,9 @@ int do_timer(int clock_id, int flags)
        int err;
 
        err = setup_timer(clock_id, flags, interval, &tm1);
+       /* Unsupported case - return 0 to not fail the test */
        if (err)
-               return err;
+               return err == 1 ? 0 : err;
 
        while (alarmcount < 5)
                sleep(1);
@@ -228,18 +230,17 @@ int do_timer_oneshot(int clock_id, int flags)
        timer_t tm1;
        const int interval = 0;
        struct timeval timeout;
-       fd_set fds;
        int err;
 
        err = setup_timer(clock_id, flags, interval, &tm1);
+       /* Unsupported case - return 0 to not fail the test */
        if (err)
-               return err;
+               return err == 1 ? 0 : err;
 
        memset(&timeout, 0, sizeof(timeout));
        timeout.tv_sec = 5;
-       FD_ZERO(&fds);
        do {
-               err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout);
+               err = select(0, NULL, NULL, NULL, &timeout);
        } while (err == -1 && errno == EINTR);
 
        timer_delete(tm1);
index f863c664e3d143be1dd439a6098d5e389a49e762..ee068511fd0bc68ba8cb8b54bbe8a5c9371c322e 100644 (file)
@@ -1,8 +1,3 @@
-TEST_PROGS := watchdog-test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := watchdog-test
 
 include ../lib.mk
-
-clean:
-       rm -fr $(TEST_PROGS)