Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 31 Dec 2017 20:27:19 +0000 (12:27 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 31 Dec 2017 20:27:19 +0000 (12:27 -0800)
Pull scheduler fixes from Thomas Gleixner:
 "Three patches addressing the fallout of the CPU_ISOLATION changes
  especially with NO_HZ_FULL plus documentation of boot parameter
  dependency"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/isolation: Document boot parameters dependency on CONFIG_CPU_ISOLATION=y
  sched/isolation: Enable CONFIG_CPU_ISOLATION=y by default
  sched/isolation: Make CONFIG_NO_HZ_FULL select CONFIG_CPU_ISOLATION

480 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/thunderbolt.rst
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
Documentation/devicetree/bindings/sound/da7218.txt
Documentation/devicetree/bindings/sound/da7219.txt
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
Documentation/x86/x86_64/mm.txt
MAINTAINERS
Makefile
arch/arm/lib/csumpartialcopyuser.S
arch/arm64/kvm/hyp/debug-sr.c
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/thread_info.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/hpmc.S
arch/parisc/kernel/unwind.c
arch/parisc/lib/delay.c
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/kernel/process.c
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/imc-pmu.c
arch/powerpc/sysdev/fsl_msi.c
arch/s390/net/bpf_jit_comp.c
arch/sparc/lib/hweight.S
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/net/bpf_jit_comp_64.c
arch/um/include/asm/mmu_context.h
arch/um/kernel/trap.c
arch/unicore32/include/asm/mmu_context.h
arch/x86/Kconfig
arch/x86/boot/compressed/pagetable.c
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/asm.h
arch/x86/include/asm/cpu_entry_area.h [new file with mode: 0644]
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/espfix.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/hypervisor.h
arch/x86/include/asm/intel_ds.h [new file with mode: 0644]
arch/x86/include/asm/invpcid.h [new file with mode: 0644]
arch/x86/include/asm/irqdomain.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pgalloc.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32_types.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/pti.h [new file with mode: 0644]
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/trace/irq_vectors.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/unwind.h
arch/x86/include/asm/vsyscall.h
arch/x86/include/uapi/asm/processor-flags.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/doublefault.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/head_64.S
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/ldt.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tls.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/delay.c
arch/x86/mm/Makefile
arch/x86/mm/cpu_entry_area.c [new file with mode: 0644]
arch/x86/mm/debug_pagetables.c
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/pgtable.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/pti.c [new file with mode: 0644]
arch/x86/mm/tlb.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/platform/uv/uv_irq.c
arch/x86/power/cpu.c
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/setup.c
block/bio.c
block/blk-map.c
block/blk-throttle.c
block/bounce.c
block/kyber-iosched.c
crypto/af_alg.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/mcryptd.c
crypto/skcipher.c
drivers/acpi/apei/erst.c
drivers/acpi/cppc_acpi.c
drivers/acpi/nfit/core.c
drivers/android/binder.c
drivers/base/cacheinfo.c
drivers/block/null_blk.c
drivers/clk/clk.c
drivers/clk/sunxi/clk-sun9i-mmc.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/imx6q-cpufreq.c
drivers/gpio/gpio-bcm-kona.c
drivers/gpio/gpio-brcmstb.c
drivers/gpio/gpio-reg.c
drivers/gpio/gpio-tegra.c
drivers/gpio/gpio-xgene-sb.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-devprop.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.c
drivers/gpio/gpiolib.h
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
drivers/gpu/drm/drm_lease.c
drivers/gpu/drm/drm_plane.c
drivers/gpu/drm/drm_syncobj.c
drivers/gpu/drm/i915/gvt/display.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_sw_fence.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_lpe_audio.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nouveau_drv.h
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nouveau_mem.c
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/nouveau/nouveau_vmm.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/hid/hid-core.c
drivers/hid/hid-cp2112.c
drivers/hid/hid-holtekff.c
drivers/hv/vmbus_drv.c
drivers/hwmon/hwmon.c
drivers/infiniband/core/security.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/iommu/amd_iommu.c
drivers/iommu/intel_irq_remapping.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-renesas-intc-irqpin.c
drivers/leds/led-core.c
drivers/mfd/arizona-irq.c
drivers/mfd/cros_ec_spi.c
drivers/mfd/twl4030-audio.c
drivers/mfd/twl6040.c
drivers/mtd/mtdcore.c
drivers/mtd/nand/brcmnand/brcmnand.c
drivers/mtd/nand/gpio.c
drivers/mtd/nand/gpmi-nand/gpmi-nand.c
drivers/net/ethernet/arc/emac.h
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-xgene.c
drivers/net/phy/micrel.c
drivers/net/phy/phylink.c
drivers/net/vxlan.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt.h
drivers/nvdimm/pfn_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvmem/meson-mx-efuse.c
drivers/parisc/lba_pci.c
drivers/pci/pci-driver.c
drivers/phy/motorola/phy-cpcap-usb.c
drivers/phy/renesas/Kconfig
drivers/phy/rockchip/phy-rockchip-typec.c
drivers/phy/tegra/xusb.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/pinctrl-single.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/s390/net/qeth_core_main.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/linit.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_spi.c
drivers/scsi/storvsc_drv.c
drivers/spi/spi-armada-3700.c
drivers/spi/spi-atmel.c
drivers/spi/spi-rspi.c
drivers/spi/spi-sun4i.c
drivers/spi/spi-xilinx.c
drivers/staging/android/ion/Kconfig
drivers/staging/android/ion/ion.c
drivers/staging/android/ion/ion_cma_heap.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
drivers/target/target_core_pscsi.c
drivers/thunderbolt/nhi.c
drivers/tty/n_tty.c
drivers/usb/chipidea/ci_hdrc_msm.c
drivers/usb/core/config.c
drivers/usb/core/quirks.c
drivers/usb/host/xhci-debugfs.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/option.c
drivers/usb/serial/qcserial.c
drivers/usb/usbip/stub_dev.c
drivers/usb/usbip/stub_main.c
drivers/usb/usbip/stub_rx.c
drivers/usb/usbip/stub_tx.c
drivers/usb/usbip/usbip_common.c
drivers/usb/usbip/vhci_hcd.c
drivers/usb/usbip/vhci_rx.c
drivers/usb/usbip/vhci_tx.c
drivers/xen/balloon.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_iext_tree.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_reflink.c
fs/xfs/xfs_super.c
include/asm-generic/mm_hooks.h
include/asm-generic/pgtable.h
include/crypto/mcryptd.h
include/kvm/arm_arch_timer.h
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/bpf_verifier.h
include/linux/gpio/driver.h
include/linux/ipv6.h
include/linux/irq.h
include/linux/irqdesc.h
include/linux/irqdomain.h
include/linux/mfd/rtsx_pci.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/pti.h [new file with mode: 0644]
include/linux/spi/spi.h
include/linux/tick.h
include/net/cfg80211.h
include/net/pkt_cls.h
include/net/sock.h
include/net/xfrm.h
include/trace/events/clk.h
include/trace/events/kvm.h
include/trace/events/tcp.h
include/xen/balloon.h
init/main.c
kernel/bpf/verifier.c
kernel/fork.c
kernel/irq/debug.h
kernel/irq/debugfs.c
kernel/irq/generic-chip.c
kernel/irq/internals.h
kernel/irq/irqdomain.c
kernel/irq/msi.c
kernel/sched/cpufreq_schedutil.c
kernel/time/tick-sched.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
lib/kobject_uevent.c
lib/test_bpf.c
mm/backing-dev.c
net/bridge/br_netlink.c
net/core/dev.c
net/core/net_namespace.c
net/core/skbuff.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/ip_gre.c
net/ipv4/xfrm4_input.c
net/ipv6/af_inet6.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ipv6_sockglue.c
net/ipv6/route.c
net/ipv6/xfrm6_input.c
net/openvswitch/flow.c
net/rds/send.c
net/sched/cls_api.c
net/sched/cls_bpf.c
net/sched/sch_generic.c
net/sctp/debug.c
net/sctp/socket.c
net/sctp/ulpqueue.c
net/strparser/strparser.c
net/tipc/bearer.c
net/tipc/group.c
net/tipc/monitor.c
net/tipc/socket.c
net/wireless/Makefile
net/wireless/certs/sforshee.hex [new file with mode: 0644]
net/wireless/certs/sforshee.x509 [deleted file]
net/wireless/nl80211.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
security/Kconfig
sound/core/rawmidi.c
sound/hda/hdac_i915.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/atmel/Kconfig
sound/soc/codecs/da7218.c
sound/soc/codecs/msm8916-wcd-analog.c
sound/soc/codecs/msm8916-wcd-digital.c
sound/soc/codecs/nau8825.c
sound/soc/codecs/rt5514-spi.c
sound/soc/codecs/rt5514.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5663.c
sound/soc/codecs/rt5663.h
sound/soc/codecs/tlv320aic31xx.h
sound/soc/codecs/twl4030.c
sound/soc/codecs/wm_adsp.c
sound/soc/fsl/fsl_asrc.h
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/boards/kbl_rt5663_max98927.c
sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
sound/soc/intel/skylake/skl-nhlt.c
sound/soc/intel/skylake/skl-topology.c
sound/soc/rockchip/rockchip_spdif.c
sound/soc/sh/rcar/adg.c
sound/soc/sh/rcar/core.c
sound/soc/sh/rcar/dma.c
sound/soc/sh/rcar/ssi.c
sound/soc/sh/rcar/ssiu.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/arch/s390/include/uapi/asm/bpf_perf_event.h
tools/arch/s390/include/uapi/asm/perf_regs.h [new file with mode: 0644]
tools/bpf/bpftool/map.c
tools/bpf/bpftool/prog.c
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/objtool/arch/x86/decode.c
tools/objtool/builtin-orc.c
tools/objtool/orc_gen.c
tools/perf/Makefile.config
tools/perf/arch/s390/include/perf_regs.h
tools/perf/check-headers.sh
tools/perf/jvmti/jvmti_agent.c
tools/perf/jvmti/jvmti_agent.h
tools/perf/jvmti/libjvmti.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/net/config
tools/testing/selftests/x86/ldt_gdt.c
tools/usb/usbip/src/utils.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmio.c
virt/kvm/arm/mmu.c

index 168310707ec2cdaf0a2d14e6a381e54d05b90a42..7041c6710f22f0c94805b687b2ba46e504c79ae5 100644 (file)
                        steal time is computed, but won't influence scheduler
                        behaviour
 
+       nopti           [X86-64] Disable kernel page table isolation
+
        nolapic         [X86-32,APIC] Do not enable or use the local APIC.
 
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
        pt.             [PARIDE]
                        See Documentation/blockdev/paride.txt.
 
+       pti=            [X86_64]
+                       Control user/kernel address space isolation:
+                       on - enable
+                       off - disable
+                       auto - default setting
+
        pty.legacy_count=
                        [KNL] Number of legacy pty's. Overwrites compiled-in
                        default number.
index de50a8561774249351515662404e2a1f8328aba6..9b55952039a692d8119ce62502af9a4dd071b8e6 100644 (file)
@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
 a sysfs attribute called "force_power".
 
 For example the intel-wmi-thunderbolt driver exposes this attribute in:
-  /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
+  /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
 
   To force the power to on, write 1 to this attribute file.
   To disable force power, write 0 to this attribute file.
index 376fa2f50e6bc9b41052928037acd4b3a382d380..956bb046e599d576e3f881b2901e0d369a3c9802 100644 (file)
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16
index 5ca5a709b6aa1989901eff6b9239f0bc71d0d272..3ab9dfef38d113523549e66cee20ad8db6e56842 100644 (file)
@@ -73,7 +73,7 @@ Example:
                compatible = "dlg,da7218";
                reg = <0x1a>;
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
                wakeup-source;
 
                VDD-supply = <&reg_audio>;
index cf61681826b675ad984dc17f6c1dbcc2e1b6fc35..5b54d2d045c355808bf0f58afba9ec7539ff4472 100644 (file)
@@ -77,7 +77,7 @@ Example:
                reg = <0x1a>;
 
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
                VDD-supply = <&reg_audio>;
                VDDMIC-supply = <&reg_audio>;
index 5bf13960f7f4a3c826c10b1e15a618df82d82403..e3c48b20b1a691b37d0b425251a257c682a38eca 100644 (file)
@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
        Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-               Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {
index 3448e675b4623ce81b5e0bc1116c52a12c411801..ad41b3813f0a3a3bd5abb32532b42c9b820bbe27 100644 (file)
@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,16 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -29,26 +30,29 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,9 +62,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +73,3 @@ following fixmap section.
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
-
--Andi Kleen, Jul 2004
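
The rewritten paragraph above states that bits 63 down to the most-significant implemented bit are sign extended, which is what creates the hole between user-space and kernel addresses. A small stand-alone C sketch of that rule for the 48-bit case (illustration only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Copy bit 47 into bits 48..63, the canonicalization the document describes. */
static uint64_t canonical48(uint64_t addr)
{
        if (addr & (1ULL << 47))
                return addr | 0xffff000000000000ULL;
        return addr & 0x0000ffffffffffffULL;
}

int main(void)
{
        /* prints 0x00007fffffffffff: last user-space address, unchanged */
        printf("%#018llx\n", (unsigned long long)canonical48(0x00007fffffffffffULL));
        /* prints 0xffff800000000000: bit 47 set, so it lands in the kernel half */
        printf("%#018llx\n", (unsigned long long)canonical48(0x0000800000000000ULL));
        return 0;
}

Addresses whose upper bits do not match bit 47 fall in between the two halves; that non-canonical range is the "hole" the text refers to.
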
index a6e86e20761e143ca976d4f8170e60b603bb5ed9..b46c9cea5ae5b90227687436eb64130d05ae10ca 100644 (file)
@@ -2621,24 +2621,22 @@ F:      fs/bfs/
 F:     include/uapi/linux/bfs_fs.h
 
 BLACKFIN ARCHITECTURE
-M:     Steven Miao <realmz6@gmail.com>
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 T:     git git://git.code.sf.net/p/adi-linux/code
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     arch/blackfin/
 
 BLACKFIN EMAC DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/net/ethernet/adi/
 
 BLACKFIN MEDIA DRIVER
-M:     Scott Jiang <scott.jiang.linux@gmail.com>
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org/
-S:     Supported
+S:     Orphan
 F:     drivers/media/platform/blackfin/
 F:     drivers/media/i2c/adv7183*
 F:     drivers/media/i2c/vs6624*
@@ -2646,25 +2644,25 @@ F:      drivers/media/i2c/vs6624*
 BLACKFIN RTC DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/rtc/rtc-bfin.c
 
 BLACKFIN SDH DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/mmc/host/bfin_sdh.c
 
 BLACKFIN SERIAL DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/tty/serial/bfin_uart.c
 
 BLACKFIN WATCHDOG DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/watchdog/bfin_wdt.c
 
 BLINKM RGB LED DRIVER
@@ -13493,6 +13491,7 @@ M:      Mika Westerberg <mika.westerberg@linux.intel.com>
 M:     Yehezkel Bernat <yehezkel.bernat@intel.com>
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
 S:     Maintained
+F:     Documentation/admin-guide/thunderbolt.rst
 F:     drivers/thunderbolt/
 F:     include/linux/thunderbolt.h
 
index 7e02f951b284187d5354c2b7bd39b0ef1bf5d903..92b74bcd3c2a28870c4368bfa26c60e9e510f3e5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
index 1712f132b80d2402d94d72ea974a0c3326fa2f52..b83fdc06286a64ece150fb7e419bc587e47c3e34 100644 (file)
                .pushsection .text.fixup,"ax"
                .align  4
 9001:          mov     r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+               ldr     r5, [sp, #9*4]          @ *err_ptr
+#else
                ldr     r5, [sp, #8*4]          @ *err_ptr
+#endif
                str     r4, [r5]
                ldmia   sp, {r1, r2}            @ retrieve dst, len
                add     r2, r2, r1
index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..f4363d40e2cd7fd62d40d826d5296c95f15cde9f 100644 (file)
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
        u64 reg;
 
+       /* Clear pmscr in case of early return */
+       *pmscr_el1 = 0;
+
        /* SPE present on this CPU? */
        if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
                                                  ID_AA64DFR0_PMSVER_SHIFT))
index 9345b44b86f036572e33721eb80e9bbbe4493aa4..f57118e1f6b4265257799ae2cf8ea356077e20b9 100644 (file)
@@ -123,8 +123,8 @@ int puts(const char *s)
        while ((nuline = strchr(s, '\n')) != NULL) {
                if (nuline != s)
                        pdc_iodc_print(s, nuline - s);
-                       pdc_iodc_print("\r\n", 2);
-                       s = nuline + 1;
+               pdc_iodc_print("\r\n", 2);
+               s = nuline + 1;
        }
        if (*s != '\0')
                pdc_iodc_print(s, strlen(s));
index c980a02a52bc0dda0a23b205f59d1d86438553f2..598c8d60fa5e602cc9303e1986ada9680d64feb3 100644 (file)
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER      2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER      3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
index a4fd296c958e8e14f13a913aca50510b11eb49b7..f3cecf5117cf8ab14724f0ea3535220c3224d569 100644 (file)
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
        STREG   %r19,PT_SR7(%r16)
 
 intr_return:
-       /* NOTE: Need to enable interrupts incase we schedule. */
-       ssm     PSW_SM_I, %r0
-
        /* check for reschedule */
        mfctl   %cr30,%r1
        LDREG   TI_FLAGS(%r1),%r19      /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
        LDREG   PT_IASQ1(%r16), %r20
        cmpib,COND(=),n 0,%r20,intr_restore /* backward */
 
+       /* NOTE: We need to enable interrupts if we have to deliver
+        * signals. We used to do this earlier but it caused kernel
+        * stack overflows. */
+       ssm     PSW_SM_I, %r0
+
        copy    %r0, %r25                       /* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29                  /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
        cmpib,COND(=)   0, %r20, intr_do_preempt
        nop
 
+       /* NOTE: We need to enable interrupts if we schedule.  We used
+        * to do this earlier but it caused kernel stack overflows. */
+       ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29          /* Reference param save area */
 #endif
index e3a8e5e4d5de75897adcea4134f87c7246f60646..8d072c44f300c16d45ba8f4ee0c2eee6435e4ddd 100644 (file)
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
 
        __INITRODATA
+       .align 4
        .export os_hpmc_size
 os_hpmc_size:
        .word .os_hpmc_end-.os_hpmc
index 5a657986ebbf4bef7beff4e8c8d20f1343872347..143f90e2f9f3c631616d4af52f0fe3fa08f44af9 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
index 7eab4bb8abe630b14c54c3b457285b4228607dc6..66e506520505d8a3245d49d492831df5e3bbb42a 100644 (file)
@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>
 
-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */
 
index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a..e2a2b8400490049143edee40316313a906ca6db7 100644 (file)
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
index 5acb5a176dbe5c8bffe6ddb7458b7d3ac2b7019f..72be0c32e902a35fa45e5ed02036df91999dda58 100644 (file)
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
        printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
               regs->nip, regs->link, regs->ctr);
-       printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+       printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
               regs, regs->trap, print_tainted(), init_utsname()->release);
        printk("MSR:  "REG" ", regs->msr);
        print_msr_bits(regs->msr);
index bf457843e03217b9aa02815d7791f0fce72aea2b..0d750d274c4e21a3324eb3505bbd73c86a58cdc9 100644 (file)
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
 
        /* Return the per-cpu state for state saving/migration */
        return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+              (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }
 
 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        /*
         * Restore P and Q. If the interrupt was pending, we
-        * force both P and Q, which will trigger a resend.
+        * force Q and !P, which will trigger a resend.
         *
         * That means that a guest that had both an interrupt
         * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
         * is perfectly fine as coalescing interrupts that haven't
         * been presented yet is always allowed.
         */
-       if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+       if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
                state->old_p = true;
        if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
                state->old_q = true;
index 46d74e81aff1b4caad4769e7686fa0a800695cd4..d183b4801bdbded832b90d2aa1a18e713f70695b 100644 (file)
@@ -763,7 +763,8 @@ emit_clear:
                        func = (u8 *) __bpf_call_base + imm;
 
                        /* Save skb pointer if we need to re-cache skb data */
-                       if (bpf_helper_changes_pkt_data(func))
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func))
                                PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
                        PPC_MR(b2p[BPF_REG_0], 3);
 
                        /* refresh skb cache */
-                       if (bpf_helper_changes_pkt_data(func)) {
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func)) {
                                /* reload skb pointer to r3 */
                                PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
                                bpf_jit_emit_skb_loads(image, ctx);
index 1538129663658381b6b1a425dcbf582b1ed09531..fce545774d50afc6093c28ad2f4127c24ed5331c 100644 (file)
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
        int ret;
        __u64 target;
 
-       if (is_kernel_addr(addr))
-               return branch_target((unsigned int *)addr);
+       if (is_kernel_addr(addr)) {
+               if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+                       return 0;
+
+               return branch_target(&instr);
+       }
 
        /* Userspace: need copy instruction here then translate it */
        pagefault_disable();
index 0ead3cd73caa2f8816e8c04f47cca691efba0560..be4e7f84f70a59db60e92a9bfe845678f71cc608 100644 (file)
@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
        if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
                return 0;
 
+       /*
+        * Check whether nest_imc is registered. We could end up here if the
+        * cpuhotplug callback registration fails. i.e, callback invokes the
+        * offline path for all successfully registered nodes. At this stage,
+        * nest_imc pmu will not be registered and we should return here.
+        *
+        * We return with a zero since this is not an offline failure. And
+        * cpuhp_setup_state() returns the actual failure reason to the caller,
+        * which in turn will call the cleanup routine.
+        */
+       if (!nest_pmus)
+               return 0;
+
        /*
         * Now that this cpu is one of the designated,
         * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                if (nest_pmus == 1) {
                        cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
                        kfree(nest_imc_refc);
+                       kfree(per_nest_pmu_arr);
                }
 
                if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
        kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
        kfree(pmu_ptr);
-       kfree(per_nest_pmu_arr);
        return;
 }
 
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
                        ret = nest_pmu_cpumask_init();
                        if (ret) {
                                mutex_unlock(&nest_init_lock);
+                               kfree(nest_imc_refc);
+                               kfree(per_nest_pmu_arr);
                                goto err_free;
                        }
                }
index 44cbf4c12ea137562f02758aab4e2e604f54e96f..df95102e732cb466911b32632fd53e3f3369bf54 100644 (file)
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
 }
 
 static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
 
 static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
                               int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
                dev_err(&dev->dev, "No memory for MSI cascade data\n");
                return -ENOMEM;
        }
-       irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+       irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+                             &fsl_msi_irq_request_class);
        cascade_data->index = offset;
        cascade_data->msi_data = msi;
        cascade_data->virq = virt_msir;
index e81c16838b90f1bc9a5418bc1b4e5365e9cb0aef..9557d8b516df5a689dda995cd7fd501ddd6cf54c 100644 (file)
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL   8       /* code uses literals */
 #define SEEN_FUNC      16      /* calls C functions */
 #define SEEN_TAIL_CALL 32      /* code uses tail calls */
-#define SEEN_SKB_CHANGE        64      /* code changes skb data */
-#define SEEN_REG_AX    128     /* code uses constant blinding */
+#define SEEN_REG_AX    64      /* code uses constant blinding */
 #define SEEN_STACK     (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                        EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
-       if (jit->seen & SEEN_SKB)
+       if (jit->seen & SEEN_SKB) {
                emit_load_skb_data_hlen(jit);
-       if (jit->seen & SEEN_SKB_CHANGE)
                /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
                EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
                              STK_OFF_SKBP);
+       }
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                EMIT2(0x0d00, REG_14, REG_W1);
                /* lgr %b0,%r2: load return value into %b0 */
                EMIT4(0xb9040000, BPF_REG_0, REG_2);
-               if (bpf_helper_changes_pkt_data((void *)func)) {
-                       jit->seen |= SEEN_SKB_CHANGE;
+               if ((jit->seen & SEEN_SKB) &&
+                   bpf_helper_changes_pkt_data((void *)func)) {
                        /* lg %b1,ST_OFF_SKBP(%r15) */
                        EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
                                      REG_15, STK_OFF_SKBP);
index e5547b22cd1832c3aea507b3dfc694da90568222..0ddbbb03182232fe199f5222248617c72a2f23b7 100644 (file)
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
        .previous
 
 ENTRY(__arch_hweight64)
-       sethi   %hi(__sw_hweight16), %g1
-       jmpl    %g1 + %lo(__sw_hweight16), %g0
+       sethi   %hi(__sw_hweight64), %g1
+       jmpl    %g1 + %lo(__sw_hweight64), %g0
         nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)
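
The hunk above fixes __arch_hweight64 to branch to __sw_hweight64 rather than __sw_hweight16, i.e. to the 64-bit software population count. For reference, a stand-alone C sketch of what that fallback computes (the kernel's lib/hweight.c implementation may differ in detail):

#include <stdint.h>
#include <stdio.h>

/* Bit-parallel population count of a 64-bit word. */
static unsigned int sw_hweight64(uint64_t w)
{
        w = w - ((w >> 1) & 0x5555555555555555ULL);
        w = (w & 0x3333333333333333ULL) + ((w >> 2) & 0x3333333333333333ULL);
        w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        return (unsigned int)((w * 0x0101010101010101ULL) >> 56);
}

int main(void)
{
        /* 32 set bits in the high word plus 1 in the low word: prints 33 */
        printf("%u\n", sw_hweight64(0xffffffff00000001ULL));
        return 0;
}
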
index be3136f142a9993e0c6c8cfa1d651b1685654a73..a8103a84b4ac4a2ec84c44c302862b3aed8b7e7f 100644 (file)
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
index 815c03d7a765524424b92866b1567ea2a43695d4..41363f46797bf9f74dd922fadbd2a3f190e8c9bb 100644 (file)
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
index 5765e7e711f78248d2bff70f9c57ca48a4514355..ff5f9cb3039af1f91c8701915f08c051c21d0d81 100644 (file)
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                u8 *func = ((u8 *)__bpf_call_base) + imm;
 
                ctx->saw_call = true;
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
                emit_call((u32 *)func, ctx);
                emit_nop(ctx);
 
                emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
 
-               if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-                       load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       load_skb_regs(ctx, L7);
                break;
        }
 
index b668e351fd6c2e4f7a4b75c8a67eada77449abc9..fca34b2177e28a055663055d01c4fb7d78420285 100644 (file)
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        uml_setup_stubs(mm);
+       return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
index 4e6fcb32620ffb2125f648622499e5bf7c950e72..428644175956231aad112a0ce221452913736635 100644 (file)
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
                (void *)UPT_IP(regs), (void *)UPT_SP(regs),
index 59b06b48f27d7a4e0d8b82fc147d3fdad7f75295..5c205a9cb5a6a4bb2c865255bc946d7ca4882db1 100644 (file)
@@ -81,9 +81,10 @@ do { \
        } \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
index 8eed3f94bfc774de5e3f344590f8889a999dea9c..d4fc98c50378c40bc901f6446d2bfff68151eb6a 100644 (file)
@@ -926,7 +926,8 @@ config MAXSMP
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
        range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+       range 2 64 if SMP && X86_32 && X86_BIGSMP
+       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
        range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
        default "1" if !SMP
        default "8192" if MAXSMP
index d5364ca2e3f9290d0ba36606b7e25369f872c144..b5e5e02f8cde7fa9123dc56981c3a3a98f45843c 100644 (file)
@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT
 
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
index 3fd8bc560faece4cb6253e87a4c092965a73e7af..45a63e00a6af9a12b4739246d6844ba94f766e71 100644 (file)
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
 
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK       (1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK                (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT reg:req
+       bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+       ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+       /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+       andq    $(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+       ADJUST_KERNEL_CR3 \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask   \
+       PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+
+       ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+       /*
+        * Test if the ASID needs a flush.
+        */
+       movq    \scratch_reg, \scratch_reg2
+       andq    $(0x7FF), \scratch_reg          /* mask ASID */
+       bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jnc     .Lnoflush_\@
+
+       /* Flush needed, clear the bit */
+       btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       movq    \scratch_reg2, \scratch_reg
+       jmp     .Lwrcr3_\@
+
+.Lnoflush_\@:
+       movq    \scratch_reg2, \scratch_reg
+       SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+       /* Flip the PGD and ASID to the user version */
+       orq     $(PTI_SWITCH_MASK), \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK        scratch_reg:req
+       pushq   %rax
+       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+       popq    %rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+       movq    %cr3, \scratch_reg
+       movq    \scratch_reg, \save_reg
+       /*
+        * Is the "switch mask" all zero?  That means that both of
+        * these are zero:
+        *
+        *      1. The user/kernel PCID bit, and
+        *      2. The user/kernel "bit" that points CR3 to the
+        *         bottom half of the 8k PGD
+        *
+        * That indicates a kernel CR3 value, not a user CR3.
+        */
+       testq   $(PTI_SWITCH_MASK), \scratch_reg
+       jz      .Ldone_\@
+
+       ADJUST_KERNEL_CR3 \scratch_reg
+       movq    \scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+       ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+       /*
+        * KERNEL pages can always resume with NOFLUSH as we do
+        * explicit flushes.
+        */
+       bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
+       jnc     .Lnoflush_\@
+
+       /*
+        * Check if there's a pending flush for the user ASID we're
+        * about to set.
+        */
+       movq    \save_reg, \scratch_reg
+       andq    $(0x7FF), \scratch_reg
+       bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jnc     .Lnoflush_\@
+
+       btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jmp     .Lwrcr3_\@
+
+.Lnoflush_\@:
+       SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+       /*
+        * The CR3 write could be avoided when not changing its value,
+        * but would require a CR3 read *and* a scratch register.
+        */
+       movq    \save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
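
To follow the arithmetic in ADJUST_KERNEL_CR3 and the SWITCH_TO_USER_CR3_* macros above, here is a stand-alone C sketch of the same bit manipulation. PAGE_SHIFT is 12; the numeric value of X86_CR3_PTI_SWITCH_BIT and the sample CR3 value are assumptions made only for this illustration:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT                      12
#define X86_CR3_PTI_SWITCH_BIT          11      /* assumed value, illustration only */
#define PTI_SWITCH_PGTABLES_MASK        (1ULL << PAGE_SHIFT)
#define PTI_SWITCH_MASK                 (PTI_SWITCH_PGTABLES_MASK | \
                                         (1ULL << X86_CR3_PTI_SWITCH_BIT))

int main(void)
{
        uint64_t kernel_cr3 = 0x12a400000ULL | 1;       /* made-up PGD address, PCID 1 */

        /* SWITCH_TO_USER_CR3_*: set the mask to select the user half and user PCID */
        uint64_t user_cr3 = kernel_cr3 | PTI_SWITCH_MASK;

        /* ADJUST_KERNEL_CR3: clear the mask to point CR3 back at the kernel half */
        uint64_t back = user_cr3 & ~PTI_SWITCH_MASK;

        printf("kernel %#llx\nuser   %#llx\nback   %#llx\n",
               (unsigned long long)kernel_cr3,
               (unsigned long long)user_cr3,
               (unsigned long long)back);
        return 0;
}

Setting the mask flips bit 12 to reach the upper half of the 8k PGD together with the user PCID bit; clearing it restores the kernel view, as the comment above the mask definitions describes.
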
index 4838037f97f6edffda62b5b045c837fcc29402f0..ace8f321a5a1f2d1331cc4331a1922c9ed3d8bc1 100644 (file)
@@ -941,9 +941,10 @@ ENTRY(debug)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Ldebug_from_sysenter_stack
 
        TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Lnmi_from_sysenter_stack
 
        /* Not on SYSENTER stack. */
index f81d50d7ceacdefa06d61482687937096c68421c..f048e384ff54e06530b657efc3b00cdf50f1ce5b 100644 (file)
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -40,6 +39,8 @@
 #include <asm/frame.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+       .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH    CPU_ENTRY_AREA_entry_stack + \
+                       SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+       swapgs
+
+       /* Stash the user RSP. */
+       movq    %rsp, RSP_SCRATCH
+
+       /* Note: using %rsp as a scratch reg. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+       /* Load the top of the task stack into RSP */
+       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+       /* Start building the simulated IRET frame. */
+       pushq   $__USER_DS                      /* pt_regs->ss */
+       pushq   RSP_SCRATCH                     /* pt_regs->sp */
+       pushq   %r11                            /* pt_regs->flags */
+       pushq   $__USER_CS                      /* pt_regs->cs */
+       pushq   %rcx                            /* pt_regs->ip */
+
+       /*
+        * x86 lacks a near absolute jump, and we can't jump to the real
+        * entry text with a relative jump.  We could push the target
+        * address and then use retq, but this destroys the pipeline on
+        * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+        * spill RDI and restore it in a second-stage trampoline.
+        */
+       pushq   %rdi
+       movq    $entry_SYSCALL_64_stage2, %rdi
+       jmp     *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+       .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+       UNWIND_HINT_EMPTY
+       popq    %rdi
+       jmp     entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
        UNWIND_HINT_EMPTY
        /*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
         */
 
        swapgs
+       /*
+        * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+        * is not required to switch CR3.
+        */
        movq    %rsp, PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
        popq    %rsi    /* skip rcx */
        popq    %rdx
        popq    %rsi
+
+       /*
+        * Now all regs are restored except RSP and RDI.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
        popq    %rdi
-       movq    RSP-ORIG_RAX(%rsp), %rsp
+       popq    %rsp
        USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -466,12 +549,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-       pushfq
-       testl $X86_EFLAGS_IF, (%rsp)
+       pushq %rax
+       SAVE_FLAGS(CLBR_RAX)
+       testl $X86_EFLAGS_IF, %eax
        jz .Lokay_\@
        ud2
 .Lokay_\@:
-       addq $8, %rsp
+       popq %rax
 #endif
 .endm
 
@@ -563,6 +647,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
        cld
+
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jz      1f
+       SWAPGS
+       call    switch_to_thread_stack
+1:
+
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
        jz      1f
 
        /*
-        * IRQ from user mode.  Switch to kernel gsbase and inform context
-        * tracking that we're in kernel mode.
-        */
-       SWAPGS
-
-       /*
+        * IRQ from user mode.
+        *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
         * (which can take locks).  Since TRACE_IRQS_OFF is idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       SWAPGS
        POP_EXTRA_REGS
-       POP_C_REGS
-       addq    $8, %rsp        /* skip regs->orig_ax */
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rax
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+
+       /*
+        * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+       pushq   5*8(%rdi)       /* RSP */
+       pushq   4*8(%rdi)       /* EFLAGS */
+       pushq   3*8(%rdi)       /* CS */
+       pushq   2*8(%rdi)       /* RIP */
+
+       /* Push user RDI on the trampoline stack. */
+       pushq   (%rdi)
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+       /* Restore RDI. */
+       popq    %rdi
+       SWAPGS
        INTERRUPT_RETURN
 
 
@@ -713,7 +833,9 @@ native_irq_return_ldt:
         */
 
        pushq   %rdi                            /* Stash user RDI */
-       SWAPGS
+       SWAPGS                                  /* to kernel GS */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
+
        movq    PER_CPU_VAR(espfix_waddr), %rdi
        movq    %rax, (0*8)(%rdi)               /* user RAX */
        movq    (1*8)(%rsp), %rax               /* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
        /* Now RAX == RSP. */
 
        andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
-       popq    %rdi                            /* Restore user RDI */
 
        /*
         * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
         * still points to an RO alias of the ESPFIX stack.
         */
        orq     PER_CPU_VAR(espfix_stack), %rax
-       SWAPGS
+
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+       SWAPGS                                  /* to user GS */
+       popq    %rdi                            /* Restore user RDI */
+
        movq    %rax, %rsp
        UNWIND_HINT_IRET_REGS offset=8
 
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+       UNWIND_HINT_FUNC
+
+       pushq   %rdi
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
+       ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
 
        ALLOC_PT_GPREGS_ON_STACK
 
-       .if \paranoid
-       .if \paranoid == 1
+       .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
-       jnz     1f
+       jnz     .Lfrom_usermode_switch_stack_\@
        .endif
+
+       .if \paranoid
        call    paranoid_entry
        .else
        call    error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
        jmp     error_exit
        .endif
 
-       .if \paranoid == 1
+       .if \paranoid < 2
        /*
-        * Paranoid entry from userspace.  Switch stacks and treat it
+        * Entry from userspace.  Switch stacks and treat it
         * as a normal entry.  This means that paranoid handlers
         * run in real process context if user_mode(regs).
         */
-1:
+.Lfrom_usermode_switch_stack_\@:
        call    error_entry
 
-
-       movq    %rsp, %rdi                      /* pt_regs pointer */
-       call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-
        movq    %rsp, %rdi                      /* pt_regs pointer */
 
        .if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
        js      1f                              /* negative -> in kernel */
        SWAPGS
        xorl    %ebx, %ebx
-1:     ret
+
+1:
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+       ret
 END(paranoid_entry)
 
 /*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     .Lparanoid_exit_no_swapgs
        TRACE_IRQS_IRETQ
+       RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
        SWAPGS_UNSAFE_STACK
        jmp     .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
         * from user mode due to an IRET fault.
         */
        SWAPGS
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %r12 */
+       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    sync_regs
+       movq    %rax, %rsp                      /* switch stack */
+       ENCODE_FRAME_POINTER
+       pushq   %r12
+
        /*
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
         * .Lgs_change's error handler with kernel gsbase.
         */
        SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
        jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
        /*
-        * We came from an IRET to user mode, so we have user gsbase.
-        * Switch to kernel gsbase:
+        * We came from an IRET to user mode, so we have user
+        * gsbase and CR3.  Switch to kernel gsbase and CR3:
         */
        SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
        /*
         * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *     %r14: Used to save/restore the CR3 of the interrupted context
+ *           when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
        UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
 
        swapgs
        cld
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
        movq    %rsp, %rdx
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
        UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
        movq    $-1, %rsi
        call    do_nmi
 
+       RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     nmi_restore
 nmi_swapgs:
index 568e130d932cd2a7d44393e5fc52408cffe64f34..40f17009ec20cd5e4eff6147aa0468232d47e096 100644 (file)
  */
 ENTRY(entry_SYSENTER_compat)
        /* Interrupts are off on entry. */
-       SWAPGS_UNSAFE_STACK
+       SWAPGS
+
+       /* We are about to clobber %rsp anyway, clobbering here is OK */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /*
@@ -215,6 +219,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   $0                      /* pt_regs->r14 = 0 */
        pushq   $0                      /* pt_regs->r15 = 0 */
 
+       /*
+        * We just saved %rdi so it is safe to clobber.  It is not
+        * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+        */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
         * turned them off.
@@ -256,10 +266,22 @@ sysret32_from_system_call:
         * when the system call started, which is already known to user
         * code.  We zero R8-R10 to avoid info leaks.
          */
+       movq    RSP-ORIG_RAX(%rsp), %rsp
+
+       /*
+        * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+        * on the process stack which is not mapped to userspace and
+        * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+        * switch until after the last reference to the process
+        * stack.
+        *
+        * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+        */
+       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
        xorq    %r8, %r8
        xorq    %r9, %r9
        xorq    %r10, %r10
-       movq    RSP-ORIG_RAX(%rsp), %rsp
        swapgs
        sysretl
 END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
-       /* Construct struct pt_regs on stack (iret frame is already on stack) */
        pushq   %rax                    /* pt_regs->orig_ax */
+
+       /* switch to thread stack expects orig_ax to be pushed */
+       call    switch_to_thread_stack
+
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..577fa8adb785baf5ea1c993a2bbc88adf43fbbcc 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
        WARN_ON_ONCE(address != regs->ip);
 
+       /* This should be unreachable in NATIVE mode. */
+       if (WARN_ON(vsyscall_mode == NATIVE))
+               return false;
+
        if (vsyscall_mode == NONE) {
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
        return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+       p4d->p4d |= _PAGE_USER;
+#endif
+       pud = pud_offset(p4d, VSYSCALL_ADDR);
+       set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+       pmd = pmd_offset(pud, VSYSCALL_ADDR);
+       set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE)
+       if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
+               set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+       }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsigned long)VSYSCALL_ADDR);
index 09c26a4f139c125e000675689ebc983acd8ab91a..731153a4681e73f761dea8c0c15ce6757b89860e 100644 (file)
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
 
 __init int intel_pmu_init(void)
 {
+       struct attribute **extra_attr = NULL;
+       struct attribute **to_free = NULL;
        union cpuid10_edx edx;
        union cpuid10_eax eax;
        union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
        unsigned int unused;
        struct extra_reg *er;
        int version, i;
-       struct attribute **extra_attr = NULL;
        char *name;
 
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
                extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
                        hsw_format_attr : nhm_format_attr;
                extra_attr = merge_attr(extra_attr, skl_format_attr);
+               to_free = extra_attr;
                x86_pmu.cpu_events = get_hsw_events_attrs();
                intel_pmu_pebs_data_source_skl(
                        boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
                pr_cont("full-width counters, ");
        }
 
+       kfree(to_free);
        return 0;
 }
 
index 3674a4b6f8bd0c5f12223b8f5c16067a933450df..8f0aace08b87975489a2401859e51ef20d82838a 100644 (file)
@@ -3,16 +3,18 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE                24
 
-#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE                PAGE_SIZE
 
 /*
@@ -279,17 +281,52 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       phys_addr_t pa;
+       size_t msz = 0;
+
+       pa = virt_to_phys(addr);
+       for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+               cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+       size_t msz = 0;
+
+       for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+               cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+       unsigned int order = get_order(size);
        int node = cpu_to_node(cpu);
-       int max;
-       void *buffer, *ibuffer;
+       struct page *page;
+
+       page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+       return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+       if (buffer)
+               free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       size_t bsiz = x86_pmu.pebs_buffer_size;
+       int max, node = cpu_to_node(cpu);
+       void *buffer, *ibuffer, *cea;
 
        if (!x86_pmu.pebs)
                return 0;
 
-       buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+       buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
        if (unlikely(!buffer))
                return -ENOMEM;
 
@@ -300,25 +337,27 @@ static int alloc_pebs_buffer(int cpu)
        if (x86_pmu.intel_cap.pebs_format < 2) {
                ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
                if (!ibuffer) {
-                       kfree(buffer);
+                       dsfree_pages(buffer, bsiz);
                        return -ENOMEM;
                }
                per_cpu(insn_buffer, cpu) = ibuffer;
        }
-
-       max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+       hwev->ds_pebs_vaddr = buffer;
+       /* Update the cpu entry area mapping */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+       ds->pebs_buffer_base = (unsigned long) cea;
+       ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
        ds->pebs_index = ds->pebs_buffer_base;
-       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-               max * x86_pmu.pebs_record_size;
-
+       max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+       ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
        return 0;
 }
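
The new pebs_absolute_maximum computation rounds the buffer size down to a whole number of PEBS records before adding it to the cpu_entry_area base. A small worked check of that rounding, with assumed sizes (the real record size depends on the PEBS format):

#include <stdio.h>

int main(void)
{
	unsigned long bsiz = 16 * 4096;		/* assumed PEBS_BUFFER_SIZE */
	unsigned long record_size = 192;	/* assumed PEBS record size */
	unsigned long base = 0x1000;		/* illustrative buffer base */

	/* max = record_size * (bsiz / record_size): largest multiple <= bsiz */
	unsigned long max = record_size * (bsiz / record_size);

	printf("usable %lu of %lu bytes, absolute maximum at %#lx\n",
	       max, bsiz, base + max);
	return 0;
}
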
 
 static void release_pebs_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       void *cea;
 
        if (!ds || !x86_pmu.pebs)
                return;
@@ -326,73 +365,70 @@ static void release_pebs_buffer(int cpu)
        kfree(per_cpu(insn_buffer, cpu));
        per_cpu(insn_buffer, cpu) = NULL;
 
-       kfree((void *)(unsigned long)ds->pebs_buffer_base);
+       /* Clear the fixmap */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+       ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
        ds->pebs_buffer_base = 0;
+       dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+       hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       int node = cpu_to_node(cpu);
-       int max, thresh;
-       void *buffer;
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       void *buffer, *cea;
+       int max;
 
        if (!x86_pmu.bts)
                return 0;
 
-       buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+       buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
        if (unlikely(!buffer)) {
                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
                return -ENOMEM;
        }
-
-       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-       thresh = max / 16;
-
-       ds->bts_buffer_base = (u64)(unsigned long)buffer;
+       hwev->ds_bts_vaddr = buffer;
+       /* Update the fixmap */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+       ds->bts_buffer_base = (unsigned long) cea;
+       ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
        ds->bts_index = ds->bts_buffer_base;
-       ds->bts_absolute_maximum = ds->bts_buffer_base +
-               max * BTS_RECORD_SIZE;
-       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-               thresh * BTS_RECORD_SIZE;
-
+       max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+       ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+       ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
        return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       void *cea;
 
        if (!ds || !x86_pmu.bts)
                return;
 
-       kfree((void *)(unsigned long)ds->bts_buffer_base);
+       /* Clear the fixmap */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+       ds_clear_cea(cea, BTS_BUFFER_SIZE);
        ds->bts_buffer_base = 0;
+       dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+       hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-       int node = cpu_to_node(cpu);
-       struct debug_store *ds;
-
-       ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-       if (unlikely(!ds))
-               return -ENOMEM;
+       struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+       memset(ds, 0, sizeof(*ds));
        per_cpu(cpu_hw_events, cpu).ds = ds;
-
        return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds)
-               return;
-
        per_cpu(cpu_hw_events, cpu).ds = NULL;
-       kfree(ds);
 }
 
 void release_ds_buffers(void)
index f7aaadf9331fb75587e74a42b041d78b2a014fc0..8e4ea143ed96403d275bf6727801961db9a053d7 100644 (file)
@@ -14,6 +14,8 @@
 
 #include <linux/perf_event.h>
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS                8
 #define PEBS_COUNTER_MASK      ((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
        PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
        PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-       u64     bts_buffer_base;
-       u64     bts_index;
-       u64     bts_absolute_maximum;
-       u64     bts_interrupt_threshold;
-       u64     pebs_buffer_base;
-       u64     pebs_index;
-       u64     pebs_absolute_maximum;
-       u64     pebs_interrupt_threshold;
-       u64     pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
        (PERF_REG_X86_AX | \
         PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
         * Intel DebugStore bits
         */
        struct debug_store      *ds;
+       void                    *ds_pebs_vaddr;
+       void                    *ds_bts_vaddr;
        u64                     pebs_enabled;
        int                     n_pebs;
        int                     n_large_pebs;
index 219faaec51dfa192f69d8893c8844219c0c89029..386a6900e206f6578e3b38ee7f085d36ac50a928 100644 (file)
 #endif
 
 #ifndef __ASSEMBLY__
+#ifndef __BPF__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
+#endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644 (file)
index 0000000..4a7884b
--- /dev/null
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+
+       /*
+        * The GDT is just below entry_stack and thus serves (on x86_64) as
+        * a read-only guard page.
+        */
+       struct entry_stack_page entry_stack_page;
+
+       /*
+        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+        * we need task switches to work, and task switches write to the TSS.
+        */
+       struct tss_struct tss;
+
+       char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+       /*
+        * Exception stacks used for IST entries.
+        *
+        * In the future, this should have a separate slot for each stack
+        * with guard pages between them.
+        */
+       char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+       /*
+        * Per CPU debug store for Intel performance monitoring. Wastes a
+        * full page at the moment.
+        */
+       struct debug_store cpu_debug_store;
+       /*
+        * The actual PEBS/BTS buffers must be mapped to user space.
+        * Reserve enough fixmap PTEs.
+        */
+       struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE    (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE        (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define        CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU         (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR    ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE                        \
+       (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+       return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
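
The layout constants above imply simple slot arithmetic: a read-only IDT page at the base, then one CPU_ENTRY_AREA_SIZE-sized area per CPU. A hedged sketch of how a lookup like get_cpu_entry_area() could index into that region (the real implementation lives elsewhere; the base and size below are arbitrary stand-ins):

#include <stdint.h>
#include <stdio.h>

/* Arbitrary stand-ins for the header constants, for illustration only. */
#define FAKE_PAGE_SIZE		4096UL
#define FAKE_CEA_BASE		0x100000000UL		/* CPU_ENTRY_AREA_BASE */
#define FAKE_CEA_SIZE		(8 * FAKE_PAGE_SIZE)	/* CPU_ENTRY_AREA_SIZE */

#define FAKE_CEA_RO_IDT		FAKE_CEA_BASE			/* first page */
#define FAKE_CEA_PER_CPU	(FAKE_CEA_RO_IDT + FAKE_PAGE_SIZE)

static uintptr_t fake_get_cpu_entry_area(int cpu)
{
	/* One fixed-size slot per CPU, after the shared RO IDT page. */
	return FAKE_CEA_PER_CPU + (uintptr_t)cpu * FAKE_CEA_SIZE;
}

int main(void)
{
	for (int cpu = 0; cpu < 4; cpu++)
		printf("cpu %d -> %#lx\n", cpu,
		       (unsigned long)fake_get_cpu_entry_area(cpu));
	return 0;
}
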
index bf6a76202a779ee131b4df8c89449ab52abd0a79..ea9a7dde62e5c4d551ba89e429f911fb5c6603fd 100644 (file)
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
        set_bit(bit, (unsigned long *)cpu_caps_set);    \
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
index 800104c8a3edfee7f4f52a33b8451a51ee0ed90a..07cdd17157050e4123bdffc43347ecd58811bc1d 100644 (file)
 #define X86_FEATURE_CAT_L3             ( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2             ( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3             ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME                        ( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_INTEL_PPIN         ( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT           ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW      ( 7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_BUG_SWAPGS_FENCE           X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR                        X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400               X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_INSECURE           X86_BUG(14) /* CPU is insecure and needs kernel page table isolation */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
index 4011cb03ef08e52db15f52779ce366c26359a34b..13c5ee878a477902b8494532b24853b6c156a170 100644 (file)
@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
        desc->type              = (info->read_exec_only ^ 1) << 1;
        desc->type             |= info->contents << 2;
+       /* Set the ACCESS bit so it can be mapped RO */
+       desc->type             |= 1;
 
        desc->s                 = 1;
        desc->dpl               = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
        return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-       return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-       unsigned int idx = get_cpu_gdt_ro_index(cpu);
-       return (struct desc_struct *)__fix_to_virt(idx);
+       return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
        struct desc_struct *d = get_cpu_gdt_rw(cpu);
        tss_desc tss;
index 14d6d50073142b0f49b06850ccd0d394546479ee..b027633e73003e121d7c043438ac7dbd10fc07a4 100644 (file)
 # define DISABLE_LA57  (1<<(X86_FEATURE_LA57 & 31))
 #endif
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI           0
+#else
+# define DISABLE_PTI           (1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -60,7 +66,7 @@
 #define DISABLED_MASK4 (DISABLE_PCID)
 #define DISABLED_MASK5 0
 #define DISABLED_MASK6 0
-#define DISABLED_MASK7 0
+#define DISABLED_MASK7 (DISABLE_PTI)
 #define DISABLED_MASK8 0
 #define DISABLED_MASK9 (DISABLE_MPX)
 #define DISABLED_MASK10        0
index 0211029076ea8b9ed6648b9bf298c99c8b2124ad..6777480d8a427eaaa07559f77985c125aa66bb6c 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 #include <asm/percpu.h>
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 #endif /* _ASM_X86_ESPFIX_H */
index b0c505fe9a958c701fef6d96f281bb8ab1a773de..64c4a30e0d39621ff8587fc8da538cd3d1d9f144 100644 (file)
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
                         PAGE_SIZE)
 #endif
 
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
        FIX_IO_APIC_BASE_0,
        FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
-       FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
        FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
        FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
 #endif
-       /* Fixmap entries to remap the GDTs, one per processor. */
-       FIX_GDT_REMAP_BEGIN,
-       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 #ifdef CONFIG_ACPI_APEI_GHES
        /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 
 #define FIXADDR_SIZE   (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START          (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
 extern int fixmaps_set;
 
index 1b0a5abcd8aeb6e700013c5434aaeb0bba7a152f..96aa6b9884dc5b3bc8d54c9ef1c6258eea13a0d0 100644 (file)
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types  */
 enum x86_hypervisor_type {
        X86_HYPER_NATIVE = 0,
        X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
        X86_HYPER_KVM,
 };
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
        /* Hypervisor name */
        const char      *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return x86_hyper_type == type;
+}
 #else
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644 (file)
index 0000000..62a9f49
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS                8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+       u64     bts_buffer_base;
+       u64     bts_index;
+       u64     bts_absolute_maximum;
+       u64     bts_interrupt_threshold;
+       u64     pebs_buffer_base;
+       u64     pebs_index;
+       u64     pebs_absolute_maximum;
+       u64     pebs_interrupt_threshold;
+       u64     pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+       char    bts_buffer[BTS_BUFFER_SIZE];
+       char    pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
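
struct debug_store is forced to page alignment so that a single page can be aliased into the cpu_entry_area. A quick check that the payload above (eight u64 bookkeeping fields plus the per-event reset array) easily fits in a 4 KiB page:

#include <stdint.h>
#include <stdio.h>

#define MAX_PEBS_EVENTS 8

/* Field list mirrors the header above, minus the page-alignment attribute. */
struct debug_store_payload {
	uint64_t bts_buffer_base, bts_index;
	uint64_t bts_absolute_maximum, bts_interrupt_threshold;
	uint64_t pebs_buffer_base, pebs_index;
	uint64_t pebs_absolute_maximum, pebs_interrupt_threshold;
	uint64_t pebs_event_reset[MAX_PEBS_EVENTS];
};

int main(void)
{
	printf("payload is %zu bytes; fits in one 4096-byte page: %d\n",
	       sizeof(struct debug_store_payload),
	       sizeof(struct debug_store_payload) <= 4096);
	return 0;
}
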
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644 (file)
index 0000000..989cfa8
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+                            unsigned long type)
+{
+       struct { u64 d[2]; } desc = { { pcid, addr } };
+
+       /*
+        * The memory clobber is because the whole point is to invalidate
+        * stale TLB entries and, especially if we're flushing global
+        * mappings, we don't want the compiler to reorder any subsequent
+        * memory accesses before the TLB flush.
+        *
+        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+        * invpcid (%rcx), %rax in long mode.
+        */
+       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR                0
+#define INVPCID_TYPE_SINGLE_CTXT       1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
+#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+                                    unsigned long addr)
+{
+       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
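
INVPCID is a privileged instruction, so the helpers above can only run in the kernel; what can be shown outside it is the 16-byte descriptor they hand to the instruction. A hedged sketch that builds and prints the descriptor for a single-context flush:

#include <stdint.h>
#include <stdio.h>

#define INVPCID_TYPE_SINGLE_CTXT 1

struct invpcid_desc {
	uint64_t d[2];		/* d[0]: PCID (low 12 bits used), d[1]: address */
};

int main(void)
{
	unsigned long pcid = 5, addr = 0;
	struct invpcid_desc desc = { { pcid, addr } };

	/* In the kernel this would be followed by: invpcid (&desc), type. */
	printf("type=%d pcid=%llu addr=%#llx\n", INVPCID_TYPE_SINGLE_CTXT,
	       (unsigned long long)desc.d[0], (unsigned long long)desc.d[1]);
	return 0;
}
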
index 139feef467f7e298c6f9db57c43facc64f5468b6..c066ffae222b769996b78c13ee5d063dc7c5c544 100644 (file)
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
                              unsigned int nr_irqs);
 extern int mp_irqdomain_activate(struct irq_domain *domain,
-                                struct irq_data *irq_data, bool early);
+                                struct irq_data *irq_data, bool reserve);
 extern void mp_irqdomain_deactivate(struct irq_domain *domain,
                                    struct irq_data *irq_data);
 extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
index c8ef23f2c28f17c59308b9c41179c47f85e075ad..89f08955fff733c688a5ce4f4a0b8d74050ee617 100644 (file)
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
        swapgs;                                 \
        sysretl
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)          pushfq; popq %rax
+#endif
 #else
 #define INTERRUPT_RETURN               iret
 #define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
index f86a8caa561e8873c3f34e6e8b8cd509ebadd819..395c9631e000a3a17aa574c1b25fcc2cafd5b5fb 100644 (file)
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
index 9ea26f16749706fddd5b15e8bf557a9e6156e165..5ff3e8af2c2056b7fe19560ee2ba1ad7146aaf2a 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
        atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
+       struct rw_semaphore     ldt_usr_sem;
+       struct ldt_struct       *ldt;
 #endif
 
 #ifdef CONFIG_X86_64
index 6d16d15d09a0daed96a1e3d670b6203d1779b98e..c931b88982a0ff59e3b67947cc606e452f327dc0 100644 (file)
@@ -50,22 +50,53 @@ struct ldt_struct {
         * call gates.  On native, we could merge the ldt_struct and LDT
         * allocations, but it's not worth trying to optimize.
         */
-       struct desc_struct *entries;
-       unsigned int nr_entries;
+       struct desc_struct      *entries;
+       unsigned int            nr_entries;
+
+       /*
+        * If PTI is in use, then the entries array is not mapped while we're
+        * in user mode.  The whole array will be aliased at the address
+        * given by ldt_slot_va(slot).  We use two slots so that we can allocate
+        * and map, and enable a new LDT without invalidating the mapping
+        * of an older, still-in-use LDT.
+        *
+        * slot will be -1 if this LDT doesn't have an alias mapping.
+        */
+       int                     slot;
 };
 
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+       return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+       BUG();
+#endif
+}
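
With PTI, the LDT is made visible to the user page tables only through one of two fixed aliases, and the alias address is just LDT_BASE_ADDR plus the slot number times one stride, as ldt_slot_va() does above. A hedged sketch of that addressing with illustrative constants (the real values come from the pgtable headers):

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only. */
#define FAKE_LDT_BASE_ADDR	0x200000000UL
#define FAKE_LDT_ENTRIES	8192UL
#define FAKE_LDT_ENTRY_SIZE	8UL
#define FAKE_LDT_SLOT_STRIDE	(FAKE_LDT_ENTRIES * FAKE_LDT_ENTRY_SIZE)

static uintptr_t fake_ldt_slot_va(int slot)
{
	return FAKE_LDT_BASE_ADDR + FAKE_LDT_SLOT_STRIDE * (uintptr_t)slot;
}

int main(void)
{
	/* Two slots let a new LDT be mapped before the old one is retired. */
	printf("slot 0 at %#lx, slot 1 at %#lx\n",
	       (unsigned long)fake_ldt_slot_va(0),
	       (unsigned long)fake_ldt_slot_va(1));
	return 0;
}
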
+
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+       mm->context.ldt = NULL;
+       init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-                                      struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+                                 struct mm_struct *mm)
 {
        return 0;
 }
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
         * that we can see.
         */
 
-       if (unlikely(ldt))
-               set_ldt(ldt->entries, ldt->nr_entries);
-       else
+       if (unlikely(ldt)) {
+               if (static_cpu_has(X86_FEATURE_PTI)) {
+                       if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+                               /*
+                                * Whoops -- either the new LDT isn't mapped
+                                * (if slot == -1) or is mapped into a bogus
+                                * slot (if slot > 1).
+                                */
+                               clear_LDT();
+                               return;
+                       }
+
+                       /*
+                        * If page table isolation is enabled, ldt->entries
+                        * will not be mapped in the userspace pagetables.
+                        * Tell the CPU to access the LDT through the alias
+                        * at ldt_slot_va(ldt->slot).
+                        */
+                       set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+               } else {
+                       set_ldt(ldt->entries, ldt->nr_entries);
+               }
+       } else {
                clear_LDT();
+       }
 #else
        clear_LDT();
 #endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       mutex_init(&mm->context.lock);
+
        mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
        atomic64_set(&mm->context.tlb_gen, 0);
 
-       #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
                /* pkey 0 is the default and always allocated */
                mm->context.pkey_allocation_map = 0x1;
                /* -1 means unallocated or invalid */
                mm->context.execute_only_pkey = -1;
        }
-       #endif
-       return init_new_context_ldt(tsk, mm);
+#endif
+       init_new_context_ldt(mm);
+       return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
@@ -176,15 +231,16 @@ do {                                              \
 } while (0)
 #endif
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        paravirt_arch_dup_mmap(oldmm, mm);
+       return ldt_dup_context(oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
        paravirt_arch_exit_mmap(mm);
+       ldt_arch_exit_mmap(mm);
 }
 
 #ifdef CONFIG_X86_64
@@ -281,33 +337,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-       if (static_cpu_has(X86_FEATURE_PCID)) {
-               VM_WARN_ON_ONCE(asid > 4094);
-               return __sme_pa(mm->pgd) | (asid + 1);
-       } else {
-               VM_WARN_ON_ONCE(asid != 0);
-               return __sme_pa(mm->pgd);
-       }
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-       VM_WARN_ON_ONCE(asid > 4094);
-       return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
        /* For now, be very restrictive about when this can be called. */
index 283efcaac8aff86f2c004bc23e4b8642cbf3d527..892df375b6155a51f584760efb9f9e77c3f732e8 100644 (file)
@@ -927,6 +927,15 @@ extern void default_banner(void);
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+       PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
index 4b5e1eafada731cdedd48d771d79d55e766a84eb..aff42e1da6ee1591bbec15ce6f0543953789d19c 100644 (file)
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
  */
 extern gfp_t __userpte_alloc_gfp;
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
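
An allocation order of 1 means the PGD comes from the page allocator as 2^1 contiguous pages; such buddy allocations are naturally aligned to their own size, which is what makes the bit-12 flip described above possible. A tiny worked check of the size that order implies (PAGE_SIZE assumed to be 4 KiB):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;	/* assumed PAGE_SIZE */
	unsigned int order = 1;		/* PGD_ALLOCATION_ORDER with PTI */

	unsigned long bytes = page_size << order;

	/* Order-1 buddy allocations are 8k in size and 8k-aligned. */
	printf("PGD allocation: %lu bytes, natural alignment %lu\n", bytes, bytes);
	return 0;
}
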
+
 /*
  * Allocate and free page tables.
  */
index 95e2dfd755218ccfaf6417b44c822b545a35568e..e42b8943cb1a311a00ddceb36129ede3012489ef 100644 (file)
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
 void ptdump_walk_pgd_level_checkwx(void);
 
 #ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 static inline int p4d_bad(p4d_t p4d)
 {
-       return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+       unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+       if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+               ignore_flags |= _PAGE_NX;
+
+       return (p4d_flags(p4d) & ~ignore_flags) != 0;
 }
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-       return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+       unsigned long ignore_flags = _PAGE_USER;
+
+       if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+               ignore_flags |= _PAGE_NX;
+
+       return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
-       memcpy(dst, src, count * sizeof(pgd_t));
+       memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+       /* Clone the user space pgd as well */
+       memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+              count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
index f2ca9b28fd68303f4494775564aa9da77ddcd53a..ce245b0cdfcaa42bd932a387bbb189ee7349bfef 100644 (file)
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))     \
-                   & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES   (NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE                            \
+       ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE             \
+       ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END   (PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END   (FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END   (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR  VMALLOC_START
index e9f05331e732a057b341e78977d666b8cfe35289..81462e9a34f6af49645a08f55c7d67e0144dbb77 100644 (file)
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+       unsigned long __ptr = (unsigned long)ptr;
+
+       __ptr |= BIT(bit);
+       return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+       unsigned long __ptr = (unsigned long)ptr;
+
+       __ptr &= ~BIT(bit);
+       return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+       return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+       return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+       return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+       return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
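
Because the two top-level page tables occupy the halves of one 8k-aligned allocation, converting a kernel PGD pointer to its user counterpart (and back) is a single bit flip at PAGE_SHIFT, as the helpers above do. A hedged user-space sketch using an aligned dummy allocation in place of the real PGD:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define FAKE_PAGE_SHIFT	12
#define FAKE_PAGE_SIZE	(1UL << FAKE_PAGE_SHIFT)

int main(void)
{
	/* Stand-in for the order-1 PGD: 8k in size, 8k-aligned. */
	void *pgd = aligned_alloc(2 * FAKE_PAGE_SIZE, 2 * FAKE_PAGE_SIZE);
	if (!pgd)
		return 1;

	uintptr_t kernel_pgd = (uintptr_t)pgd;
	uintptr_t user_pgd   = kernel_pgd | (1UL << FAKE_PAGE_SHIFT); /* set bit 12 */
	uintptr_t back       = user_pgd & ~(1UL << FAKE_PAGE_SHIFT);  /* clear it   */

	printf("kernel half %#lx, user half %#lx, round trip ok: %d\n",
	       (unsigned long)kernel_pgd, (unsigned long)user_pgd,
	       back == kernel_pgd);
	free(pgd);
	return 0;
}
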
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+       unsigned long ptr = (unsigned long)__ptr;
+
+       return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user copy and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return pgd;
+       return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+       p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
        *p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       *pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
        *pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
index 6d5f45dcd4a13caafbf184f323d0725c2d5f53e4..b97a539bcdeee8ca47e39e7353b3e9d1757da24b 100644 (file)
@@ -76,32 +76,45 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_MASK     (~(PGDIR_SIZE - 1))
 
 /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM         _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM                 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
 #ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(12800, UL)
+# define __VMALLOC_BASE                _AC(0xffa0000000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY         _AC(-112, UL)
+# define LDT_BASE_ADDR         (LDT_PGD_ENTRY << PGDIR_SHIFT)
 #else
-#define VMALLOC_SIZE_TB        _AC(32, UL)
-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(32, UL)
+# define __VMALLOC_BASE                _AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY         _AC(-4, UL)
+# define LDT_BASE_ADDR         (LDT_PGD_ENTRY << PGDIR_SHIFT)
 #endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START  vmalloc_base
-#define VMEMMAP_START  vmemmap_base
+# define VMALLOC_START         vmalloc_base
+# define VMEMMAP_START         vmemmap_base
 #else
-#define VMALLOC_START  __VMALLOC_BASE
-#define VMEMMAP_START  __VMEMMAP_BASE
+# define VMALLOC_START         __VMALLOC_BASE
+# define VMEMMAP_START         __VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END    (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END            (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR          (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections end with the start of the fixmap */
-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START    ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END      (-68 * (_AC(1, UL) << 30))
+#define MODULES_END            __fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_LEN            (MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY       _AC(-2, UL)
+#define ESPFIX_BASE_ADDR       (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD     _AC(-3, UL)
+#define CPU_ENTRY_AREA_BASE    (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START           ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END             (-68 * (_AC(1, UL) << 30))
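For orientation, with 4-level paging (PGDIR_SHIFT == P4D_SHIFT == 39) the negative slot numbers above resolve to fixed virtual bases; the values below are plain shifts of the constants in this hunk, not extra definitions from the commit:

/*
 *   LDT_BASE_ADDR       = -4 << 39 = 0xfffffe0000000000   (LDT remap for PTI)
 *   CPU_ENTRY_AREA_BASE = -3 << 39 = 0xfffffe8000000000   (cpu_entry_area)
 *   ESPFIX_BASE_ADDR    = -2 << 39 = 0xffffff0000000000   (%esp fixup stacks)
 */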
 
 #define EARLY_DYNAMIC_PAGE_TABLES      64
 
index 43212a43ee69feea1de27275d9075c566cdfcd2c..6a60fea90b9d9dd669033e4d7eee627b99a0c3d5 100644 (file)
 #define CR3_ADDR_MASK  __sme_clr(0x7FFFFFFFFFFFF000ull)
 #define CR3_PCID_MASK  0xFFFull
 #define CR3_NOFLUSH    BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_SWITCH_BIT        11
+#endif
+
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
index cc16fa882e3e760a40351cf3e7476ac9f25ffe00..d3a67fba200ae2a5c03f52a7815dca00b43c63ad 100644 (file)
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86      boot_cpu_data;
 extern struct cpuinfo_x86      new_cpu_data;
 
-extern struct tss_struct       doublefault_tss;
-extern __u32                   cpu_caps_cleared[NCAPINTS];
-extern __u32                   cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss       doublefault_tss;
+extern __u32                   cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32                   cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
        write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
        u32                     reserved1;
        u64                     sp0;
+
+       /*
+        * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+        * Linux does not use ring 1, so sp1 is not otherwise needed.
+        */
        u64                     sp1;
+
        u64                     sp2;
        u64                     reserved2;
        u64                     ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS                 65536
 #define IO_BITMAP_BYTES                        (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS                        (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET               offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET               (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET       0x8000
 
+struct entry_stack {
+       unsigned long           words[64];
+};
+
+struct entry_stack_page {
+       struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
        /*
-        * The hardware state:
+        * The fixed hardware portion.  This must not cross a page boundary
+        * at risk of violating the SDM's advice and potentially triggering
+        * errata.
         */
        struct x86_hw_tss       x86_tss;
 
@@ -339,18 +360,9 @@ struct tss_struct {
         * be within the limit.
         */
        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-       /*
-        * Space for the temporary SYSENTER stack.
-        */
-       unsigned long           SYSENTER_stack_canary;
-       unsigned long           SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-       /* sp0 on x86_32 is special in and around vm86 mode. */
+       /*
+        *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
+        *  and around vm86 mode and sp0 on x86_64 is special because of the
+        *  entry trampoline.
+        */
        return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
 
 #else
 /*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
 #define TASK_SIZE_MAX  ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
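To make the constant concrete: with the 4-level __VIRTUAL_MASK_SHIFT of 47, the macro above evaluates as shown below (arithmetic only, for illustration):

/*
 *   TASK_SIZE_MAX = (1UL << 47) - 4096 = 0x00007ffffffff000
 */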
 
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644 (file)
index 0000000..0b5ef05
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
index 8da111b3c342bbb61a9e630e101c8a83422a15ea..f737068787729f045a578776845231b0a0ee3e0d 100644 (file)
@@ -16,6 +16,7 @@ enum stack_type {
        STACK_TYPE_TASK,
        STACK_TYPE_IRQ,
        STACK_TYPE_SOFTIRQ,
+       STACK_TYPE_ENTRY,
        STACK_TYPE_EXCEPTION,
        STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
 };
@@ -28,6 +29,8 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info);
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info, unsigned long *visit_mask);
 
index 8c6bd6863db9d6b737cd0649324c154f9b9798a3..9b6df68d8fd1eba26f3651faa5c8b8f4dcf223f1 100644 (file)
@@ -79,10 +79,10 @@ do {                                                                        \
 static inline void refresh_sysenter_cs(struct thread_struct *thread)
 {
        /* Only happens when SEP is enabled, no need to test "SEP"arately: */
-       if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+       if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
                return;
 
-       this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+       this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
        wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 }
 #endif
@@ -90,10 +90,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
+       /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
 #ifdef CONFIG_X86_32
        load_sp0(task->thread.sp0);
 #else
-       load_sp0(task_top_of_stack(task));
+       if (static_cpu_has(X86_FEATURE_XENPV))
+               load_sp0(task_top_of_stack(task));
 #endif
 }
 
index 70f425947dc50f3e99ca639c0ead0d7e1cce636d..00223333821a96616647a9cbb6fe729c4a18b7b6 100644 (file)
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
 #endif
 
 #endif
index 877b5c1a1b1247116e20e7272dbade77e1874fc4..f68f9c836cca09bcd7fc7c5795e219dcaa3f216b 100644 (file)
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
 
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
-                            unsigned long type)
-{
-       struct { u64 d[2]; } desc = { { pcid, addr } };
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and we flush and restart numbering when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
+ *         the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ *         the value we write into the PCID part of CR3; corresponds to the
+ *         ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ *         for KPTI each mm has two address spaces and thus needs two
+ *         PCID values, but we can still do with a single ASID denomination
+ *         for each mm. Corresponds to kPCID + 2048.
+ *
+ */
 
-       /*
-        * The memory clobber is because the whole point is to invalidate
-        * stale TLB entries and, especially if we're flushing global
-        * mappings, we don't want the compiler to reorder any subsequent
-        * memory accesses before the TLB flush.
-        *
-        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-        * invpcid (%rcx), %rax in long mode.
-        */
-       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-}
+/* There are 12 bits of space for ASIDs in CR3 */
+#define CR3_HW_ASID_BITS               12
 
-#define INVPCID_TYPE_INDIV_ADDR                0
-#define INVPCID_TYPE_SINGLE_CTXT       1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
-#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS        1
+#else
+# define PTI_CONSUMED_PCID_BITS        0
+#endif
 
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
-                                    unsigned long addr)
-{
-       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
+/*
+ * ASIDs are zero-based: 0 -> MAX_ASID_AVAILABLE are valid.  -1 below to account
+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
+
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS       6
 
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+/*
+ * Given @asid, compute kPCID
+ */
+static inline u16 kern_pcid(u16 asid)
 {
-       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       /*
+        * Make sure that the dynamic ASID space does not conflict with the
+        * bit we are using to switch between user and kernel ASIDs.
+        */
+       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+
+       /*
+        * The ASID being passed in here should have respected the
+        * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+        */
+       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+#endif
+       /*
+        * The dynamically-assigned ASIDs that get passed in are small
+        * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
+        * so do not bother to clear it.
+        *
+        * If PCID is on, ASID-aware code paths put the ASID+1 into the
+        * PCID bits.  This serves two purposes.  It prevents a nasty
+        * situation in which PCID-unaware code saves CR3, loads some other
+        * value (with PCID == 0), and then restores CR3, thus corrupting
+        * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+        * that any bugs involving loading a PCID-enabled CR3 with
+        * CR4.PCIDE off will trigger deterministically.
+        */
+       return asid + 1;
 }
 
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+       u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+       return ret;
 }
 
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+               return __sme_pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+               return __sme_pa(pgd);
+       }
 }
 
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
-       u64 new_tlb_gen;
-
-       /*
-        * Bump the generation count.  This also serves as a full barrier
-        * that synchronizes with switch_mm(): callers are required to order
-        * their read of mm_cpumask after their writes to the paging
-        * structures.
-        */
-       smp_mb__before_atomic();
-       new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-       smp_mb__after_atomic();
-
-       return new_tlb_gen;
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
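A worked example of the helpers above, assuming PTI is enabled and taking ASID 1 (illustrative numbers derived from the definitions in this hunk):

/*
 *   kern_pcid(1)              = 1 + 1         = 2
 *   user_pcid(1)              = 2 | (1 << 11) = 2050
 *   build_cr3(pgd, 1)         = __sme_pa(pgd) | 2
 *   build_cr3_noflush(pgd, 1) = __sme_pa(pgd) | 2 | CR3_NOFLUSH   (bit 63)
 */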
 
 #ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
        return !static_cpu_has(X86_FEATURE_PCID);
 }
 
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS 6
-
 struct tlb_context {
        u64 ctx_id;
        u64 tlb_gen;
@@ -138,6 +192,24 @@ struct tlb_state {
         */
        bool is_lazy;
 
+       /*
+        * If set we changed the page tables in such a way that we
+        * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+        * This tells us to go invalidate all the non-loaded ctxs[]
+        * on the next context switch.
+        *
+        * The current ctx was kept up-to-date as it ran and does not
+        * need to be invalidated.
+        */
+       bool invalidate_other;
+
+       /*
+        * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+        * the corresponding user PCID needs a flush next time we
+        * switch to it; see SWITCH_TO_USER_CR3.
+        */
+       unsigned short user_pcid_flush_mask;
+
        /*
         * Access to this CR4 shadow and to H/W CR4 is protected by
         * disabling interrupts when modifying either one.
@@ -218,6 +290,14 @@ static inline unsigned long cr4_read_shadow(void)
        return this_cpu_read(cpu_tlbstate.cr4);
 }
 
+/*
+ * Mark all other ASIDs as invalid; the current one is preserved.
+ */
+static inline void invalidate_other_asid(void)
+{
+       this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
 /*
  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
  * enable and PPro Global page enable), so that any CPU's that boot
@@ -237,37 +317,61 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
+/*
+ * Given an ASID, flush the corresponding user ASID.  We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
+{
+       /* There is no user ASID if address space separation is off */
+       if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+               return;
+
+       /*
+        * We only have a single ASID if PCID is off and the CR3
+        * write will have flushed it.
+        */
+       if (!cpu_feature_enabled(X86_FEATURE_PCID))
+               return;
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       __set_bit(kern_pcid(asid),
+                 (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
+}
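invalidate_user_asid() only records that the user half of an address space is stale; the actual flush happens when the entry code next switches to the user CR3. A rough C-level sketch of what the SWITCH_TO_USER_CR3 assembly in entry/calling.h (not shown in this hunk) is expected to do, for orientation only:

/* Sketch: consume the deferred-flush bit recorded by invalidate_user_asid(). */
static unsigned long switch_to_user_cr3_sketch(pgd_t *user_pgd, u16 asid)
{
	/* user_pgd is the user half of the mm's 8k PGD (see head_64.S) */
	unsigned long cr3 = __sme_pa(user_pgd) | user_pcid(asid);

	if (test_and_clear_bit(kern_pcid(asid),
			(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)))
		return cr3;				/* flushing write: stale user entries are dropped */

	return cr3 | X86_CR3_PCID_NOFLUSH;		/* nothing pending: keep cached translations */
}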
+
+/*
+ * flush the entire current user mapping
+ */
 static inline void __native_flush_tlb(void)
 {
+       invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
        /*
-        * If current->mm == NULL then we borrow a mm which may change during a
-        * task switch and therefore we must not be preempted while we write CR3
-        * back:
+        * If current->mm == NULL then we borrow a mm which may change
+        * during a task switch and therefore we must not be preempted
+        * while we write CR3 back:
         */
        preempt_disable();
        native_write_cr3(__native_read_cr3());
        preempt_enable();
 }
 
-static inline void __native_flush_tlb_global_irq_disabled(void)
-{
-       unsigned long cr4;
-
-       cr4 = this_cpu_read(cpu_tlbstate.cr4);
-       /* clear PGE */
-       native_write_cr4(cr4 & ~X86_CR4_PGE);
-       /* write old PGE again and flush TLBs */
-       native_write_cr4(cr4);
-}
-
+/*
+ * flush everything
+ */
 static inline void __native_flush_tlb_global(void)
 {
-       unsigned long flags;
+       unsigned long cr4, flags;
 
        if (static_cpu_has(X86_FEATURE_INVPCID)) {
                /*
                 * Using INVPCID is considerably faster than a pair of writes
                 * to CR4 sandwiched inside an IRQ flag save/restore.
+                *
+                * Note, this works with CR4.PCIDE=0 or 1.
                 */
                invpcid_flush_all();
                return;
@@ -280,36 +384,69 @@ static inline void __native_flush_tlb_global(void)
         */
        raw_local_irq_save(flags);
 
-       __native_flush_tlb_global_irq_disabled();
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       /* toggle PGE */
+       native_write_cr4(cr4 ^ X86_CR4_PGE);
+       /* write old PGE again and flush TLBs */
+       native_write_cr4(cr4);
 
        raw_local_irq_restore(flags);
 }
 
+/*
+ * flush one page in the user mapping
+ */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
+       u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       /*
+        * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+        * Just use invalidate_user_asid() in case we are called early.
+        */
+       if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+               invalidate_user_asid(loaded_mm_asid);
+       else
+               invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
+/*
+ * flush everything
+ */
 static inline void __flush_tlb_all(void)
 {
-       if (boot_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
-       else
+       } else {
+               /*
+                * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+                */
                __flush_tlb();
-
-       /*
-        * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-        * we'd end up flushing kernel translations for the current ASID but
-        * we might fail to flush kernel translations for other cached ASIDs.
-        *
-        * To avoid this issue, we force PCID off if PGE is off.
-        */
+       }
 }
 
+/*
+ * flush one page in the kernel mapping
+ */
 static inline void __flush_tlb_one(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
        __flush_tlb_single(addr);
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       /*
+        * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+        * but since kernel space is replicated across all ASIDs, we must
+        * also invalidate all the others.
+        */
+       invalidate_other_asid();
 }
 
 #define TLB_FLUSH_ALL  -1UL
@@ -370,6 +507,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
                             const struct flush_tlb_info *info);
 
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+       /*
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
+        */
+       return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
                                        struct mm_struct *mm)
 {
index 84b9ec0c1bc0867795c3a2d327f3b8831bf4f8ba..22647a642e98c41508dab9976a1ff4665b97a4d2 100644 (file)
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
 DECLARE_EVENT_CLASS(vector_activate,
 
        TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
-                bool early),
+                bool reserve),
 
-       TP_ARGS(irq, is_managed, can_reserve, early),
+       TP_ARGS(irq, is_managed, can_reserve, reserve),
 
        TP_STRUCT__entry(
                __field(        unsigned int,   irq             )
                __field(        bool,           is_managed      )
                __field(        bool,           can_reserve     )
-               __field(        bool,           early           )
+               __field(        bool,           reserve         )
        ),
 
        TP_fast_assign(
                __entry->irq            = irq;
                __entry->is_managed     = is_managed;
                __entry->can_reserve    = can_reserve;
-               __entry->early          = early;
+               __entry->reserve        = reserve;
        ),
 
-       TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+       TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
                  __entry->irq, __entry->is_managed, __entry->can_reserve,
-                 __entry->early)
+                 __entry->reserve)
 );
 
 #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name)                         \
 DEFINE_EVENT_FN(vector_activate, name,                                 \
        TP_PROTO(unsigned int irq, bool is_managed,                     \
-                bool can_reserve, bool early),                         \
-       TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL);      \
+                bool can_reserve, bool reserve),                       \
+       TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL);    \
 
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
index 1fadd310ff680ece697fa65a8db410c380a8547e..31051f35cbb768e452c4f76a60c5415a45f572e7 100644 (file)
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
index e9cc6fe1fc6f953c38ddcc61fcf06fd90d72ab04..c1688c2d0a128f063053697dc60bcbfbca509765 100644 (file)
@@ -7,6 +7,9 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
+
 struct unwind_state {
        struct stack_info stack_info;
        unsigned long stack_mask;
@@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+/*
+ * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
+ * only the iret frame registers are accessible.  Use with caution!
+ */
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
        if (unwind_done(state))
index d9a7c659009c94b2ee04b4ddcc94e5874cc403b0..b986b2ca688a0e4fa24ace613c7e465e909723fe 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
index 7e1e730396ae08f5a267adaccf0c3ba46448f780..bcba3c643e63dced1c873ee5e1cdbfdd5d307928 100644 (file)
 #define X86_CR3_PWT            _BITUL(X86_CR3_PWT_BIT)
 #define X86_CR3_PCD_BIT                4 /* Page Cache Disable */
 #define X86_CR3_PCD            _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK      _AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS      12
+#define X86_CR3_PCID_MASK      (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
 
 /*
  * Intel CPU features in CR4
index 201579dc52428edb5c3989102a432c14799d1e1f..8a79634214600ab02076cfb601dac780996b6f3f 100644 (file)
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 }
 
 int mp_irqdomain_activate(struct irq_domain *domain,
-                         struct irq_data *irq_data, bool early)
+                         struct irq_data *irq_data, bool reserve)
 {
        unsigned long flags;
 
index 750449152b04b4feed3c98ddc772db91f003f818..f8b03bb8e72560a436dbad677ddeed30f19531cf 100644 (file)
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
        irq_matrix_reserve(vector_matrix);
        apicd->can_reserve = true;
        apicd->has_reserved = true;
+       irqd_set_can_reserve(irqd);
        trace_vector_reserve(irqd->irq, 0);
        vector_assign_managed_shutdown(irqd);
 }
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
        int ret;
 
        ret = assign_irq_vector_any_locked(irqd);
-       if (!ret)
+       if (!ret) {
                apicd->has_reserved = false;
+               /*
+                * Core might have disabled reservation mode after
+                * allocating the irq descriptor. Ideally this should
+                * happen before allocation time, but that would require
+                * completely convoluted ways of transporting that
+                * information.
+                */
+               if (!irqd_can_reserve(irqd))
+                       apicd->can_reserve = false;
+       }
        return ret;
 }
 
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
 }
 
 static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
-                              bool early)
+                              bool reserve)
 {
        struct apic_chip_data *apicd = apic_chip_data(irqd);
        unsigned long flags;
        int ret = 0;
 
        trace_vector_activate(irqd->irq, apicd->is_managed,
-                             apicd->can_reserve, early);
+                             apicd->can_reserve, reserve);
 
        /* Nothing to do for fixed assigned vectors */
        if (!apicd->can_reserve && !apicd->is_managed)
                return 0;
 
        raw_spin_lock_irqsave(&vector_lock, flags);
-       if (early || irqd_is_managed_and_shutdown(irqd))
+       if (reserve || irqd_is_managed_and_shutdown(irqd))
                vector_assign_managed_shutdown(irqd);
        else if (apicd->is_managed)
                ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
        } else {
                /* Release the vector */
                apicd->can_reserve = true;
+               irqd_set_can_reserve(irqd);
                clear_irq_vector(irqd);
                realloc = true;
        }
index 8ea78275480dafeb702e11ba73364cd9e7c52f21..76417a9aab73c3f7e3376261eb9ec592b16adf3b 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/sigframe.h>
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
 
        BLANK();
        DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+       /* TLB state for the entry code */
+       OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+       OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+       DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 }
index dedf428b20b68b0a4748fc1ac3032193c9121362..fa1261eefa16e73cedf27aadb878753be693f919 100644 (file)
@@ -47,13 +47,8 @@ void foo(void)
        BLANK();
 
        /* Offset from the sysenter stack to tss.sp0 */
-       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-              offsetofend(struct tss_struct, SYSENTER_stack));
-
-       /* Offset from cpu_tss to SYSENTER_stack */
-       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-       /* Size of SYSENTER_stack */
-       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+              offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
 #ifdef CONFIG_CC_STACKPROTECTOR
        BLANK();
index 630212fa9b9da3f0498fc30d4c193c5926c43abb..bf51e51d808dd8914abd3b4bca69b37ce3ec023b 100644 (file)
@@ -23,6 +23,9 @@ int main(void)
 #ifdef CONFIG_PARAVIRT
        OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
        OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+       OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
        BLANK();
 #endif
 
@@ -63,6 +66,7 @@ int main(void)
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
        OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+       OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
        BLANK();
 
 #ifdef CONFIG_CC_STACKPROTECTOR
index fa998ca8aa5aa5b4899dbe8a57c5b543f927009e..c47de4ebf63a3e84a64511662c08d5b20faa94db 100644 (file)
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
        return NULL;            /* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
        load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
-       pgprot_t prot = PAGE_KERNEL_RO;
-#else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-        * our double fault handler uses a task gate, and entering through
-        * a task gate needs to change an available TSS to busy.  If the GDT
-        * is read-only, that will triple fault.
-        *
-        * On Xen PV, the GDT must be read-only because the hypervisor requires
-        * it.
-        */
-       pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
-       __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
        int i;
 
-       for (i = 0; i < NCAPINTS; i++) {
+       for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
                c->x86_capability[i] &= ~cpu_caps_cleared[i];
                c->x86_capability[i] |= cpu_caps_set[i];
        }
@@ -927,6 +922,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
        }
 
        setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+       /* Assume for now that ALL x86 CPUs are insecure */
+       setup_force_cpu_bug(X86_BUG_CPU_INSECURE);
+
        fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
@@ -1250,7 +1249,7 @@ void enable_sep_cpu(void)
                return;
 
        cpu = get_cpu();
-       tss = &per_cpu(cpu_tss, cpu);
+       tss = &per_cpu(cpu_tss_rw, cpu);
 
        /*
         * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1258,7 @@ void enable_sep_cpu(void)
 
        tss->x86_tss.ss1 = __KERNEL_CS;
        wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-       wrmsr(MSR_IA32_SYSENTER_ESP,
-             (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-             0);
-
+       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
        put_cpu();
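The "+ 1" in the MSR_IA32_SYSENTER_ESP write above is pointer arithmetic on a struct entry_stack *: it yields the address just past the end of the per-cpu entry stack, i.e. its top, since stacks grow down. Equivalent arithmetic for illustration (not part of the commit):

	struct entry_stack *es = cpu_entry_stack(cpu);
	unsigned long top = (unsigned long)(es + 1);	/* == (unsigned long)es + sizeof(*es) */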
@@ -1357,25 +1352,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+       extern char _entry_trampoline[];
+       extern char entry_SYSCALL_64_trampoline[];
+
+       int cpu = smp_processor_id();
+       unsigned long SYSCALL64_entry_trampoline =
+               (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+               (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
        wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+       if (static_cpu_has(X86_FEATURE_PTI))
+               wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+       else
+               wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
        wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1378,7 @@ void syscall_init(void)
         * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
         */
        wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
        wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
        wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1522,7 @@ void cpu_init(void)
        if (cpu)
                load_ucode_ap();
 
-       t = &per_cpu(cpu_tss, cpu);
+       t = &per_cpu(cpu_tss_rw, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1561,7 @@ void cpu_init(void)
         * set up and load the per-CPU TSS
         */
        if (!oist->ist[0]) {
-               char *estacks = per_cpu(exception_stacks, cpu);
+               char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        estacks += exception_stack_sizes[v];
@@ -1580,7 +1572,7 @@ void cpu_init(void)
                }
        }
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
        /*
         * <= is required because the CPU will access up to
@@ -1596,11 +1588,12 @@ void cpu_init(void)
        enter_lazy_tlb(&init_mm, me);
 
        /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
+        * Initialize the TSS.  sp0 points to the entry trampoline stack
+        * regardless of what task is running.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
+       load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
        load_mm_ldt(&init_mm);
 
@@ -1612,7 +1605,6 @@ void cpu_init(void)
        if (is_uv_system())
                uv_cpu_init();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1614,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
        wait_for_master_cpu(cpu);
 
@@ -1657,12 +1649,12 @@ void cpu_init(void)
         * Initialize the TSS.  Don't bother initializing sp0, as the initial
         * task never enters user mode.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
 
        load_mm_ldt(&init_mm);
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
        /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1666,6 @@ void cpu_init(void)
 
        fpu__init_cpu();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 #endif
index 7dbcb7adf7975f7f29c38651c23c478ad315a34c..8ccdca6d3f9e9b876ee27f021ed8c021b1168220 100644 (file)
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-       __native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
        if (rev != mc->hdr.rev)
                return -1;
 
-#ifdef CONFIG_X86_64
-       /* Flush global tlb. This is precaution. */
-       flush_tlb_early();
-#endif
        uci->cpu_sig.rev = rev;
 
        if (early)
index 0e662c55ae902fedd5c78c1ed87a972b35a79856..0b8cedb20d6d92f2875a49292680c8cfecd5b044 100644 (file)
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
                cpu_relax();
 }
 
-struct tss_struct doublefault_tss __cacheline_aligned = {
-       .x86_tss = {
-               .sp0            = STACK_START,
-               .ss0            = __KERNEL_DS,
-               .ldt            = 0,
-               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
-               .ip             = (unsigned long) doublefault_fn,
-               /* 0x2 bit is always set */
-               .flags          = X86_EFLAGS_SF | 0x2,
-               .sp             = STACK_START,
-               .es             = __USER_DS,
-               .cs             = __KERNEL_CS,
-               .ss             = __KERNEL_DS,
-               .ds             = __USER_DS,
-               .fs             = __KERNEL_PERCPU,
-
-               .__cr3          = __pa_nodebug(swapper_pg_dir),
-       }
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+       .sp0            = STACK_START,
+       .ss0            = __KERNEL_DS,
+       .ldt            = 0,
+       .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+
+       .ip             = (unsigned long) doublefault_fn,
+       /* 0x2 bit is always set */
+       .flags          = X86_EFLAGS_SF | 0x2,
+       .sp             = STACK_START,
+       .es             = __USER_DS,
+       .cs             = __KERNEL_CS,
+       .ss             = __KERNEL_DS,
+       .ds             = __USER_DS,
+       .fs             = __KERNEL_PERCPU,
+
+       .__cr3          = __pa_nodebug(swapper_pg_dir),
 };
 
 /* dummy for do_double_fault() call */
index f13b4c00a5de4b7a7b36c40d27311672bcc9d05c..5fa110699ed275fe81dc9d8f678a2a2f8902017e 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
        return true;
 }
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+       struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+       void *begin = ss;
+       void *end = ss + 1;
+
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+
+       info->type      = STACK_TYPE_ENTRY;
+       info->begin     = begin;
+       info->end       = end;
+       info->next_sp   = NULL;
+
+       return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
                                 char *log_lvl)
 {
@@ -50,6 +69,28 @@ static void printk_stack_address(unsigned long address, int reliable,
        printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+void show_iret_regs(struct pt_regs *regs)
+{
+       printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+       printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+               regs->sp, regs->flags);
+}
+
+static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+{
+       if (on_stack(info, regs, sizeof(*regs)))
+               __show_regs(regs, 0);
+       else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+                         IRET_FRAME_SIZE)) {
+               /*
+                * When an interrupt or exception occurs in entry code, the
+                * full pt_regs might not have been saved yet.  In that case
+                * just print the iret frame.
+                */
+               show_iret_regs(regs);
+       }
+}
+
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl)
 {
@@ -71,31 +112,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         * - task stack
         * - interrupt stack
         * - HW exception stacks (double fault, nmi, debug, mce)
+        * - entry stack
         *
-        * x86-32 can have up to three stacks:
+        * x86-32 can have up to four stacks:
         * - task stack
         * - softirq stack
         * - hardirq stack
+        * - entry stack
         */
        for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
                const char *stack_name;
 
-               /*
-                * If we overflowed the task stack into a guard page, jump back
-                * to the bottom of the usable stack.
-                */
-               if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
-                       stack = task_stack_page(task);
-
-               if (get_stack_info(stack, task, &stack_info, &visit_mask))
-                       break;
+               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+                       /*
+                        * We weren't on a valid stack.  It's possible that
+                        * we overflowed a valid stack into a guard page.
+                        * See if the next page up is valid so that we can
+                        * generate some kind of backtrace if this happens.
+                        */
+                       stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+                       if (get_stack_info(stack, task, &stack_info, &visit_mask))
+                               break;
+               }
 
                stack_name = stack_type_name(stack_info.type);
                if (stack_name)
                        printk("%s <%s>\n", log_lvl, stack_name);
 
-               if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                       __show_regs(regs, 0);
+               if (regs)
+                       show_regs_safe(&stack_info, regs);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -119,7 +164,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
                        /*
                         * Don't print regs->ip again if it was already printed
-                        * by __show_regs() below.
+                        * by show_regs_safe() below.
                         */
                        if (regs && stack == &regs->ip)
                                goto next;
@@ -155,8 +200,8 @@ next:
 
                        /* if the frame has entry regs, print them */
                        regs = unwind_get_entry_regs(&state);
-                       if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                               __show_regs(regs, 0);
+                       if (regs)
+                               show_regs_safe(&stack_info, regs);
                }
 
                if (stack_name)
@@ -252,11 +297,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
        unsigned long sp;
 #endif
        printk(KERN_DEFAULT
-              "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+              "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
               IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
               debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
-              IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
+              IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
+              IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+              (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
 
        if (notify_die(DIE_OOPS, str, regs, err,
                        current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
index daefae83a3aa86c59602b75bd3e6734c6e3b1030..04170f63e3a1d567caac3deea641e014b7e10823 100644 (file)
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_SOFTIRQ)
                return "SOFTIRQ";
 
+       if (type == STACK_TYPE_ENTRY)
+               return "ENTRY_TRAMPOLINE";
+
        return NULL;
 }
 
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (task != current)
                goto unknown;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        if (in_hardirq_stack(stack, info))
                goto recursion_check;
 
index 88ce2ffdb110303502ad33e64d357d8af5afd8c6..563e28d14f2ca157178d9de3a139d8370aaf89fe 100644 (file)
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_IRQ)
                return "IRQ";
 
+       if (type == STACK_TYPE_ENTRY) {
+               /*
+                * On 64-bit, we have a generic entry stack that we
+                * use for all the kernel entry points, including
+                * SYSENTER.
+                */
+               return "ENTRY_TRAMPOLINE";
+       }
+
        if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
                return exception_stack_names[type - STACK_TYPE_EXCEPTION];
 
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (in_irq_stack(stack, info))
                goto recursion_check;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        goto unknown;
 
 recursion_check:
index 7dca675fe78db60c5d79bc450bbd14bfee35cfc2..04a625f0fcda322dab7c9d8459a19ee56b71c936 100644 (file)
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
        .balign PAGE_SIZE; \
 GLOBAL(name)
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned.  We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define PTI_USER_PGD_FILL      512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+       .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define PTI_USER_PGD_FILL      0
+#endif
+
 /* Automate the creation of 1 to 1 mapping pmd entries */
 #define PMDS(START, PERM, COUNT)                       \
        i = 0 ;                                         \
@@ -350,13 +371,14 @@ GLOBAL(name)
        .endr
 
        __INITDATA
-NEXT_PAGE(early_top_pgt)
+NEXT_PGD_PAGE(early_top_pgt)
        .fill   511,8,0
 #ifdef CONFIG_X86_5LEVEL
        .quad   level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #else
        .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #endif
+       .fill   PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(early_dynamic_pgts)
        .fill   512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
        .data
 
 #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
        .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
        .org    init_top_pgt + PGD_START_KERNEL*8, 0
        /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
        .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+       .fill   PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(level3_ident_pgt)
        .quad   level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
         */
        PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #else
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
        .fill   512,8,0
+       .fill   PTI_USER_PGD_FILL,8,0
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
index 3feb648781c470a7a49ee26749712ba7da891fe9..2f723301eb58fc5ad0d6796b342446ae2ee0c9e6 100644 (file)
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
         * because the ->io_bitmap_max value must match the bitmap
         * contents:
         */
-       tss = &per_cpu(cpu_tss, get_cpu());
+       tss = &per_cpu(cpu_tss_rw, get_cpu());
 
        if (turn_on)
                bitmap_clear(t->io_bitmap_ptr, from, num);
index 49cfd9fe7589fa5ef2bef5d4a5d6431b7007836f..68e1867cca8045d0ed728ffc6b75a866c25484ed 100644 (file)
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
        /* high bit used in ret_from_ code  */
        unsigned vector = ~regs->orig_ax;
 
-       /*
-        * NB: Unlike exception entries, IRQ entries do not reliably
-        * handle context tracking in the low-level entry code.  This is
-        * because syscall entries execute briefly with IRQs on before
-        * updating context tracking state, so we can take an IRQ from
-        * kernel mode with CONTEXT_USER.  The low-level entry code only
-        * updates the context if we came from user mode, so we won't
-        * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
-        * code is cleaned up enough that we can cleanly defer enabling
-        * IRQs.
-        */
-
        entering_irq();
 
        /* entering_irq() tells RCU that we're not quiescent.  Check it. */
index 020efbf5786b35d343a8632cd14ac4f800465d9b..d86e344f5b3debfed504b72a7c0f83f36fe16387 100644 (file)
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
        if (regs->sp >= estack_top && regs->sp <= estack_bottom)
                return;
 
-       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
                current->comm, curbase, regs->sp,
                irq_stack_top, irq_stack_bottom,
-               estack_top, estack_bottom);
+               estack_top, estack_bottom, (void *)regs->ip);
 
        if (sysctl_panic_on_stackoverflow)
                panic("low stack detected by irq handler - check messages\n");
index 1c1eae9613406b14c3154065e1fd036f985a384c..579cc4a66fdf663c7e1c727d6272a3a83fbf0648 100644 (file)
@@ -5,6 +5,11 @@
  * Copyright (C) 2002 Andi Kleen
  *
  * This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ *     context.ldt_usr_sem
+ *       mmap_sem
+ *         context.lock
  */
 
 #include <linux/errno.h>
@@ -19,6 +24,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/ldt.h>
+#include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
 static void flush_ldt(void *__mm)
 {
        struct mm_struct *mm = __mm;
-       mm_context_t *pc;
 
        if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
                return;
 
-       pc = &mm->context;
-       set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+       load_mm_ldt(mm);
 
        refresh_ldt_segments();
 }
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
                return NULL;
        }
 
+       /* The new LDT isn't aliased for PTI yet. */
+       new_ldt->slot = -1;
+
        new_ldt->nr_entries = num_entries;
        return new_ldt;
 }
 
+/*
+ * If PTI is enabled, this maps the LDT into the kernelmode and
+ * usermode tables for the given mm.
+ *
+ * There is no corresponding unmap function.  Even if the LDT is freed, we
+ * leave the PTEs around until the slot is reused or the mm is destroyed.
+ * This is harmless: the LDT is always in ordinary memory, and no one will
+ * access the freed slot.
+ *
+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
+ * it useful, and the flush would slow down modify_ldt().
+ */
+static int
+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       bool is_vmalloc, had_top_level_entry;
+       unsigned long va;
+       spinlock_t *ptl;
+       pgd_t *pgd;
+       int i;
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return 0;
+
+       /*
+        * Any given ldt_struct should have map_ldt_struct() called at most
+        * once.
+        */
+       WARN_ON(ldt->slot != -1);
+
+       /*
+        * Did we already have the top level entry allocated?  We can't
+        * use pgd_none() for this because it doesn't do anything on
+        * 4-level page table kernels.
+        */
+       pgd = pgd_offset(mm, LDT_BASE_ADDR);
+       had_top_level_entry = (pgd->pgd != 0);
+
+       is_vmalloc = is_vmalloc_addr(ldt->entries);
+
+       for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+               unsigned long offset = i << PAGE_SHIFT;
+               const void *src = (char *)ldt->entries + offset;
+               unsigned long pfn;
+               pte_t pte, *ptep;
+
+               va = (unsigned long)ldt_slot_va(slot) + offset;
+               pfn = is_vmalloc ? vmalloc_to_pfn(src) :
+                       page_to_pfn(virt_to_page(src));
+               /*
+                * Treat the PTI LDT range as a *userspace* range.
+                * get_locked_pte() will allocate all needed pagetables
+                * and account for them in this mm.
+                */
+               ptep = get_locked_pte(mm, va, &ptl);
+               if (!ptep)
+                       return -ENOMEM;
+               /*
+                * Map it RO so the easy-to-find address is not a primary
+                * target via some kernel interface which misses a
+                * permission check.
+                */
+               pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+               set_pte_at(mm, va, ptep, pte);
+               pte_unmap_unlock(ptep, ptl);
+       }
+
+       if (mm->context.ldt) {
+               /*
+                * We already had an LDT.  The top-level entry should already
+                * have been allocated and synchronized with the usermode
+                * tables.
+                */
+               WARN_ON(!had_top_level_entry);
+               if (static_cpu_has(X86_FEATURE_PTI))
+                       WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
+       } else {
+               /*
+                * This is the first time we're mapping an LDT for this process.
+                * Sync the pgd to the usermode tables.
+                */
+               WARN_ON(had_top_level_entry);
+               if (static_cpu_has(X86_FEATURE_PTI)) {
+                       WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
+                       set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+               }
+       }
+
+       va = (unsigned long)ldt_slot_va(slot);
+       flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+
+       ldt->slot = slot;
+#endif
+       return 0;
+}
+
+static void free_ldt_pgtables(struct mm_struct *mm)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       struct mmu_gather tlb;
+       unsigned long start = LDT_BASE_ADDR;
+       unsigned long end = start + (1UL << PGDIR_SHIFT);
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       tlb_gather_mmu(&tlb, mm, start, end);
+       free_pgd_range(&tlb, start, end, start, end);
+       tlb_finish_mmu(&tlb, start, end);
+#endif
+}
+
 /* After calling this, the LDT is immutable. */
 static void finalize_ldt_struct(struct ldt_struct *ldt)
 {
        paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
-                       struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 {
+       mutex_lock(&mm->context.lock);
+
        /* Synchronizes with READ_ONCE in load_mm_ldt. */
-       smp_store_release(&current_mm->context.ldt, ldt);
+       smp_store_release(&mm->context.ldt, ldt);
 
-       /* Activate the LDT for all CPUs using current_mm. */
-       on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+       /* Activate the LDT for all CPUs using current's mm. */
+       on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+       mutex_unlock(&mm->context.lock);
 }
 
 static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 }
 
 /*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state;
+ * the new task is not running, so nothing can be installed.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 {
        struct ldt_struct *new_ldt;
-       struct mm_struct *old_mm;
        int retval = 0;
 
-       mutex_init(&mm->context.lock);
-       old_mm = current->mm;
-       if (!old_mm) {
-               mm->context.ldt = NULL;
+       if (!old_mm)
                return 0;
-       }
 
        mutex_lock(&old_mm->context.lock);
-       if (!old_mm->context.ldt) {
-               mm->context.ldt = NULL;
+       if (!old_mm->context.ldt)
                goto out_unlock;
-       }
 
        new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
        if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
               new_ldt->nr_entries * LDT_ENTRY_SIZE);
        finalize_ldt_struct(new_ldt);
 
+       retval = map_ldt_struct(mm, new_ldt, 0);
+       if (retval) {
+               free_ldt_pgtables(mm);
+               free_ldt_struct(new_ldt);
+               goto out_unlock;
+       }
        mm->context.ldt = new_ldt;
 
 out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
        mm->context.ldt = NULL;
 }
 
+void ldt_arch_exit_mmap(struct mm_struct *mm)
+{
+       free_ldt_pgtables(mm);
+}
+
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
        struct mm_struct *mm = current->mm;
        unsigned long entries_size;
        int retval;
 
-       mutex_lock(&mm->context.lock);
+       down_read(&mm->context.ldt_usr_sem);
 
        if (!mm->context.ldt) {
                retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
        retval = bytecount;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_read(&mm->context.ldt_usr_sem);
        return retval;
 }
 
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
                        ldt.avl = 0;
        }
 
-       mutex_lock(&mm->context.lock);
+       if (down_write_killable(&mm->context.ldt_usr_sem))
+               return -EINTR;
 
        old_ldt       = mm->context.ldt;
        old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,24 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
        new_ldt->entries[ldt_info.entry_number] = ldt;
        finalize_ldt_struct(new_ldt);
 
+       /*
+        * If we are using PTI, map the new LDT into the userspace pagetables.
+        * If there is already an LDT, use the other slot so that other CPUs
+        * will continue to use the old LDT until install_ldt() switches
+        * them over to the new LDT.
+        */
+       error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+       if (error) {
+               free_ldt_struct(new_ldt);
+               goto out_unlock;
+       }
+
        install_ldt(mm, new_ldt);
        free_ldt_struct(old_ldt);
        error = 0;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_write(&mm->context.ldt_usr_sem);
 out:
        return error;
 }
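
The write_ldt() change above picks a PTI slot with old_ldt ? !old_ldt->slot : 0, so the new LDT always lands in whichever of the two slots the old one is not using; remote CPUs keep the old mapping until install_ldt() publishes the new pointer and flush_ldt() reloads it. A minimal user-space sketch of just that slot arithmetic, with an invented struct standing in for ldt_struct:

#include <stdio.h>

/* Invented stand-in for struct ldt_struct: only the PTI slot matters here. */
struct ldt_sketch {
        int slot;
};

/*
 * Same slot choice as write_ldt(): if an LDT already exists, put the new one
 * in the other of the two PTI slots so remote CPUs can keep using the old
 * mapping until install_ldt() flips them over.
 */
static int pick_slot(const struct ldt_sketch *old)
{
        return old ? !old->slot : 0;
}

int main(void)
{
        struct ldt_sketch first  = { .slot = pick_slot(NULL) };    /* slot 0 */
        struct ldt_sketch second = { .slot = pick_slot(&first) };  /* slot 1 */
        struct ldt_sketch third  = { .slot = pick_slot(&second) }; /* slot 0 again */

        printf("%d %d %d\n", first.slot, second.slot, third.slot);
        return 0;
}
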
index ac0be8283325edfdc2752f862b4c0cef208a931c..9edadabf04f66c657f8a29bb56fe994b2559d5cf 100644 (file)
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
-               PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
index bb988a24db927d758f9120d45f90d1c160628790..aed9d94bd46f41bb049b8e0153a44a43d97e80b4 100644 (file)
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
        .x86_tss = {
                /*
                 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
                 * Poison it.
                 */
                .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+               /*
+                * .sp1 is cpu_current_top_of_stack.  The init task never
+                * runs user code, but cpu_current_top_of_stack should still
+                * be well defined before the first context switch.
+                */
+               .sp1 = TOP_OF_INIT_STACK,
+#endif
+
 #ifdef CONFIG_X86_32
                .ss0 = __KERNEL_DS,
                .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
          */
        .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
-#ifdef CONFIG_X86_32
-       .SYSENTER_stack_canary  = STACK_END_MAGIC,
-#endif
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
 DEFINE_PER_CPU(bool, __tss_limit_invalid);
 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
        struct fpu *fpu = &t->fpu;
 
        if (bp) {
-               struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+               struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
 
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
index 45bf0c5f93e15103060d67d5245756ab72ce8fe5..5224c609918416337b97440eb2d515d8052463ae 100644 (file)
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
index eeeb34f85c250e8c01188b6d32cf5a62bd1af8a0..c754662320163107ca3a254362ce0e404a8d3c11 100644 (file)
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
 
-       printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
-       printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
-               regs->sp, regs->flags);
+       show_iret_regs(regs);
+
        if (regs->orig_ax != -1)
                pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
        else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
        printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);
 
+       if (!all)
+               return;
+
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
-       if (!all)
-               return;
-
        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
                     this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Switch the PDA and FPU contexts.
         */
        this_cpu_write(current_task, next_p);
+       this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
        /* Reload sp0. */
        update_sp0(next_p);
index 35cb20994e32d2bf05f0b1510ccc26cc7e7590a5..c5970efa85570ab324bd1cad2e57d464dba86f46 100644 (file)
@@ -932,12 +932,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
 
-       /*
-        * Enable the espfix hack for this CPU
-       */
-#ifdef CONFIG_X86_ESPFIX64
+       /* Enable the espfix hack for this CPU */
        init_espfix_ap(cpu);
-#endif
 
        /* So we see what's up */
        announce_cpu(cpu, apicid);
index 9a9c9b076955dd493c7de80b8d814a27dd0fceb7..a5b802a1221272402b344d75ccf94bcff4b69b38 100644 (file)
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
        cpu = get_cpu();
 
        while (n-- > 0) {
-               if (LDT_empty(info) || LDT_zero(info)) {
+               if (LDT_empty(info) || LDT_zero(info))
                        memset(desc, 0, sizeof(*desc));
-               } else {
+               else
                        fill_ldt(desc, info);
-
-                       /*
-                        * Always set the accessed bit so that the CPU
-                        * doesn't try to write to the (read-only) GDT.
-                        */
-                       desc->type |= 1;
-               }
                ++info;
                ++desc;
        }
index 989514c94a55d8fa93a07192edd199be1a607bf8..f69dbd47d7332f4af7e5f274bb6aa9736f3014bd 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
@@ -348,9 +349,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
        /*
         * If IRET takes a non-IST fault on the espfix64 stack, then we
-        * end up promoting it to a doublefault.  In that case, modify
-        * the stack to make it look like we just entered the #GP
-        * handler from user space, similar to bad_iret.
+        * end up promoting it to a doublefault.  In that case, take
+        * advantage of the fact that we're not using the normal (TSS.sp0)
+        * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
+        * and then modify our own IRET frame so that, when we return,
+        * we land directly at the #GP(0) vector with the stack already
+        * set up according to its expectations.
+        *
+        * The net result is that our #GP handler will think that we
+        * entered from usermode with the bad user context.
         *
         * No need for ist_enter here because we don't use RCU.
         */
@@ -358,13 +365,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
                regs->cs == __KERNEL_CS &&
                regs->ip == (unsigned long)native_irq_return_iret)
        {
-               struct pt_regs *normal_regs = task_pt_regs(current);
+               struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
-               /* Fake a #GP(0) from userspace. */
-               memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
-               normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+               /*
+                * regs->sp points to the failing IRET frame on the
+                * ESPFIX64 stack.  Copy it to the entry stack.  This fills
+                * in gpregs->ss through gpregs->ip.
+                */
+               memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+               gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
+
+               /*
+                * Adjust our frame so that we return straight to the #GP
+                * vector with the expected RSP value.  This is safe because
+                * we won't enable interrupts or schedule before we invoke
+                * general_protection, so nothing will clobber the stack
+                * frame we just set up.
+                */
                regs->ip = (unsigned long)general_protection;
-               regs->sp = (unsigned long)&normal_regs->orig_ax;
+               regs->sp = (unsigned long)&gpregs->orig_ax;
 
                return;
        }
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         *
         *   Processors update CR2 whenever a page fault is detected. If a
         *   second page fault occurs while an earlier page fault is being
-        *   delivered, the faulting linear address of the second fault will
+        *   delivered, the faulting linear address of the second fault will
         *   overwrite the contents of CR2 (replacing the previous
         *   address). These updates to CR2 occur even if the page fault
         *   results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
  */
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-       struct pt_regs *regs = task_pt_regs(current);
-       *regs = *eregs;
+       struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+       if (regs != eregs)
+               *regs = *eregs;
        return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
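
Both sync_regs() above and the do_double_fault()/fixup_bad_iret() changes rely on the same trick: casting a stack-top address to struct pt_regs * and subtracting one reserves exactly one register frame at the top of that stack. A self-contained user-space sketch of the pointer arithmetic; fake_pt_regs is an invented stand-in, not the real pt_regs:

#include <stdio.h>

/* Invented stand-in for struct pt_regs; only its size matters for the layout. */
struct fake_pt_regs {
        unsigned long regs[21];
};

int main(void)
{
        /* Pretend this is the per-CPU entry stack; sp0 points one past its top. */
        static unsigned char stack[4096];
        void *sp0 = stack + sizeof(stack);

        /*
         * "(struct pt_regs *)sp0 - 1" reserves exactly one register frame at
         * the very top of the stack -- the spot where the #GP handler (or the
         * thread-stack copy in sync_regs()) expects to find its pt_regs.
         */
        struct fake_pt_regs *frame = (struct fake_pt_regs *)sp0 - 1;

        printf("stack top %p, frame at %p, frame size %zu bytes\n",
               sp0, (void *)frame, sizeof(*frame));
        return 0;
}
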
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
-        * correctly, we want move our stack frame to task_pt_regs
-        * and we want to pretend that the exception came from the
-        * iret target.
+        * correctly, we want to move our stack frame to where it would
+        * be had we entered directly on the entry stack (rather than
+        * just below the IRET frame) and we want to pretend that the
+        * exception came from the IRET target.
         */
        struct bad_iret_stack *new_stack =
-               container_of(task_pt_regs(current),
-                            struct bad_iret_stack, regs);
+               (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
        /* Copy the IRET target to the new stack. */
        memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
        debug_stack_usage_dec();
 
 exit:
-#if defined(CONFIG_X86_32)
-       /*
-        * This is the most likely code path that involves non-trivial use
-        * of the SYSENTER stack.  Check that we haven't overrun it.
-        */
-       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
-            "Overran or corrupted SYSENTER stack\n");
-#endif
        ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
+       /* Init cpu_entry_area before IST entries are set up */
+       setup_cpu_entry_areas();
+
        idt_setup_traps();
 
        /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
         * "sidt" instruction will not leak the location of the kernel, and
         * to defend the IDT against arbitrary memory write vulnerabilities.
         * It will be reloaded in cpu_init() */
-       __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
-       idt_descr.address = fix_to_virt(FIX_RO_IDT);
+       cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+                   PAGE_KERNEL_RO);
+       idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
 
        /*
         * Should be a barrier for any external CPU state:
index a3f973b2c97a03b121fe0173dbdc9298216721e6..be86a865087a6b9dc8e04031dbf2e2fbeeda1ed5 100644 (file)
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        return NULL;
 }
 
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
                            size_t len)
 {
        struct stack_info *info = &state->stack_info;
+       void *addr = (void *)_addr;
 
-       /*
-        * If the address isn't on the current stack, switch to the next one.
-        *
-        * We may have to traverse multiple stacks to deal with the possibility
-        * that info->next_sp could point to an empty stack and the address
-        * could be on a subsequent stack.
-        */
-       while (!on_stack(info, (void *)addr, len))
-               if (get_stack_info(info->next_sp, state->task, info,
-                                  &state->stack_mask))
-                       return false;
+       if (!on_stack(info, addr, len) &&
+           (get_stack_info(addr, state->task, info, &state->stack_mask)))
+               return false;
 
        return true;
 }
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
        return true;
 }
 
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
-                            unsigned long *ip, unsigned long *sp, bool full)
+                            unsigned long *ip, unsigned long *sp)
 {
-       size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
-       size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
-       struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
-       if (IS_ENABLED(CONFIG_X86_64)) {
-               if (!stack_access_ok(state, addr, regs_size))
-                       return false;
+       struct pt_regs *regs = (struct pt_regs *)addr;
 
-               *ip = regs->ip;
-               *sp = regs->sp;
+       /* x86-32 support will be more complicated due to the &regs->sp hack */
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
 
-               return true;
-       }
-
-       if (!stack_access_ok(state, addr, sp_offset))
+       if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
                return false;
 
        *ip = regs->ip;
+       *sp = regs->sp;
+       return true;
+}
 
-       if (user_mode(regs)) {
-               if (!stack_access_ok(state, addr + sp_offset,
-                                    REGS_SIZE - SP_OFFSET))
-                       return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+                                 unsigned long *ip, unsigned long *sp)
+{
+       struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
 
-               *sp = regs->sp;
-       } else
-               *sp = (unsigned long)&regs->sp;
+       if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+               return false;
 
+       *ip = regs->ip;
+       *sp = regs->sp;
        return true;
 }
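
deref_stack_iret_regs() treats the five IRET words (ip, cs, flags, sp, ss) as the tail of a pt_regs and backs up by IRET_FRAME_OFFSET to obtain a pt_regs pointer whose leading GPR fields simply are not present on the stack. A small user-space sketch of that offset arithmetic, using an invented toy_pt_regs layout rather than the real structure:

#include <stdio.h>
#include <stddef.h>

/* Invented cut-down pt_regs: a few GPRs, then the five words an IRET frame holds. */
struct toy_pt_regs {
        unsigned long bx, cx, dx;            /* not present in an IRET-only frame */
        unsigned long ip, cs, flags, sp, ss; /* the five IRET words */
};

#define IRET_FRAME_OFFSET offsetof(struct toy_pt_regs, ip)
#define IRET_FRAME_SIZE   (sizeof(struct toy_pt_regs) - IRET_FRAME_OFFSET)

int main(void)
{
        /* What the unwinder actually finds on the stack: just ip, cs, flags, sp, ss. */
        unsigned long iret_words[5] = { 0x1111, 0x10, 0x246, 0x2222, 0x18 };

        /* Back up from the IRET words to where a full pt_regs would begin. */
        struct toy_pt_regs *regs =
                (struct toy_pt_regs *)((char *)iret_words - IRET_FRAME_OFFSET);

        printf("IRET frame is %zu bytes; ip=%#lx sp=%#lx\n",
               IRET_FRAME_SIZE, regs->ip, regs->sp);
        return 0;
}
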
 
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
        unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
        enum stack_type prev_type = state->stack_info.type;
        struct orc_entry *orc;
-       struct pt_regs *ptregs;
        bool indirect = false;
 
        if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+               if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS_IRET:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+               if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference iret registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
                }
 
-               ptregs = container_of((void *)sp, struct pt_regs, ip);
-               if ((unsigned long)ptregs >= prev_sp &&
-                   on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
-                       state->regs = ptregs;
-                       state->full_regs = false;
-               } else
-                       state->regs = NULL;
-
+               state->regs = (void *)sp - IRET_FRAME_OFFSET;
+               state->full_regs = false;
                state->signal = true;
                break;
 
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        }
 
        if (get_stack_info((unsigned long *)state->sp, state->task,
-                          &state->stack_info, &state->stack_mask))
-               return;
+                          &state->stack_info, &state->stack_mask)) {
+               /*
+                * We weren't on a valid stack.  It's possible that
+                * we overflowed a valid stack into a guard page.
+                * See if the next page up is valid so that we can
+                * generate some kind of backtrace if this happens.
+                */
+               void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+               if (get_stack_info(next_page, state->task, &state->stack_info,
+                                  &state->stack_mask))
+                       return;
+       }
 
        /*
         * The caller can provide the address of the first frame directly
index a4009fb9be8725ce7bda96cd5e8160e524903266..1e413a9326aaa152a47d51fa4c1d1ea03cbb4d94 100644 (file)
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
                . = ALIGN(HPAGE_SIZE);                          \
                __end_rodata_hpage_align = .;
 
+#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END   . = ALIGN(PMD_SIZE);
+
 #else
 
 #define X64_ALIGN_RODATA_BEGIN
 #define X64_ALIGN_RODATA_END
 
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
 #endif
 
 PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
                CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
+               ALIGN_ENTRY_TEXT_BEGIN
                ENTRY_TEXT
                IRQENTRY_TEXT
+               ALIGN_ENTRY_TEXT_END
                SOFTIRQENTRY_TEXT
                *(.fixup)
                *(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+               . = ALIGN(PAGE_SIZE);
+               _entry_trampoline = .;
+               *(.entry_trampoline)
+               . = ALIGN(PAGE_SIZE);
+               ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
                /* End of text section */
                _etext = .;
        } :text = 0x9090
index abe74f779f9d793e9a6c2f19417f23b5aa7ce484..b514b2b2845a334d4b53f28ed0b73c96f12d0e6a 100644 (file)
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
 }
 
 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-                                    u64 cr0, u64 cr4)
+                                   u64 cr0, u64 cr3, u64 cr4)
 {
        int bad;
+       u64 pcid;
+
+       /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+       pcid = 0;
+       if (cr4 & X86_CR4_PCIDE) {
+               pcid = cr3 & 0xfff;
+               cr3 &= ~0xfff;
+       }
+
+       bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+       if (bad)
+               return X86EMUL_UNHANDLEABLE;
 
        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
+               if (pcid) {
+                       bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+                       if (bad)
+                               return X86EMUL_UNHANDLEABLE;
+               }
+
        }
 
        return X86EMUL_CONTINUE;
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
        struct desc_struct desc;
        struct desc_ptr dt;
        u16 selector;
-       u32 val, cr0, cr4;
+       u32 val, cr0, cr3, cr4;
        int i;
 
        cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
-       ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+       cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
        ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
        ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
 
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
 
-       return rsm_enter_protected_mode(ctxt, cr0, cr4);
+       return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
 }
 
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 {
        struct desc_struct desc;
        struct desc_ptr dt;
-       u64 val, cr0, cr4;
+       u64 val, cr0, cr3, cr4;
        u32 base3;
        u16 selector;
        int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
 
        cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
-       ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+       cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
        cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
        val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
        ctxt->ops->set_gdt(ctxt, &dt);
 
-       r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+       r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
        if (r != X86EMUL_CONTINUE)
                return r;
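
The ordering constraint rsm_enter_protected_mode() now enforces is that CR3 bits 11:0 must be clear while CR4.PCIDE is being set, so the PCID is stripped, the bare page-table base is loaded first, and the PCID is folded back into CR3 only after CR4 has been written. A sketch of just that bit manipulation; the CR4_PCIDE value and the sample CR3 are illustrative, not taken from the headers:

#include <stdio.h>

#define CR4_PCIDE (1ULL << 17)  /* illustrative; the real mask lives in the x86 headers */

int main(void)
{
        unsigned long long cr3  = 0x12345678000ULL | 0x005; /* saved CR3 carrying PCID 5 */
        unsigned long long cr4  = CR4_PCIDE;                /* guest wants PCID enabled */
        unsigned long long pcid = 0;

        /* CR3[11:0] must be zero while CR4.PCIDE is being turned on... */
        if (cr4 & CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfffULL;
        }

        /* ...so load the bare page-table base first and fold the PCID back in last. */
        printf("step 1: CR3 = %#llx\n", cr3);
        printf("step 2: CR4 = %#llx (PCIDE set)\n", cr4);
        printf("step 3: CR3 = %#llx (PCID restored)\n", cr3 | pcid);
        return 0;
}
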
 
index e5e66e5c664057bb5cc5ad2660008ccbf19b69e5..c4deb1f34faa6ce7ffe6bcaaebddc3e87b2a9a69 100644 (file)
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if(make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, 0, 0,
                                vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        spin_lock(&vcpu->kvm->mmu_lock);
                        if (make_mmu_pages_available(vcpu) < 0) {
                                spin_unlock(&vcpu->kvm->mmu_lock);
-                               return 1;
+                               return -ENOSPC;
                        }
                        sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
                                        i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
                                      0, ACC_ALL);
index 8eba631c4dbd509d8687c6135e8dba267042f5e0..023afa0c8887002d6a79a8b121b46996feec1a61 100644 (file)
@@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 * processors.  See 22.2.4.
                 */
                vmcs_writel(HOST_TR_BASE,
-                           (unsigned long)this_cpu_ptr(&cpu_tss));
+                           (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
                vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
                /*
index faf843c9b916ead0992d0b155a138c6afdf7ae57..1cec2c62a0b08405d2bd7c8908d6b7f33de3b63c 100644 (file)
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
                                         addr, n, v))
                    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
-               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
                handled += n;
                addr += n;
                len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 {
        if (vcpu->mmio_read_completed) {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-                              vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+                              vcpu->mmio_fragments[0].gpa, val);
                vcpu->mmio_read_completed = 0;
                return 1;
        }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
        return vcpu_mmio_write(vcpu, gpa, bytes, val);
 }
 
 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
                          void *val, int bytes)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
        return X86EMUL_IO_NEEDED;
 }
 
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       struct fpu *fpu = &current->thread.fpu;
        int r;
 
-       fpu__initialize(fpu);
-
        kvm_sigset_activate(vcpu);
 
+       kvm_load_guest_fpu(vcpu);
+
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                        r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                }
        }
 
-       kvm_load_guest_fpu(vcpu);
-
        if (unlikely(vcpu->arch.complete_userspace_io)) {
                int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
                vcpu->arch.complete_userspace_io = NULL;
                r = cui(vcpu);
                if (r <= 0)
-                       goto out_fpu;
+                       goto out;
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        else
                r = vcpu_run(vcpu);
 
-out_fpu:
-       kvm_put_guest_fpu(vcpu);
 out:
+       kvm_put_guest_fpu(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
 
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_set_rflags(vcpu, regs->rflags);
+       kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
 
        vcpu->arch.exception.pending = false;
 
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+       if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
+               /*
+                * When EFER.LME and CR0.PG are set, the processor is in
+                * 64-bit mode (though maybe in a 32-bit code segment).
+                * CR4.PAE and EFER.LMA must be set.
+                */
+               if (!(sregs->cr4 & X86_CR4_PAE)
+                   || !(sregs->efer & EFER_LMA))
+                       return -EINVAL;
+       } else {
+               /*
+                * Not in 64-bit mode: EFER.LMA is clear and the code
+                * segment cannot be 64-bit.
+                */
+               if (sregs->efer & EFER_LMA || sregs->cs.l)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
 {
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                        (sregs->cr4 & X86_CR4_OSXSAVE))
                return -EINVAL;
 
+       if (kvm_valid_sregs(vcpu, sregs))
+               return -EINVAL;
+
        apic_base_msr.data = sregs->apic_base;
        apic_base_msr.host_initiated = true;
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
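
kvm_valid_sregs() above encodes the architectural rule that EFER.LME together with CR0.PG means 64-bit mode, which in turn requires CR4.PAE and EFER.LMA, while outside 64-bit mode EFER.LMA and a long-mode code segment must both be absent. A standalone sketch of the same check with simplified flag constants; the real masks come from the x86 headers:

#include <stdbool.h>
#include <stdio.h>

/* Simplified flag masks for illustration; the kernel uses the x86 header definitions. */
#define EFER_LME (1u << 8)
#define EFER_LMA (1u << 10)
#define CR0_PG   (1u << 31)
#define CR4_PAE  (1u << 5)

/* Same consistency rule as kvm_valid_sregs(), reduced to plain integers. */
static bool sregs_consistent(unsigned int cr0, unsigned int cr4,
                             unsigned int efer, bool cs_long)
{
        if ((efer & EFER_LME) && (cr0 & CR0_PG))
                return (cr4 & CR4_PAE) && (efer & EFER_LMA);    /* 64-bit mode */
        return !(efer & EFER_LMA) && !cs_long;                  /* not 64-bit mode */
}

int main(void)
{
        printf("%d\n", sregs_consistent(CR0_PG, CR4_PAE, EFER_LME | EFER_LMA, true)); /* 1 */
        printf("%d\n", sregs_consistent(CR0_PG, 0,       EFER_LME | EFER_LMA, true)); /* 0 */
        printf("%d\n", sregs_consistent(0,      0,       0,                   false));/* 1 */
        return 0;
}
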
index 553f8fd23cc4733d0edafa862b95446f7a04bab1..4846eff7e4c8b1505501d7f1dcb64127d0a4c67c 100644 (file)
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
                delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
 
                /*
-                * Use cpu_tss as a cacheline-aligned, seldomly
+                * Use cpu_tss_rw as a cacheline-aligned, seldom
                 * accessed per-cpu variable as the monitor target.
                 */
-               __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+               __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
                /*
                 * AMD, like Intel, supports the EAX hint and EAX=0xf
index 8e13b8cc6bedb0dc84eea64cd80ca6ae39037eaa..27e9e90a8d3572b900ccaacae1623de803072b17 100644 (file)
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o   = -pg
 endif
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA)               += amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)                += srat.o
 obj-$(CONFIG_NUMA_EMU)         += numa_emulation.o
 
-obj-$(CONFIG_X86_INTEL_MPX)    += mpx.o
-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_X86_INTEL_MPX)                    += mpx.o
+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY)                 += kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION)             += pti.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644 (file)
index 0000000..b9283cc
--- /dev/null
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+       unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+       BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+       return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+       unsigned long va = (unsigned long) cea_vaddr;
+
+       set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+       for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
+               cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+       int npages;
+       void *cea;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return;
+
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+       npages = sizeof(struct debug_store) / PAGE_SIZE;
+       BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+       cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+                            PAGE_KERNEL);
+
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+       /*
+        * Force the population of PMDs for not yet allocated per cpu
+        * memory like debug store buffers.
+        */
+       npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+       for (; npages; npages--, cea += PAGE_SIZE)
+               cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+
+       /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
+       pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+        * a task gate needs to change an available TSS to busy.  If the
+        * GDT is read-only, that will triple fault.  The TSS cannot be
+        * read-only because the CPU writes to it on task switches.
+        *
+        * On Xen PV, the GDT must be read-only because the hypervisor
+        * requires it.
+        */
+       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+       pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+                   gdt_prot);
+
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+                            per_cpu_ptr(&entry_stack_storage, cpu), 1,
+                            PAGE_KERNEL);
+
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+        *
+        *  Avoid placing a page boundary in the part of the TSS that the
+        *  processor reads during a task switch (the first 104 bytes). The
+        *  processor may not correctly perform address translations if a
+        *  boundary occurs in this area. During a task switch, the processor
+        *  reads and writes into the first 104 bytes of each TSS (using
+        *  contiguous physical addresses beginning with the physical address
+        *  of the first byte of the TSS). So, after TSS access begins, if
+        *  part of the 104 bytes is not physically contiguous, the processor
+        *  will access incorrect information without generating a page-fault
+        *  exception.
+        *
+        * There are also a lot of errata involving the TSS spanning a page
+        * boundary.  Assert that we're not doing that.
+        */
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+                            &per_cpu(cpu_tss_rw, cpu),
+                            sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+                            &per_cpu(exception_stacks, cpu),
+                            sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+       percpu_setup_debug_store(cpu);
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+       unsigned long start, end;
+
+       BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+       BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+       start = CPU_ENTRY_AREA_BASE;
+       end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+       /* Careful here: start + PMD_SIZE might wrap around */
+       for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+               populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+       unsigned int cpu;
+
+       setup_cpu_entry_area_ptes();
+
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+}
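
get_cpu_entry_area() above is pure address arithmetic: each CPU gets one fixed-size slot starting at CPU_ENTRY_AREA_PER_CPU. A user-space sketch of that computation with made-up constants; the real base and size come from asm/cpu_entry_area.h and depend on the struct layout:

#include <stdio.h>

/* Made-up values for illustration; the real ones come from asm/cpu_entry_area.h. */
#define CPU_ENTRY_AREA_PER_CPU 0xfffffe0000001000ULL
#define CPU_ENTRY_AREA_SIZE    0x3b000ULL

/* Same arithmetic as get_cpu_entry_area(): one fixed-size slot per CPU. */
static unsigned long long cpu_entry_area_va(unsigned int cpu)
{
        return CPU_ENTRY_AREA_PER_CPU + (unsigned long long)cpu * CPU_ENTRY_AREA_SIZE;
}

int main(void)
{
        for (unsigned int cpu = 0; cpu < 4; cpu++)
                printf("cpu %u -> %#llx\n", cpu, cpu_entry_area_va(cpu));
        return 0;
}
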
index bfcffdf6c5775f7ac5bd4c9f768573d7ae7bba55..421f2664ffa06e6cd4b31e5d6521648add3e2c4e 100644 (file)
@@ -5,7 +5,7 @@
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-       ptdump_walk_pgd_level(m, NULL);
+       ptdump_walk_pgd_level_debugfs(m, NULL, false);
        return 0;
 }
 
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
        .release        = single_release,
 };
 
-static struct dentry *pe;
+static int ptdump_show_curknl(struct seq_file *m, void *v)
+{
+       if (current->mm->pgd) {
+               down_read(&current->mm->mmap_sem);
+               ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+               up_read(&current->mm->mmap_sem);
+       }
+       return 0;
+}
+
+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, ptdump_show_curknl, NULL);
+}
+
+static const struct file_operations ptdump_curknl_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ptdump_open_curknl,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static struct dentry *pe_curusr;
+
+static int ptdump_show_curusr(struct seq_file *m, void *v)
+{
+       if (current->mm->pgd) {
+               down_read(&current->mm->mmap_sem);
+               ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+               up_read(&current->mm->mmap_sem);
+       }
+       return 0;
+}
+
+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, ptdump_show_curusr, NULL);
+}
+
+static const struct file_operations ptdump_curusr_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ptdump_open_curusr,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+#endif
+
+static struct dentry *dir, *pe_knl, *pe_curknl;
 
 static int __init pt_dump_debug_init(void)
 {
-       pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
-                                &ptdump_fops);
-       if (!pe)
+       dir = debugfs_create_dir("page_tables", NULL);
+       if (!dir)
                return -ENOMEM;
 
+       pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
+                                    &ptdump_fops);
+       if (!pe_knl)
+               goto err;
+
+       pe_curknl = debugfs_create_file("current_kernel", 0400,
+                                       dir, NULL, &ptdump_curknl_fops);
+       if (!pe_curknl)
+               goto err;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       pe_curusr = debugfs_create_file("current_user", 0400,
+                                       dir, NULL, &ptdump_curusr_fops);
+       if (!pe_curusr)
+               goto err;
+#endif
        return 0;
+err:
+       debugfs_remove_recursive(dir);
+       return -ENOMEM;
 }
 
 static void __exit pt_dump_debug_exit(void)
 {
-       debugfs_remove_recursive(pe);
+       debugfs_remove_recursive(dir);
 }
 
 module_init(pt_dump_debug_init);
index 5e3ac6fe6c9e32ed1906f4f9bf736310a7193c7d..f56902c1f04b94e1543710dee75d2d7dee19b7c9 100644 (file)
@@ -44,68 +44,97 @@ struct addr_marker {
        unsigned long max_lines;
 };
 
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space marker hints */
+
+#ifdef CONFIG_X86_64
+
 enum address_markers_idx {
        USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
        KERNEL_SPACE_NR,
        LOW_KERNEL_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+       LDT_NR,
+#endif
        VMALLOC_START_NR,
        VMEMMAP_START_NR,
 #ifdef CONFIG_KASAN
        KASAN_SHADOW_START_NR,
        KASAN_SHADOW_END_NR,
 #endif
-# ifdef CONFIG_X86_ESPFIX64
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+       LDT_NR,
+#endif
+       CPU_ENTRY_AREA_NR,
+#ifdef CONFIG_X86_ESPFIX64
        ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+       EFI_END_NR,
+#endif
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
-#else
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
+       [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
+#ifdef CONFIG_KASAN
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
+#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+       [LDT_NR]                = { LDT_BASE_ADDR,      "LDT remap" },
+#endif
+       [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
+#ifdef CONFIG_X86_ESPFIX64
+       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+       [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
+#endif
+       [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
+       [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
+       [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
+       [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+       USER_SPACE_NR = 0,
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
-# endif
-       FIXADDR_START_NR,
 #endif
+       CPU_ENTRY_AREA_NR,
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
 };
 
-/* Address space markers hints */
 static struct addr_marker address_markers[] = {
-       { 0, "User Space" },
-#ifdef CONFIG_X86_64
-       { 0x8000000000000000UL, "Kernel Space" },
-       { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
-       { KASAN_SHADOW_START,   "KASAN shadow" },
-       { KASAN_SHADOW_END,     "KASAN shadow end" },
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
 #endif
-# ifdef CONFIG_X86_ESPFIX64
-       { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
-       { EFI_VA_END,           "EFI Runtime Services" },
-# endif
-       { __START_KERNEL_map,   "High Kernel Mapping" },
-       { MODULES_VADDR,        "Modules" },
-       { MODULES_END,          "End Modules" },
-#else
-       { PAGE_OFFSET,          "Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/*VMALLOC_END*/,     "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
-       { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
-# endif
-       { 0/*FIXADDR_START*/,   "Fixmap Area" },
-#endif
-       { -1, NULL }            /* End of list */
+       [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
+       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
 };
 
+#endif /* !CONFIG_X86_64 */
+
 /* Multipliers for offsets within the PTEs */
 #define PTE_LEVEL_MULT (PAGE_SIZE)
 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
        static const char * const level_name[] =
                { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
-       if (!pgprot_val(prot)) {
+       if (!(pr & _PAGE_PRESENT)) {
                /* Not present */
                pt_dump_cont_printf(m, dmsg, "                              ");
        } else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
 }
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
-                                      bool checkwx)
+                                      bool checkwx, bool dmesg)
 {
 #ifdef CONFIG_X86_64
        pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
        if (pgd) {
                start = pgd;
-               st.to_dmesg = true;
+               st.to_dmesg = dmesg;
        }
 
        st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 {
-       ptdump_walk_pgd_level_core(m, pgd, false);
+       ptdump_walk_pgd_level_core(m, pgd, false, true);
+}
+
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       if (user && static_cpu_has(X86_FEATURE_PTI))
+               pgd = kernel_to_user_pgdp(pgd);
+#endif
+       ptdump_walk_pgd_level_core(m, pgd, false, false);
+}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+
+static void ptdump_walk_user_pgd_level_checkwx(void)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       pgd_t *pgd = (pgd_t *) &init_top_pgt;
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       pr_info("x86/mm: Checking user space page tables\n");
+       pgd = kernel_to_user_pgdp(pgd);
+       ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+#endif
 }
-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
 
 void ptdump_walk_pgd_level_checkwx(void)
 {
-       ptdump_walk_pgd_level_core(NULL, NULL, true);
+       ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+       ptdump_walk_user_pgd_level_checkwx();
 }
 
 static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 # endif
        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+       address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
 #endif
-
        return 0;
 }
 __initcall(pt_dump_init);
index febf6980e6535572f998cf2fa0ee63d296bdc6f1..06fe3d51d385b88111961c0b5addc673fcd597a2 100644 (file)
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), address,
                (void *)regs->ip, (void *)regs->sp, error_code);
index 6fdf91ef130a4737ab434ce98f77b2583fe1840d..8ca324d072828e19700ba094dbeae433b767c964 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
+#include <asm/pti.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
 
 static int page_size_mask;
 
+static void enable_global_pages(void)
+{
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               __supported_pte_mask |= _PAGE_GLOBAL;
+}
+
 static void __init probe_page_size_mask(void)
 {
        /*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
                cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
        /* Enable PGE if available */
+       __supported_pte_mask &= ~_PAGE_GLOBAL;
        if (boot_cpu_has(X86_FEATURE_PGE)) {
                cr4_set_bits_and_update_boot(X86_CR4_PGE);
-               __supported_pte_mask |= _PAGE_GLOBAL;
-       } else
-               __supported_pte_mask &= ~_PAGE_GLOBAL;
+               enable_global_pages();
+       }
 
        /* Enable 1 GB linear kernel mappings if available: */
        if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
 
 static void setup_pcid(void)
 {
-#ifdef CONFIG_X86_64
-       if (boot_cpu_has(X86_FEATURE_PCID)) {
-               if (boot_cpu_has(X86_FEATURE_PGE)) {
-                       /*
-                        * This can't be cr4_set_bits_and_update_boot() --
-                        * the trampoline code can't handle CR4.PCIDE and
-                        * it wouldn't do any good anyway.  Despite the name,
-                        * cr4_set_bits_and_update_boot() doesn't actually
-                        * cause the bits in question to remain set all the
-                        * way through the secondary boot asm.
-                        *
-                        * Instead, we brute-force it and set CR4.PCIDE
-                        * manually in start_secondary().
-                        */
-                       cr4_set_bits(X86_CR4_PCIDE);
-               } else {
-                       /*
-                        * flush_tlb_all(), as currently implemented, won't
-                        * work if PCID is on but PGE is not.  Since that
-                        * combination doesn't exist on real hardware, there's
-                        * no reason to try to fully support it, but it's
-                        * polite to avoid corrupting data if we're on
-                        * an improperly configured VM.
-                        */
-                       setup_clear_cpu_cap(X86_FEATURE_PCID);
-               }
+       if (!IS_ENABLED(CONFIG_X86_64))
+               return;
+
+       if (!boot_cpu_has(X86_FEATURE_PCID))
+               return;
+
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
+               /*
+                * This can't be cr4_set_bits_and_update_boot() -- the
+                * trampoline code can't handle CR4.PCIDE and it wouldn't
+                * do any good anyway.  Despite the name,
+                * cr4_set_bits_and_update_boot() doesn't actually cause
+                * the bits in question to remain set all the way through
+                * the secondary boot asm.
+                *
+                * Instead, we brute-force it and set CR4.PCIDE manually in
+                * start_secondary().
+                */
+               cr4_set_bits(X86_CR4_PCIDE);
+
+               /*
+                * INVPCID's single-context modes (2/3) only work if we set
+                * X86_CR4_PCIDE, *and* we have INVPCID support.  It's unusable
+                * on systems that have X86_CR4_PCIDE clear, or that have
+                * no INVPCID support at all.
+                */
+               if (boot_cpu_has(X86_FEATURE_INVPCID))
+                       setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+       } else {
+               /*
+                * flush_tlb_all(), as currently implemented, won't work if
+                * PCID is on but PGE is not.  Since that combination
+                * doesn't exist on real hardware, there's no reason to try
+                * to fully support it, but it's polite to avoid corrupting
+                * data if we're on an improperly configured VM.
+                */
+               setup_clear_cpu_cap(X86_FEATURE_PCID);
        }
-#endif
 }
 
 #ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
 {
        unsigned long end;
 
+       pti_check_boottime_disable();
        probe_page_size_mask();
        setup_pcid();
 
@@ -845,7 +863,7 @@ void __init zone_sizes_init(void)
        free_area_init_nodes(max_zone_pfns);
 }
 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
        .loaded_mm = &init_mm,
        .next_asid = 1,
        .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
index 8a64a6f2848d9be2e73a341f4d87ab2dc35de09f..135c9a7898c7da908f1340f9750774b4327e63b3 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/setup.h>
 #include <asm/set_memory.h>
 #include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/init.h>
 
 #include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
        printk(KERN_INFO "virtual kernel memory layout:\n"
                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+               "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
                "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
                FIXADDR_START, FIXADDR_TOP,
                (FIXADDR_TOP - FIXADDR_START) >> 10,
 
+               CPU_ENTRY_AREA_BASE,
+               CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+               CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
 #ifdef CONFIG_HIGHMEM
                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
                (LAST_PKMAP*PAGE_SIZE) >> 10,
index 99dfed6dfef8b2f9028f82b89ab8dc2bde8173c4..47388f0c0e59649ca3574d4e7c31b356dad7d247 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
 void __init kasan_init(void)
 {
        int i;
+       void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
 #ifdef CONFIG_KASAN_INLINE
        register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
                map_range(&pfn_mapped[i]);
        }
 
+       shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+                                               PAGE_SIZE);
+
+       shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+                                       CPU_ENTRY_AREA_MAP_SIZE);
+       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+                                       PAGE_SIZE);
+
        kasan_populate_zero_shadow(
                kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-               kasan_mem_to_shadow((void *)__START_KERNEL_map));
+               shadow_cpu_entry_begin);
+
+       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+                             (unsigned long)shadow_cpu_entry_end, 0);
+
+       kasan_populate_zero_shadow(shadow_cpu_entry_end,
+                               kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
        kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
                              (unsigned long)kasan_mem_to_shadow(_end),
                              early_pfn_to_nid(__pa(_stext)));
 
        kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-                       (void *)KASAN_SHADOW_END);
+                               (void *)KASAN_SHADOW_END);
 
        load_cr3(init_top_pgt);
        __flush_tlb_all();
index 96d456a94b0342eb967f918e4233498ac24e4349..004abf9ebf1222c169448090f7f1c570635bce41 100644 (file)
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
                kmem_cache_free(pgd_cache, pgd);
 }
 #else
+
 static inline pgd_t *_pgd_alloc(void)
 {
-       return (pgd_t *)__get_free_page(PGALLOC_GFP);
+       return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
 {
-       free_page((unsigned long)pgd);
+       free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
 }
 #endif /* CONFIG_X86_PAE */
 
index 6b9bf023a700559b87ae7ac89570d9bbd26d1f05..c3c5274410a908e762aed936406006d63c3116ac 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644 (file)
index 0000000..bce8aea
--- /dev/null
@@ -0,0 +1,387 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This code is based in part on work published here:
+ *
+ *     https://github.com/IAIK/KAISER
+ *
+ * The original work was written and signed off for the Linux
+ * kernel by:
+ *
+ *   Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+ *   Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+ *   Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+ *   Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+ *
+ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
+ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
+ *                    Andy Lutomirski <luto@amacapital.net>
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
+#include <asm/cmdline.h>
+#include <asm/pti.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
+
+/* Backporting helper */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK  0
+#endif
+
+static void __init pti_print_if_insecure(const char *reason)
+{
+       if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+               pr_info("%s\n", reason);
+}
+
+static void __init pti_print_if_secure(const char *reason)
+{
+       if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+               pr_info("%s\n", reason);
+}
+
+void __init pti_check_boottime_disable(void)
+{
+       char arg[5];
+       int ret;
+
+       if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+               pti_print_if_insecure("disabled on XEN PV.");
+               return;
+       }
+
+       ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+       if (ret > 0)  {
+               if (ret == 3 && !strncmp(arg, "off", 3)) {
+                       pti_print_if_insecure("disabled on command line.");
+                       return;
+               }
+               if (ret == 2 && !strncmp(arg, "on", 2)) {
+                       pti_print_if_secure("force enabled on command line.");
+                       goto enable;
+               }
+               if (ret == 4 && !strncmp(arg, "auto", 4))
+                       goto autosel;
+       }
+
+       if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+               pti_print_if_insecure("disabled on command line.");
+               return;
+       }
+
+autosel:
+       if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE))
+               return;
+enable:
+       setup_force_cpu_cap(X86_FEATURE_PTI);
+}
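For reference, the command-line strings this parser accepts are summarised below; this is a sketch derived from the code above, not the authoritative documentation of the pti= parameter:

        pti=off     disable kernel/user page table isolation
        pti=on      force-enable it, even on CPUs not flagged X86_BUG_CPU_INSECURE
        pti=auto    default: enable only when X86_BUG_CPU_INSECURE is set
        nopti       same effect as pti=off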
+
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       /*
+        * Changes to the high (kernel) portion of the kernelmode page
+        * tables are not automatically propagated to the usermode tables.
+        *
+        * Users should keep in mind that, unlike the kernelmode tables,
+        * there is no vmalloc_fault equivalent for the usermode tables.
+        * Top-level entries added to init_mm's usermode pgd after boot
+        * will not be automatically propagated to other mms.
+        */
+       if (!pgdp_maps_userspace(pgdp))
+               return pgd;
+
+       /*
+        * The user page tables get the full PGD, accessible from
+        * userspace:
+        */
+       kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+       /*
+        * If this is normal user memory, make it NX in the kernel
+        * pagetables so that, if we somehow screw up and return to
+        * usermode with the kernel CR3 loaded, we'll get a page fault
+        * instead of allowing user code to execute with the wrong CR3.
+        *
+        * As exceptions, we don't set NX if:
+        *  - _PAGE_USER is not set.  This could be an executable
+        *     EFI runtime mapping or something similar, and the kernel
+        *     may execute from it
+        *  - we don't have NX support
+        *  - we're clearing the PGD (i.e. the new pgd is not present).
+        */
+       if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+           (__supported_pte_mask & _PAGE_NX))
+               pgd.pgd |= _PAGE_NX;
+
+       /* return the copy of the PGD we want the kernel to use: */
+       return pgd;
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+       pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+       if (address < PAGE_OFFSET) {
+               WARN_ONCE(1, "attempt to walk user address\n");
+               return NULL;
+       }
+
+       if (pgd_none(*pgd)) {
+               unsigned long new_p4d_page = __get_free_page(gfp);
+               if (!new_p4d_page)
+                       return NULL;
+
+               if (pgd_none(*pgd)) {
+                       set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+                       new_p4d_page = 0;
+               }
+               if (new_p4d_page)
+                       free_page(new_p4d_page);
+       }
+       BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+       return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+       pud_t *pud;
+
+       BUILD_BUG_ON(p4d_large(*p4d) != 0);
+       if (p4d_none(*p4d)) {
+               unsigned long new_pud_page = __get_free_page(gfp);
+               if (!new_pud_page)
+                       return NULL;
+
+               if (p4d_none(*p4d)) {
+                       set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+                       new_pud_page = 0;
+               }
+               if (new_pud_page)
+                       free_page(new_pud_page);
+       }
+
+       pud = pud_offset(p4d, address);
+       /* The user page tables do not use large mappings: */
+       if (pud_large(*pud)) {
+               WARN_ON(1);
+               return NULL;
+       }
+       if (pud_none(*pud)) {
+               unsigned long new_pmd_page = __get_free_page(gfp);
+               if (!new_pmd_page)
+                       return NULL;
+
+               if (pud_none(*pud)) {
+                       set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+                       new_pmd_page = 0;
+               }
+               if (new_pmd_page)
+                       free_page(new_pmd_page);
+       }
+
+       return pmd_offset(pud, address);
+}
+
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.  Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables.  It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+       pte_t *pte;
+
+       /* We can't do anything sensible if we hit a large mapping. */
+       if (pmd_large(*pmd)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       if (pmd_none(*pmd)) {
+               unsigned long new_pte_page = __get_free_page(gfp);
+               if (!new_pte_page)
+                       return NULL;
+
+               if (pmd_none(*pmd)) {
+                       set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+                       new_pte_page = 0;
+               }
+               if (new_pte_page)
+                       free_page(new_pte_page);
+       }
+
+       pte = pte_offset_kernel(pmd, address);
+       if (pte_flags(*pte) & _PAGE_USER) {
+               WARN_ONCE(1, "attempt to walk to user pte\n");
+               return NULL;
+       }
+       return pte;
+}
+
+static void __init pti_setup_vsyscall(void)
+{
+       pte_t *pte, *target_pte;
+       unsigned int level;
+
+       pte = lookup_address(VSYSCALL_ADDR, &level);
+       if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+               return;
+
+       target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+       if (WARN_ON(!target_pte))
+               return;
+
+       *target_pte = *pte;
+       set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+}
+#else
+static void __init pti_setup_vsyscall(void) { }
+#endif
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+       unsigned long addr;
+
+       /*
+        * Clone the populated PMDs which cover start to end. These PMD areas
+        * can have holes.
+        */
+       for (addr = start; addr < end; addr += PMD_SIZE) {
+               pmd_t *pmd, *target_pmd;
+               pgd_t *pgd;
+               p4d_t *p4d;
+               pud_t *pud;
+
+               pgd = pgd_offset_k(addr);
+               if (WARN_ON(pgd_none(*pgd)))
+                       return;
+               p4d = p4d_offset(pgd, addr);
+               if (WARN_ON(p4d_none(*p4d)))
+                       return;
+               pud = pud_offset(p4d, addr);
+               if (pud_none(*pud))
+                       continue;
+               pmd = pmd_offset(pud, addr);
+               if (pmd_none(*pmd))
+                       continue;
+
+               target_pmd = pti_user_pagetable_walk_pmd(addr);
+               if (WARN_ON(!target_pmd))
+                       return;
+
+               /*
+                * Copy the PMD.  That is, the kernelmode and usermode
+                * tables will share the last-level page tables of this
+                * address range.
+                */
+               *target_pmd = pmd_clear_flags(*pmd, clear);
+       }
+}
+
+/*
+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
+ * next-level entry on 5-level systems).
+ */
+static void __init pti_clone_p4d(unsigned long addr)
+{
+       p4d_t *kernel_p4d, *user_p4d;
+       pgd_t *kernel_pgd;
+
+       user_p4d = pti_user_pagetable_walk_p4d(addr);
+       kernel_pgd = pgd_offset_k(addr);
+       kernel_p4d = p4d_offset(kernel_pgd, addr);
+       *user_p4d = *kernel_p4d;
+}
+
+/*
+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ */
+static void __init pti_clone_user_shared(void)
+{
+       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+}
+
+/*
+ * Clone the ESPFIX P4D into the user space visible page table.
+ */
+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+       pti_clone_p4d(ESPFIX_BASE_ADDR);
+#endif
+}
+
+/*
+ * Clone the populated PMDs of the entry and irqentry text and force them RO.
+ */
+static void __init pti_clone_entry_text(void)
+{
+       pti_clone_pmds((unsigned long) __entry_text_start,
+                       (unsigned long) __irqentry_text_end, _PAGE_RW);
+}
+
+/*
+ * Initialize kernel page table isolation
+ */
+void __init pti_init(void)
+{
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       pr_info("enabled\n");
+
+       pti_clone_user_shared();
+       pti_clone_entry_text();
+       pti_setup_espfix64();
+       pti_setup_vsyscall();
+}
index 3118392cdf756bfc913d7a4137d5f7e0d46b046d..a1561957dccbb82d188d8209d76f7eddb780519a 100644 (file)
  *     Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts.  We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+       u16 asid;
+
+       /*
+        * This is only expected to be set if we have disabled
+        * kernel _PAGE_GLOBAL pages.
+        */
+       if (!static_cpu_has(X86_FEATURE_PTI)) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+               /* Do not need to flush the current asid */
+               if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+                       continue;
+               /*
+                * Make sure the next time we go to switch to
+                * this asid, we do a flush:
+                */
+               this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+       }
+       this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
+
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
 
 
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
                return;
        }
 
+       if (this_cpu_read(cpu_tlbstate.invalidate_other))
+               clear_asid_other();
+
        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
                    next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
        *need_flush = true;
 }
 
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+       unsigned long new_mm_cr3;
+
+       if (need_flush) {
+               invalidate_user_asid(new_asid);
+               new_mm_cr3 = build_cr3(pgdir, new_asid);
+       } else {
+               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+       }
+
+       /*
+        * Caution: many callers of this function expect
+        * that load_cr3() is serializing and orders TLB
+        * fills with respect to the mm_cpumask writes.
+        */
+       write_cr3(new_mm_cr3);
+}
+
 void leave_mm(int cpu)
 {
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * isn't free.
         */
 #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
                 * the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                if (need_flush) {
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-                       write_cr3(build_cr3(next, new_asid));
+                       load_new_mm_cr3(next->pgd, new_asid, true);
 
                        /*
                         * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                } else {
                        /* The new ASID is already up to date. */
-                       write_cr3(build_cr3_noflush(next, new_asid));
+                       load_new_mm_cr3(next->pgd, new_asid, false);
 
                        /* See above wrt _rcuidle. */
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
                !(cr4_read_shadow() & X86_CR4_PCIDE));
 
        /* Force ASID 0 and force a TLB flush. */
-       write_cr3(build_cr3(mm, 0));
+       write_cr3(build_cr3(mm->pgd, 0));
 
        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
 
        /* flush range by one by one 'invlpg' */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-               __flush_tlb_single(addr);
+               __flush_tlb_one(addr);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
index 6a151ce70e865caadde95c859855c4b63283ad4b..d87ac96e37ede3ea93dabb67d99538fdadc03de0 100644 (file)
@@ -196,6 +196,9 @@ static pgd_t *efi_pgd;
  * because we want to avoid inserting EFI region mappings (EFI_VA_END
  * to EFI_VA_START) into the standard kernel page tables. Everything
  * else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
  */
 int __init efi_alloc_page_tables(void)
 {
@@ -208,7 +211,7 @@ int __init efi_alloc_page_tables(void)
                return 0;
 
        gfp_mask = GFP_KERNEL | __GFP_ZERO;
-       efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+       efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
        if (!efi_pgd)
                return -ENOMEM;
 
index f44c0bc95aa2f45ad42462a5f23f4db4672d1257..8538a6723171a5606058a8823ed1cbb2d343fdb6 100644 (file)
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
                local_flush_tlb();
                stat->d_alltlb++;
        } else {
-               __flush_tlb_one(msg->address);
+               __flush_tlb_single(msg->address);
                stat->d_onetlb++;
        }
        stat->d_requestee++;
index 5f6fd860820a3c5175609a5a3468262aac937604..e4cb9f4cde8ae224afe51ced1970805df293c956 100644 (file)
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 static int uv_domain_activate(struct irq_domain *domain,
-                             struct irq_data *irq_data, bool early)
+                             struct irq_data *irq_data, bool reserve)
 {
        uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
        return 0;
index 36a28eddb435e72d2abc5ffbdd1e78a46b56876e..a7d966964c6f20577c927cf5e618bc86b3331977 100644 (file)
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
        struct desc_struct *desc = get_cpu_gdt_rw(cpu);
        tss_desc tss;
 #endif
-       set_tss_desc(cpu, t);   /*
-                                * This just modifies memory; should not be
-                                * necessary. But... This is necessary, because
-                                * 386 hardware has concept of busy TSS or some
-                                * similar stupidity.
-                                */
+
+       /*
+        * We need to reload TR, which requires that we change the
+        * GDT entry to indicate "available" first.
+        *
+        * XXX: This could probably all be replaced by a call to
+        * force_reload_TR().
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 
 #ifdef CONFIG_X86_64
        memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
index d669e9d890017770456abe458f1161eb2509c09e..c9081c6671f0b7a05ecfaaf206e7e1ed2b1f456a 100644 (file)
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
 
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+       struct xen_memory_map memmap;
+       int rc;
+       unsigned int i, last_guest_ram;
+       phys_addr_t max_addr = PFN_PHYS(max_pfn);
+       struct e820_table *xen_e820_table;
+       const struct e820_entry *entry;
+       struct resource *res;
+
+       if (!xen_initial_domain())
+               return;
+
+       xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+       if (!xen_e820_table)
+               return;
+
+       memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+       set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+       rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+       if (rc) {
+               pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+               goto out;
+       }
+
+       last_guest_ram = 0;
+       for (i = 0; i < memmap.nr_entries; i++) {
+               if (xen_e820_table->entries[i].addr >= max_addr)
+                       break;
+               if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+                       last_guest_ram = i;
+       }
+
+       entry = &xen_e820_table->entries[last_guest_ram];
+       if (max_addr >= entry->addr + entry->size)
+               goto out; /* No unallocated host RAM. */
+
+       hostmem_resource->start = max_addr;
+       hostmem_resource->end = entry->addr + entry->size;
+
+       /*
+        * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+        * as unavailable. The rest of that region can be used for hotplug-based
+        * ballooning.
+        */
+       for (; i < memmap.nr_entries; i++) {
+               entry = &xen_e820_table->entries[i];
+
+               if (entry->type == E820_TYPE_RAM)
+                       continue;
+
+               if (entry->addr >= hostmem_resource->end)
+                       break;
+
+               res = kzalloc(sizeof(*res), GFP_KERNEL);
+               if (!res)
+                       goto out;
+
+               res->name = "Unavailable host RAM";
+               res->start = entry->addr;
+               res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+                           entry->addr + entry->size : hostmem_resource->end;
+               rc = insert_resource(hostmem_resource, res);
+               if (rc) {
+                       pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+                               __func__, res->start, res->end, rc);
+                       kfree(res);
+                       goto  out;
+               }
+       }
+
+ out:
+       kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
index f2414c6c5e7c455b43fc45773fbd1264cf86c24e..c047f42552e1a61ed0a5787d904681974cc05af1 100644 (file)
@@ -88,6 +88,8 @@
 #include "multicalls.h"
 #include "pmu.h"
 
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
 void *xen_initial_gdt;
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
        mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
        /* Work out if we support NX */
+       get_cpu_cap(&boot_cpu_data);
        x86_configure_nx();
 
        /* Get mfn list */
index fc048ec686e7699b263254c79b482ccf935c21ef..4d62c071b166f65c848a12ca07bfe44ca20e198a 100644 (file)
@@ -1902,6 +1902,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
        /* Graft it onto L4[511][510] */
        copy_page(level2_kernel_pgt, l2);
 
+       /*
+        * Zap execute permission from the ident map. Due to the sharing of
+        * L1 entries we need to do this in the L2.
+        */
+       if (__supported_pte_mask & _PAGE_NX) {
+               for (i = 0; i < PTRS_PER_PMD; ++i) {
+                       if (pmd_none(level2_ident_pgt[i]))
+                               continue;
+                       level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+               }
+       }
+
        /* Copy the initial P->M table mappings if necessary. */
        i = pgd_index(xen_start_info->mfn_list);
        if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2273,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
        switch (idx) {
        case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-       case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
        case FIX_WP_TEST:
 # ifdef CONFIG_HIGHMEM
@@ -2272,7 +2283,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
        case FIX_TEXT_POKE0:
        case FIX_TEXT_POKE1:
-       case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
                /* All local page mappings */
                pte = pfn_pte(phys, prot);
                break;
index c114ca767b3b8a382918e2b0160983fa257318db..6e0d2086eacbf37326467b5142e59750151a5328 100644 (file)
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
-               bool discard = false;
 
                chunk_size = size;
                type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
-                               discard = true;
+                               type = E820_TYPE_UNUSABLE;
                }
 
-               if (!discard)
-                       xen_align_and_add_e820_region(addr, chunk_size, type);
+               xen_align_and_add_e820_region(addr, chunk_size, type);
 
                addr += chunk_size;
                size -= chunk_size;
index 8bfdea58159ba9ffd972dd95717e0eee99101e0a..9ef6cf3addb38cae822d0e5c5ef18ba9e98cd2d7 100644 (file)
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
        bio->bi_disk = bio_src->bi_disk;
        bio->bi_partno = bio_src->bi_partno;
        bio_set_flag(bio, BIO_CLONED);
+       if (bio_flagged(bio_src, BIO_THROTTLED))
+               bio_set_flag(bio, BIO_THROTTLED);
        bio->bi_opf = bio_src->bi_opf;
        bio->bi_write_hint = bio_src->bi_write_hint;
        bio->bi_iter = bio_src->bi_iter;
index b21f8e86f1207f9b76bf3e2083fcf72b5062f0b7..d3a94719f03fb2af81d6270d6fc9ed58f0dde373 100644 (file)
 #include "blk.h"
 
 /*
- * Append a bio to a passthrough request.  Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request.  Only works if the bio can be merged
+ * into the request based on the driver constraints.
  */
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
 {
-       blk_queue_bounce(rq->q, &bio);
+       struct bio *orig_bio = *bio;
+
+       blk_queue_bounce(rq->q, bio);
 
        if (!rq->bio) {
-               blk_rq_bio_prep(rq->q, rq, bio);
+               blk_rq_bio_prep(rq->q, rq, *bio);
        } else {
-               if (!ll_back_merge_fn(rq->q, rq, bio))
+               if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+                       if (orig_bio != *bio) {
+                               bio_put(*bio);
+                               *bio = orig_bio;
+                       }
                        return -EINVAL;
+               }
 
-               rq->biotail->bi_next = bio;
-               rq->biotail = bio;
-               rq->__data_len += bio->bi_iter.bi_size;
+               rq->biotail->bi_next = *bio;
+               rq->biotail = *bio;
+               rq->__data_len += (*bio)->bi_iter.bi_size;
        }
 
        return 0;
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
         * We link the bounce buffer in and could have to traverse it
         * later so we have to get a ref to prevent it from being freed
         */
-       ret = blk_rq_append_bio(rq, bio);
-       bio_get(bio);
+       ret = blk_rq_append_bio(rq, &bio);
        if (ret) {
-               bio_endio(bio);
                __blk_rq_unmap_user(orig_bio);
-               bio_put(bio);
                return ret;
        }
+       bio_get(bio);
 
        return 0;
 }
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        int reading = rq_data_dir(rq) == READ;
        unsigned long addr = (unsigned long) kbuf;
        int do_copy = 0;
-       struct bio *bio;
+       struct bio *bio, *orig_bio;
        int ret;
 
        if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        if (do_copy)
                rq->rq_flags |= RQF_COPY_USER;
 
-       ret = blk_rq_append_bio(rq, bio);
+       orig_bio = bio;
+       ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                /* request is too big */
-               bio_put(bio);
+               bio_put(orig_bio);
                return ret;
        }
 
index 825bc29767e6699ac85675d319a9866b70cc9b84..d19f416d61012ac032c49608f0afe463c948e8bc 100644 (file)
@@ -2226,13 +2226,7 @@ again:
 out_unlock:
        spin_unlock_irq(q->queue_lock);
 out:
-       /*
-        * As multiple blk-throtls may stack in the same issue path, we
-        * don't want bios to leave with the flag set.  Clear the flag if
-        * being issued.
-        */
-       if (!throttled)
-               bio_clear_flag(bio, BIO_THROTTLED);
+       bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
index fceb1a96480bfb9600e4664fa2b4992c8bb64210..1d05c422c932ad56d705f94deed6cce0891ff9d3 100644 (file)
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        unsigned i = 0;
        bool bounce = false;
        int sectors = 0;
+       bool passthrough = bio_is_passthrough(*bio_orig);
 
        bio_for_each_segment(from, *bio_orig, iter) {
                if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        if (!bounce)
                return;
 
-       if (sectors < bio_sectors(*bio_orig)) {
+       if (!passthrough && sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
                bio_chain(bio, *bio_orig);
                generic_make_request(*bio_orig);
                *bio_orig = bio;
        }
-       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+                       bounce_bio_set);
 
        bio_for_each_segment_all(to, bio, i) {
                struct page *page = to->bv_page;
index b4df317c291692f01138b91608dc6c80f71bb9aa..f95c60774ce8ca613417d3ccf54bee52010752ee 100644 (file)
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
        unsigned int cur_domain;
        unsigned int batching;
        wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+       struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
        atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+                            void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
        unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                INIT_LIST_HEAD(&khd->rqs[i]);
+               init_waitqueue_func_entry(&khd->domain_wait[i],
+                                         kyber_domain_wake);
+               khd->domain_wait[i].private = hctx;
                INIT_LIST_HEAD(&khd->domain_wait[i].entry);
                atomic_set(&khd->wait_index[i], 0);
        }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
        int nr;
 
        nr = __sbitmap_queue_get(domain_tokens);
-       if (nr >= 0)
-               return nr;
 
        /*
         * If we failed to get a domain token, make sure the hardware queue is
         * run when one becomes available. Note that this is serialized on
         * khd->lock, but we still need to be careful about the waker.
         */
-       if (list_empty_careful(&wait->entry)) {
-               init_waitqueue_func_entry(wait, kyber_domain_wake);
-               wait->private = hctx;
+       if (nr < 0 && list_empty_careful(&wait->entry)) {
                ws = sbq_wait_ptr(domain_tokens,
                                  &khd->wait_index[sched_domain]);
+               khd->domain_ws[sched_domain] = ws;
                add_wait_queue(&ws->wait, wait);
 
                /*
                 * Try again in case a token was freed before we got on the wait
-                * queue. The waker may have already removed the entry from the
-                * wait queue, but list_del_init() is okay with that.
+                * queue.
                 */
                nr = __sbitmap_queue_get(domain_tokens);
-               if (nr >= 0) {
-                       unsigned long flags;
+       }
 
-                       spin_lock_irqsave(&ws->wait.lock, flags);
-                       list_del_init(&wait->entry);
-                       spin_unlock_irqrestore(&ws->wait.lock, flags);
-               }
+       /*
+        * If we got a token while we were on the wait queue, remove ourselves
+        * from the wait queue to ensure that all wake ups make forward
+        * progress. It's possible that the waker already deleted the entry
+        * between the !list_empty_careful() check and us grabbing the lock, but
+        * list_del_init() is okay with that.
+        */
+       if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+               ws = khd->domain_ws[sched_domain];
+               spin_lock_irq(&ws->wait.lock);
+               list_del_init(&wait->entry);
+               spin_unlock_irq(&ws->wait.lock);
        }
+
        return nr;
 }
 
index 415a54ced4d6a490ae1e09170c8b80ef3eef135e..444a387df219e96a35fb7972f2a1a810e013a60b 100644 (file)
@@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                if (!af_alg_readable(sk))
                        break;
 
-               if (!ctx->used) {
-                       err = af_alg_wait_for_data(sk, flags);
-                       if (err)
-                               return err;
-               }
-
                seglen = min_t(size_t, (maxsize - len),
                               msg_data_left(msg));
 
index 48b34e9c68342c55610ad83900557dc1c785af41..ddcc45f77edd367bf118e46aa757891c5c3d8869 100644 (file)
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
        size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
        size_t processed = 0;           /* [in]  TX bufs to be consumed */
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /*
         * Data length provided by caller via sendmsg/sendpage that has not
         * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = outlen;
+
                aead_request_set_callback(&areq->cra_u.aead_req,
                                          CRYPTO_TFM_REQ_MAY_BACKLOG,
                                          af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                                 crypto_aead_decrypt(&areq->cra_u.aead_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = outlen;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
index 30cff827dd8fff048fa3e2ca7de770ab73022749..baef9bfccddaa94728bea5933bea16c32b2a32b5 100644 (file)
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
        int err = 0;
        size_t len = 0;
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /* Allocate cipher request for current operation. */
        areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
                                     crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = len;
+
                skcipher_request_set_callback(&areq->cra_u.skcipher_req,
                                              CRYPTO_TFM_REQ_MAY_SLEEP,
                                              af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                        crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = len;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
index 4e64726588524f137acd590809bef11673695ed2..eca04d3729b37c696c2dac4b0ac472422f30615d 100644 (file)
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
                pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
                crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
                INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+               spin_lock_init(&cpu_queue->q_lock);
        }
        return 0;
 }
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
        int cpu, err;
        struct mcryptd_cpu_queue *cpu_queue;
 
-       cpu = get_cpu();
-       cpu_queue = this_cpu_ptr(queue->cpu_queue);
-       rctx->tag.cpu = cpu;
+       cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+       spin_lock(&cpu_queue->q_lock);
+       cpu = smp_processor_id();
+       rctx->tag.cpu = smp_processor_id();
 
        err = crypto_enqueue_request(&cpu_queue->queue, request);
        pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
                 cpu, cpu_queue, request);
+       spin_unlock(&cpu_queue->q_lock);
        queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-       put_cpu();
 
        return err;
 }
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
        cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
        i = 0;
        while (i < MCRYPTD_BATCH || single_task_running()) {
-               /*
-                * preempt_disable/enable is used to prevent
-                * being preempted by mcryptd_enqueue_request()
-                */
-               local_bh_disable();
-               preempt_disable();
+
+               spin_lock_bh(&cpu_queue->q_lock);
                backlog = crypto_get_backlog(&cpu_queue->queue);
                req = crypto_dequeue_request(&cpu_queue->queue);
-               preempt_enable();
-               local_bh_enable();
+               spin_unlock_bh(&cpu_queue->q_lock);
 
                if (!req) {
                        mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
                ++i;
        }
        if (cpu_queue->queue.qlen)
-               queue_work(kcrypto_wq, &cpu_queue->work);
+               queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
 }
 
 void mcryptd_flusher(struct work_struct *__work)
index 778e0ff42bfa801eda5be848da9e6747ebbc2626..11af5fd6a443570550e1dac5b0a429b2cae801b1 100644 (file)
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 
        walk->total = req->cryptlen;
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
        scatterwalk_start(&walk->in, req->src);
        scatterwalk_start(&walk->out, req->dst);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        walk->flags &= ~SKCIPHER_WALK_SLEEP;
        walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
                       SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        int err;
 
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        scatterwalk_done(&walk->in, 0, walk->total);
        scatterwalk_done(&walk->out, 0, walk->total);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
                walk->flags |= SKCIPHER_WALK_SLEEP;
        else
index 6742f6c68034c5e833505d294902dd97c274c1b0..9bff853e85f37831d8d053a2aa363f139537c9b5 100644 (file)
@@ -1007,7 +1007,7 @@ skip:
        /* The record may be cleared by others, try read next record */
        if (len == -ENOENT)
                goto skip;
-       else if (len < sizeof(*rcd)) {
+       else if (len < 0 || len < sizeof(*rcd)) {
                rc = -EIO;
                goto out;
        }
index 30e84cc600ae6438c25aec2f2975ae4e3f144553..06ea4749ebd9826a3d7b8b0a9798a1cc797f4d61 100644 (file)
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
        struct cpc_register_resource *desired_reg;
        int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-       struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+       struct cppc_pcc_data *pcc_ss_data;
        int ret = 0;
 
        if (!cpc_desc || pcc_ss_id < 0) {
index ff2580e7611d18c6d56c58d50c2cbc3a2d54aa36..abeb4df4f22e43d7f0d1398af9962135a37af4b6 100644 (file)
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                                dev_name(&adev_dimm->dev));
                return -ENXIO;
        }
+       /*
+        * Record nfit_mem for the notification path to track back to
+        * the nfit sysfs attributes for this dimm device object.
+        */
+       dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
        /*
         * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
                        sysfs_put(nfit_mem->flags_attr);
                        nfit_mem->flags_attr = NULL;
                }
-               if (adev_dimm)
+               if (adev_dimm) {
                        acpi_remove_notify_handler(adev_dimm->handle,
                                        ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+                       dev_set_drvdata(&adev_dimm->dev, NULL);
+               }
        }
        mutex_unlock(&acpi_desc->init_mutex);
 }
index bccec9de05330b2fe6822369e5c7a409e8759e95..a7ecfde66b7b34f44cc9cffeac0be1b703e29aff 100644 (file)
@@ -482,7 +482,8 @@ enum binder_deferred_state {
  * @tsk                   task_struct for group_leader of process
  *                        (invariant after initialized)
  * @files                 files_struct for process
- *                        (invariant after initialized)
+ *                        (protected by @files_lock)
+ * @files_lock            mutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *                        (protected by binder_deferred_lock)
  * @deferred_work:        bitmap of deferred work to perform
@@ -530,6 +531,7 @@ struct binder_proc {
        int pid;
        struct task_struct *tsk;
        struct files_struct *files;
+       struct mutex files_lock;
        struct hlist_node deferred_work_node;
        int deferred_work;
        bool is_dead;
@@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
 static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 {
-       struct files_struct *files = proc->files;
        unsigned long rlim_cur;
        unsigned long irqs;
+       int ret;
 
-       if (files == NULL)
-               return -ESRCH;
-
-       if (!lock_task_sighand(proc->tsk, &irqs))
-               return -EMFILE;
-
+       mutex_lock(&proc->files_lock);
+       if (proc->files == NULL) {
+               ret = -ESRCH;
+               goto err;
+       }
+       if (!lock_task_sighand(proc->tsk, &irqs)) {
+               ret = -EMFILE;
+               goto err;
+       }
        rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
        unlock_task_sighand(proc->tsk, &irqs);
 
-       return __alloc_fd(files, 0, rlim_cur, flags);
+       ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
+err:
+       mutex_unlock(&proc->files_lock);
+       return ret;
 }
 
 /*
@@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 static void task_fd_install(
        struct binder_proc *proc, unsigned int fd, struct file *file)
 {
+       mutex_lock(&proc->files_lock);
        if (proc->files)
                __fd_install(proc->files, fd, file);
+       mutex_unlock(&proc->files_lock);
 }
 
 /*
@@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
 {
        int retval;
 
-       if (proc->files == NULL)
-               return -ESRCH;
-
+       mutex_lock(&proc->files_lock);
+       if (proc->files == NULL) {
+               retval = -ESRCH;
+               goto err;
+       }
        retval = __close_fd(proc->files, fd);
        /* can't restart close syscall because file table entry was cleared */
        if (unlikely(retval == -ERESTARTSYS ||
@@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
                     retval == -ERESTARTNOHAND ||
                     retval == -ERESTART_RESTARTBLOCK))
                retval = -EINTR;
-
+err:
+       mutex_unlock(&proc->files_lock);
        return retval;
 }
 
@@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
        ret = binder_alloc_mmap_handler(&proc->alloc, vma);
        if (ret)
                return ret;
+       mutex_lock(&proc->files_lock);
        proc->files = get_files_struct(current);
+       mutex_unlock(&proc->files_lock);
        return 0;
 
 err_bad_arg:
@@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
        spin_lock_init(&proc->outer_lock);
        get_task_struct(current->group_leader);
        proc->tsk = current->group_leader;
+       mutex_init(&proc->files_lock);
        INIT_LIST_HEAD(&proc->todo);
        proc->default_priority = task_nice(current);
        binder_dev = container_of(filp->private_data, struct binder_device,
@@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work)
 
                files = NULL;
                if (defer & BINDER_DEFERRED_PUT_FILES) {
+                       mutex_lock(&proc->files_lock);
                        files = proc->files;
                        if (files)
                                proc->files = NULL;
+                       mutex_unlock(&proc->files_lock);
                }
 
                if (defer & BINDER_DEFERRED_FLUSH)
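Every access to proc->files in the hunks above is now bracketed by files_lock, so a teardown that NULLs the pointer can no longer race with a lookup in another thread. A small pthread-based sketch of the same guarded-nullable-pointer pattern; the struct names and the -ESRCH convention are only illustrative:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the resource that another thread may tear down. */
struct files { int next_fd; };

struct proc {
	pthread_mutex_t files_lock;	/* protects @files */
	struct files *files;		/* NULL once torn down */
};

/* Readers check and use the pointer under the same lock, so a concurrent
 * teardown cannot yank it away mid-operation. */
static int proc_alloc_fd(struct proc *p)
{
	int ret;

	pthread_mutex_lock(&p->files_lock);
	if (!p->files) {
		ret = -ESRCH;
		goto out;
	}
	ret = p->files->next_fd++;
out:
	pthread_mutex_unlock(&p->files_lock);
	return ret;
}

static void proc_put_files(struct proc *p)
{
	struct files *f;

	pthread_mutex_lock(&p->files_lock);
	f = p->files;
	p->files = NULL;
	pthread_mutex_unlock(&p->files_lock);
	free(f);	/* safe: no reader can still hold it under the lock */
}

int main(void)
{
	struct proc p = { PTHREAD_MUTEX_INITIALIZER, malloc(sizeof(struct files)) };

	p.files->next_fd = 3;
	printf("fd = %d\n", proc_alloc_fd(&p));
	proc_put_files(&p);
	printf("after teardown: %d\n", proc_alloc_fd(&p));	/* -ESRCH */
	return 0;
}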
index eb3af2739537a8def39259206e2345a2adc72c71..07532d83be0bca7d06aa5e99670ac920435e0ed9 100644 (file)
@@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
                this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
+static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+{
+       return of_property_read_bool(this_leaf->of_node, "cache-unified");
+}
+
 static void cache_of_override_properties(unsigned int cpu)
 {
        int index;
@@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)
 
        for (index = 0; index < cache_leaves(cpu); index++) {
                this_leaf = this_cpu_ci->info_list + index;
+               /*
+                * init_cache_level must set up the cache level correctly
+                * overriding the architecturally specified levels, so
+                * if type is NONE at this stage, it should be unified
+                */
+               if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+                   cache_node_is_unified(this_leaf))
+                       this_leaf->type = CACHE_TYPE_UNIFIED;
                cache_size(this_leaf);
                cache_get_line_size(this_leaf);
                cache_nr_sets(this_leaf);
index ccb9975a97fa3f214d658776450ab618bae26643..ad0477ae820f040affe54f4368d3a02d9da63350 100644 (file)
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
-       call_single_data_t csd;
+       struct __call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
+       blk_status_t error;
        struct nullb_queue *nq;
        struct hrtimer timer;
-       blk_status_t error;
 };
 
 struct nullb_queue {
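The reordering above is about structure size: call_single_data_t is a typedef carrying a cache-line alignment attribute, so swapping it for the plain struct __call_single_data and regrouping the small fields removes padding. A standalone sketch of how an over-aligned member inflates an enclosing struct; the names and the hard-coded 32-byte alignment are assumptions for illustration:

#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

/* Roughly the shape of struct __call_single_data: a few pointer-sized
 * fields plus flags, with no extra alignment requirement. */
struct csd_plain {
	void *llist, *func, *info;
	unsigned int flags;
};

/* The aligned variant keeps the data from straddling cache lines, but
 * embedding it forces padding before and after it. */
struct csd_aligned {
	void *llist, *func, *info;
	unsigned int flags;
} __attribute__((aligned(32)));

struct cmd_plain   { char tag; struct csd_plain   csd; char error; };
struct cmd_aligned { char tag; struct csd_aligned csd; char error; };

int main(void)
{
	printf("plain:   size=%zu align=%zu\n",
	       sizeof(struct cmd_plain), alignof(struct cmd_plain));
	printf("aligned: size=%zu align=%zu\n",
	       sizeof(struct cmd_aligned), alignof(struct cmd_aligned));
	return 0;
}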
index 647d056df88c8dd2a7d8288e35fa2eeba9b7705b..b56c11f51bafad32bafc7b5b9c7467e7cd16632b 100644 (file)
@@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core)
 
        ret = core->ops->is_enabled(core->hw);
 done:
-       clk_pm_runtime_put(core);
+       if (core->dev)
+               pm_runtime_put(core->dev);
 
        return ret;
 }
@@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core)
                best_parent_rate = core->parent->rate;
        }
 
+       if (clk_pm_runtime_get(core))
+               return;
+
        if (core->flags & CLK_SET_RATE_UNGATE) {
                unsigned long flags;
 
@@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core)
        /* handle the new child who might not be in core->children yet */
        if (core->new_child)
                clk_change_rate(core->new_child);
+
+       clk_pm_runtime_put(core);
 }
 
 static int clk_core_set_rate_nolock(struct clk_core *core,
index a1a634253d6f2299bfad888b2fa193c98b4ac019..f00d8758ba24f6e5ed537a88be76ff85e5a7c5e4 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
        return 0;
 }
 
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+                                unsigned long id)
+{
+       sun9i_mmc_reset_assert(rcdev, id);
+       udelay(10);
+       sun9i_mmc_reset_deassert(rcdev, id);
+
+       return 0;
+}
+
 static const struct reset_control_ops sun9i_mmc_reset_ops = {
        .assert         = sun9i_mmc_reset_assert,
        .deassert       = sun9i_mmc_reset_deassert,
+       .reset          = sun9i_mmc_reset_reset,
 };
 
 static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
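The new .reset callback is simply assert, a short settle delay, then deassert, so consumers that only issue a one-shot reset get a full pulse on this controller. A toy sketch of that composition; the register layout and delay are invented:

#include <stdio.h>
#include <unistd.h>

/* Toy reset controller: each bit of 'reg' holds one line in reset. */
struct rst_ctrl { unsigned int reg; };

static int rst_assert(struct rst_ctrl *c, unsigned long id)
{
	c->reg |= 1u << id;
	printf("assert   line %lu (reg=0x%x)\n", id, c->reg);
	return 0;
}

static int rst_deassert(struct rst_ctrl *c, unsigned long id)
{
	c->reg &= ~(1u << id);
	printf("deassert line %lu (reg=0x%x)\n", id, c->reg);
	return 0;
}

/* A one-shot reset helper is assert + settle delay + deassert, so callers
 * that only know this entry point still get a complete reset pulse. */
static int rst_reset(struct rst_ctrl *c, unsigned long id)
{
	rst_assert(c, id);
	usleep(10);
	return rst_deassert(c, id);
}

int main(void)
{
	struct rst_ctrl c = { 0 };
	return rst_reset(&c, 1);
}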
index 58d4f4e1ad6a907991873a03027e6c7aa2f31fc4..ca38229b045ab288a2f250dddaf1b174e8c0572f 100644 (file)
@@ -22,6 +22,8 @@
 
 #include "cpufreq_governor.h"
 
+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL      (2 * TICK_NSEC / NSEC_PER_USEC)
+
 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
 
 static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 {
        struct dbs_data *dbs_data = to_dbs_data(attr_set);
        struct policy_dbs_info *policy_dbs;
+       unsigned int sampling_interval;
        int ret;
-       ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
-       if (ret != 1)
+
+       ret = sscanf(buf, "%u", &sampling_interval);
+       if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
                return -EINVAL;
 
+       dbs_data->sampling_rate = sampling_interval;
+
        /*
         * We are operating under dbs_data->mutex and so the list and its
         * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        if (ret)
                goto free_policy_dbs_info;
 
-       dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
+       /*
+        * The sampling interval should not be less than the transition latency
+        * of the CPU and it also cannot be too small for dbs_update() to work
+        * correctly.
+        */
+       dbs_data->sampling_rate = max_t(unsigned int,
+                                       CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
+                                       cpufreq_policy_transition_delay_us(policy));
 
        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;
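Two things change in the governor: the store path now parses into a local and validates before touching dbs_data, and both the store and init paths enforce a floor on the interval (two scheduler ticks, expressed in microseconds). A minimal sketch of the store-side validation, with an invented 2000 µs floor standing in for the kernel's tick-derived one:

#include <stdio.h>

#define MIN_SAMPLING_INTERVAL_US 2000u	/* illustrative floor, not the kernel's */

/* Parse a user-supplied interval and reject values below the floor,
 * rather than writing whatever was parsed straight into live state. */
static int store_sampling_rate(const char *buf, unsigned int *rate)
{
	unsigned int val;

	if (sscanf(buf, "%u", &val) != 1 || val < MIN_SAMPLING_INTERVAL_US)
		return -1;

	*rate = val;
	return 0;
}

int main(void)
{
	unsigned int rate = 10000;

	printf("\"500\"   -> %d (rate stays %u)\n",
	       store_sampling_rate("500", &rate), rate);
	printf("\"20000\" -> %d (rate now %u)\n",
	       store_sampling_rate("20000", &rate), rate);
	return 0;
}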
index 628fe899cb483da9dbf0f7661b537734bc82f784..d9b2c2de49c43f125c91b382f818ff81d0ffc6ac 100644 (file)
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
        val >>= OCOTP_CFG3_SPEED_SHIFT;
        val &= 0x3;
 
-       if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
-            of_machine_is_compatible("fsl,imx6q"))
-               if (dev_pm_opp_disable(dev, 1200000000))
-                       dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        if (val < OCOTP_CFG3_SPEED_996MHZ)
                if (dev_pm_opp_disable(dev, 996000000))
                        dev_warn(dev, "failed to disable 996MHz OPP\n");
-       if (of_machine_is_compatible("fsl,imx6q")) {
+
+       if (of_machine_is_compatible("fsl,imx6q") ||
+           of_machine_is_compatible("fsl,imx6qp")) {
                if (val != OCOTP_CFG3_SPEED_852MHZ)
                        if (dev_pm_opp_disable(dev, 852000000))
                                dev_warn(dev, "failed to disable 852MHz OPP\n");
+               if (val != OCOTP_CFG3_SPEED_1P2GHZ)
+                       if (dev_pm_opp_disable(dev, 1200000000))
+                               dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        }
        iounmap(base);
 put_node:
index dfcf56ee3c6181fc64d0a074e2f5659fec35a6d8..76861a00bb92c4b449f346433f282da8e6cb82c9 100644 (file)
@@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = {
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
                                 irq_hw_number_t hwirq)
@@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
        ret = irq_set_chip_data(irq, d->host_data);
        if (ret < 0)
                return ret;
-       irq_set_lockdep_class(irq, &gpio_lock_class);
+       irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class);
        irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq);
        irq_set_noprobe(irq);
 
index 545d43a587b7ef1308dc827ffbea2c2dd733b478..bb4f8cf18bd9f6c7e47328aaf2ec1cdd3dfc3d6b 100644 (file)
@@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank(
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key brcmstb_gpio_irq_lock_class;
+static struct lock_class_key brcmstb_gpio_irq_request_class;
 
 
 static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
@@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
        ret = irq_set_chip_data(irq, &bank->gc);
        if (ret < 0)
                return ret;
-       irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class);
+       irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class,
+                             &brcmstb_gpio_irq_request_class);
        irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq);
        irq_set_noprobe(irq);
        return 0;
index 23e771dba4c17ab7a497feb37c108f15fe117c79..e85903eddc68ecd5930df5ded45efbc398aa57ca 100644 (file)
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
        struct gpio_reg *r = to_gpio_reg(gc);
        int irq = r->irqs[offset];
 
-       if (irq >= 0 && r->irq.domain)
-               irq = irq_find_mapping(r->irq.domain, irq);
+       if (irq >= 0 && r->irqdomain)
+               irq = irq_find_mapping(r->irqdomain, irq);
 
        return irq;
 }
index 8db47f671708752dbaae420478935f1613847dd3..02fa8fe2292a13608e332d6247e9c0b55870ae98 100644 (file)
@@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
  * than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
@@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
                bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-               irq_set_lockdep_class(irq, &gpio_lock_class);
+               irq_set_lockdep_class(irq, &gpio_lock_class,
+                                     &gpio_request_class);
                irq_set_chip_data(irq, bank);
                irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
        }
index 2313af82fad3d4bb3cf7027bec9f25d9e6b5887c..acd59113e08b9cda0fce6930981520a481ec7203 100644 (file)
@@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
 
 static int xgene_gpio_sb_domain_activate(struct irq_domain *d,
                                         struct irq_data *irq_data,
-                                        bool early)
+                                        bool reserve)
 {
        struct xgene_gpio_sb *priv = d->host_data;
        u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq);
index eb4528c87c0b3977420a2108c7feaaf9b2a95869..d6f3d9ee1350e422e3f373427d55d6ce8b6be565 100644 (file)
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
        }
 
        if (!chip->names)
-               devprop_gpiochip_set_names(chip);
+               devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
 
        acpi_gpiochip_request_regions(acpi_gpio);
        acpi_gpiochip_scan_gpios(acpi_gpio);
index 27f383bda7d9621322a3340d9def92354d21d7a3..f748aa3e77f72000b357718d5c955abba38c5c07 100644 (file)
 /**
  * devprop_gpiochip_set_names - Set GPIO line names using device properties
  * @chip: GPIO chip whose lines should be named, if possible
+ * @fwnode: Property Node containing the gpio-line-names property
  *
  * Looks for device property "gpio-line-names" and if it exists assigns
  * GPIO line names for the chip. The memory allocated for the assigned
  * names belong to the underlying firmware node and should not be released
  * by the caller.
  */
-void devprop_gpiochip_set_names(struct gpio_chip *chip)
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+                               const struct fwnode_handle *fwnode)
 {
        struct gpio_device *gdev = chip->gpiodev;
        const char **names;
        int ret, i;
 
-       if (!chip->parent) {
-               dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
-               return;
-       }
-
-       ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                NULL, 0);
        if (ret < 0)
                return;
 
        if (ret != gdev->ngpio) {
-               dev_warn(chip->parent,
+               dev_warn(&gdev->dev,
                         "names %d do not match number of GPIOs %d\n", ret,
                         gdev->ngpio);
                return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
        if (!names)
                return;
 
-       ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                names, gdev->ngpio);
        if (ret < 0) {
-               dev_warn(chip->parent, "failed to read GPIO line names\n");
+               dev_warn(&gdev->dev, "failed to read GPIO line names\n");
                kfree(names);
                return;
        }
index e0d59e61b52fa6aa53e7830ae1ab1c59ceec2453..72a0695d2ac3a3d3217462ff4ea85147921f5dcd 100644 (file)
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
 
        /* If the chip defines names itself, these take precedence */
        if (!chip->names)
-               devprop_gpiochip_set_names(chip);
+               devprop_gpiochip_set_names(chip,
+                                          of_fwnode_handle(chip->of_node));
 
        of_node_get(chip->of_node);
 
index aad84a6306c4e5ddbc3364d58add85fa1b1e2583..44332b793718afe1092f5c1a1f42a088b52ab6fb 100644 (file)
@@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices);
 
 static void gpiochip_free_hogs(struct gpio_chip *chip);
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-                               struct lock_class_key *key);
+                               struct lock_class_key *lock_key,
+                               struct lock_class_key *request_key);
 static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
 static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
 static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
@@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void)
 }
 
 int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-                              struct lock_class_key *key)
+                              struct lock_class_key *lock_key,
+                              struct lock_class_key *request_key)
 {
        unsigned long   flags;
        int             status = 0;
@@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
        if (status)
                goto err_remove_from_list;
 
-       status = gpiochip_add_irqchip(chip, key);
+       status = gpiochip_add_irqchip(chip, lock_key, request_key);
        if (status)
                goto err_remove_chip;
 
@@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
         * This lock class tells lockdep that GPIO irqs are in a different
         * category than their parents, so it won't report false recursion.
         */
-       irq_set_lockdep_class(irq, chip->irq.lock_key);
+       irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key);
        irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler);
        /* Chips that use nested thread handlers have them marked */
        if (chip->irq.threaded)
@@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 /**
  * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip
  * @gpiochip: the GPIO chip to add the IRQ chip to
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  */
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-                               struct lock_class_key *lock_key)
+                               struct lock_class_key *lock_key,
+                               struct lock_class_key *request_key)
 {
        struct irq_chip *irqchip = gpiochip->irq.chip;
        const struct irq_domain_ops *ops;
@@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
        gpiochip->to_irq = gpiochip_to_irq;
        gpiochip->irq.default_type = type;
        gpiochip->irq.lock_key = lock_key;
+       gpiochip->irq.request_key = request_key;
 
        if (gpiochip->irq.domain_ops)
                ops = gpiochip->irq.domain_ops;
@@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
  * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
  * to have the core avoid setting up any default type in the hardware.
  * @threaded: whether this irqchip uses a nested thread handler
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  *
  * This function closely associates a certain irqchip with a certain
  * gpiochip, providing an irq domain to translate the local IRQs to
@@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
                             irq_flow_handler_t handler,
                             unsigned int type,
                             bool threaded,
-                            struct lock_class_key *lock_key)
+                            struct lock_class_key *lock_key,
+                            struct lock_class_key *request_key)
 {
        struct device_node *of_node;
 
@@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
        gpiochip->irq.default_type = type;
        gpiochip->to_irq = gpiochip_to_irq;
        gpiochip->irq.lock_key = lock_key;
+       gpiochip->irq.request_key = request_key;
        gpiochip->irq.domain = irq_domain_add_simple(of_node,
                                        gpiochip->ngpio, first_irq,
                                        &gpiochip_domain_ops, gpiochip);
@@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key);
 #else /* CONFIG_GPIOLIB_IRQCHIP */
 
 static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-                                      struct lock_class_key *key)
+                                      struct lock_class_key *lock_key,
+                                      struct lock_class_key *request_key)
 {
        return 0;
 }
index af48322839c3d6004ee16a32591f1f434d364fc8..6c44d165213910a259ee94c85731ab4d1ca68431 100644 (file)
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
        return desc - &desc->gdev->descs[0];
 }
 
-void devprop_gpiochip_set_names(struct gpio_chip *chip);
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+                               const struct fwnode_handle *fwnode);
 
 /* With descriptor prefix */
 
index da43813d67a4ad56ddecb79ac0a749afe29abc43..5aeb5f8816f3b9a68666cf57372cddeb12c2b36a 100644 (file)
@@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                                  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
                                  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
                amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
index f71fe6d2ddda795fd2fb914740b75845893c1298..bb5fa895fb6446097580ce229ef23dc473f979af 100644 (file)
@@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                       const struct dm_connector_state *dm_state)
 {
        struct drm_display_mode *preferred_mode = NULL;
-       const struct drm_connector *drm_connector;
+       struct drm_connector *drm_connector;
        struct dc_stream_state *stream = NULL;
        struct drm_display_mode mode = *drm_mode;
        bool native_mode_found = false;
@@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (!aconnector->dc_sink) {
                /*
-                * Exclude MST from creating fake_sink
-                * TODO: need to enable MST into fake_sink feature
+                * Create a dc_sink when necessary for MST connectors;
+                * don't apply the fake_sink path to MST
                 */
-               if (aconnector->mst_port)
-                       goto stream_create_fail;
+               if (aconnector->mst_port) {
+                       dm_dp_mst_dc_sink_create(drm_connector);
+                       goto mst_dc_sink_create_done;
+               }
 
                if (create_fake_sink(aconnector))
                        goto stream_create_fail;
@@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 stream_create_fail:
 dm_state_null:
 drm_connector_null:
+mst_dc_sink_create_done:
        return stream;
 }
 
index 117521c6a6ed26213c60ec5316df64b66eeaba12..0230250a1164bb01b41f3a2b22011960909e14bb 100644 (file)
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
        struct mutex hpd_lock;
 
        bool fake_enable;
+
+       bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
index f8efb98b1fa72f86ecbec4c568a653af164c7daa..638c2c2b5cd79069e7312b7d7f23a28b6f5eb3b6 100644 (file)
@@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
        return ret;
 }
 
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
+{
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+       struct edid *edid;
+       struct dc_sink *dc_sink;
+       struct dc_sink_init_data init_params = {
+                       .link = aconnector->dc_link,
+                       .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+
+       edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
+
+       if (!edid) {
+               drm_mode_connector_update_edid_property(
+                       &aconnector->base,
+                       NULL);
+               return;
+       }
+
+       aconnector->edid = edid;
+
+       dc_sink = dc_link_add_remote_sink(
+               aconnector->dc_link,
+               (uint8_t *)aconnector->edid,
+               (aconnector->edid->extensions + 1) * EDID_LENGTH,
+               &init_params);
+
+       dc_sink->priv = aconnector;
+       aconnector->dc_sink = dc_sink;
+
+       amdgpu_dm_add_sink_to_freesync_module(
+                       connector, aconnector->edid);
+
+       drm_mode_connector_update_edid_property(
+                                       &aconnector->base, aconnector->edid);
+}
+
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                        drm_mode_connector_set_path_property(connector, pathprop);
 
                        drm_connector_list_iter_end(&conn_iter);
+                       aconnector->mst_connected = true;
                        return &aconnector->base;
                }
        }
@@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
         */
        amdgpu_dm_connector_funcs_reset(connector);
 
+       aconnector->mst_connected = true;
+
        DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
                        aconnector, connector->base.id, aconnector->mst_port);
 
@@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
        drm_mode_connector_update_edid_property(
                        &aconnector->base,
                        NULL);
+
+       aconnector->mst_connected = false;
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
        drm_kms_helper_hotplug_event(dev);
 }
 
+static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
+{
+       mutex_lock(&connector->dev->mode_config.mutex);
+       drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
+       mutex_unlock(&connector->dev->mode_config.mutex);
+}
+
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
        if (adev->mode_info.rfbdev)
                drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
 
        drm_connector_register(connector);
 
+       if (aconnector->mst_connected)
+               dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
index 2da851b40042aee9b79eb2c666d45c0f5061fee0..8cf51da26657e29e72062b34aeed7e5d827f9e21 100644 (file)
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
                                       struct amdgpu_dm_connector *aconnector);
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
index 3dce35e66b0917d2ec93420063b3477443788302..b142629a105841b603501291800e45b9ade30591 100644 (file)
@@ -900,6 +900,15 @@ bool dcn_validate_bandwidth(
                        v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
                        v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
                        v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
+                       /*
+                        * The spreadsheet doesn't handle taps_c equal to one properly,
+                        * so we need to force chroma to always be scaled to pass
+                        * bandwidth validation.
+                        */
+                       if (v->override_hta_pschroma[input_idx] == 1)
+                               v->override_hta_pschroma[input_idx] = 2;
+                       if (v->override_vta_pschroma[input_idx] == 1)
+                               v->override_vta_pschroma[input_idx] = 2;
                        v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
                }
                if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
index e27ed4a45265290690604b10e6d4df4fbee77514..42a111b9505dcb5190437a381c7dba8fda444719 100644 (file)
@@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
                link->link_enc->funcs->disable_output(link->link_enc, signal, link);
 }
 
-bool dp_active_dongle_validate_timing(
+static bool dp_active_dongle_validate_timing(
                const struct dc_crtc_timing *timing,
                const struct dc_dongle_caps *dongle_caps)
 {
@@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing(
        /* Check Color Depth and Pixel Clock */
        if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
                required_pix_clk /= 2;
+       else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+               required_pix_clk = required_pix_clk * 2 / 3;
 
        switch (timing->display_color_depth) {
        case COLOR_DEPTH_666:
index 07ff8d2faf3f4630276d9241092f605274375cda..d844fadcd56f048739e374cb8d534cba10d235b3 100644 (file)
@@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface(
                int num_planes,
                struct dc_state *context)
 {
-       int i, be_idx;
+       int i;
 
        if (num_planes == 0)
                return;
 
-       be_idx = -1;
        for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (stream == context->res_ctx.pipe_ctx[i].stream) {
-                       be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst;
-                       break;
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if (stream == pipe_ctx->stream) {
+                       if (!pipe_ctx->top_pipe &&
+                               (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                               dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
                }
        }
 
@@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface(
                                        context->stream_count);
 
                dce110_program_front_end_for_pipe(dc, pipe_ctx);
+
+               dc->hwss.update_plane_addr(dc, pipe_ctx);
+
                program_surface_visibility(dc, pipe_ctx);
 
        }
+
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if ((stream == pipe_ctx->stream) &&
+                       (!pipe_ctx->top_pipe) &&
+                       (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                       dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
+       }
 }
 
 static void dce110_power_down_fe(struct dc *dc, int fe_idx)
index 74e7c82bdc76a71080d8e22dd6db90179ffee54e..a9d55d0dd69e009f4a31038c9529c86c05904ec7 100644 (file)
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
                        scl_data->taps.h_taps = 1;
                if (IDENTITY_RATIO(scl_data->ratios.vert))
                        scl_data->taps.v_taps = 1;
-               /*
-                * Spreadsheet doesn't handle taps_c is one properly,
-                * need to force Chroma to always be scaled to pass
-                * bandwidth validation.
-                */
+               if (IDENTITY_RATIO(scl_data->ratios.horz_c))
+                       scl_data->taps.h_taps_c = 1;
+               if (IDENTITY_RATIO(scl_data->ratios.vert_c))
+                       scl_data->taps.v_taps_c = 1;
        }
 
        return true;
index 59849f02e2ad5bb74559ea85fbeb6fc1dd97bde6..1402c0e71b03d18866139056b12f0d5fd84b6afb 100644 (file)
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 
        mutex_lock(&dev->mode_config.idr_mutex);
 
-       /* Insert the new lessee into the tree */
-       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
-       if (id < 0) {
-               error = id;
-               goto out_lessee;
-       }
-
-       lessee->lessee_id = id;
-       lessee->lessor = drm_master_get(lessor);
-       list_add_tail(&lessee->lessee_list, &lessor->lessees);
-
        idr_for_each_entry(leases, entry, object) {
                error = 0;
                if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
                }
        }
 
+       /* Insert the new lessee into the tree */
+       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
+       if (id < 0) {
+               error = id;
+               goto out_lessee;
+       }
+
+       lessee->lessee_id = id;
+       lessee->lessor = drm_master_get(lessor);
+       list_add_tail(&lessee->lessee_list, &lessor->lessees);
+
        /* Move the leases over */
        lessee->leases = *leases;
        DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
index 37a93cdffb4ad0e7986a634df4d70ccc3fef286e..2c90519576a3e8b63a4c8361f18672db853ebcec 100644 (file)
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
 }
 
 /*
- * setplane_internal - setplane handler for internal callers
+ * __setplane_internal - setplane handler for internal callers
  *
- * Note that we assume an extra reference has already been taken on fb.  If the
- * update fails, this reference will be dropped before return; if it succeeds,
- * the previous framebuffer (if any) will be unreferenced instead.
+ * This function will take a reference on the new fb for the plane
+ * on success.
  *
  * src_{x,y,w,h} are provided in 16.16 fixed point format
  */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
        if (!ret) {
                plane->crtc = crtc;
                plane->fb = fb;
-               fb = NULL;
+               drm_framebuffer_get(plane->fb);
        } else {
                plane->old_fb = NULL;
        }
 
 out:
-       if (fb)
-               drm_framebuffer_put(fb);
        if (plane->old_fb)
                drm_framebuffer_put(plane->old_fb);
        plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
        struct drm_plane *plane;
        struct drm_crtc *crtc = NULL;
        struct drm_framebuffer *fb = NULL;
+       int ret;
 
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
                }
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
-       return setplane_internal(plane, crtc, fb,
-                                plane_req->crtc_x, plane_req->crtc_y,
-                                plane_req->crtc_w, plane_req->crtc_h,
-                                plane_req->src_x, plane_req->src_y,
-                                plane_req->src_w, plane_req->src_h);
+       ret = setplane_internal(plane, crtc, fb,
+                               plane_req->crtc_x, plane_req->crtc_y,
+                               plane_req->crtc_w, plane_req->crtc_h,
+                               plane_req->src_x, plane_req->src_y,
+                               plane_req->src_w, plane_req->src_h);
+
+       if (fb)
+               drm_framebuffer_put(fb);
+
+       return ret;
 }
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
                src_h = fb->height << 16;
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
        ret = __setplane_internal(crtc->cursor, crtc, fb,
-                               crtc_x, crtc_y, crtc_w, crtc_h,
-                               0, 0, src_w, src_h, ctx);
+                                 crtc_x, crtc_y, crtc_w, crtc_h,
+                                 0, 0, src_w, src_h, ctx);
+
+       if (fb)
+               drm_framebuffer_put(fb);
 
        /* Update successful; save new cursor position, if necessary */
        if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
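The plane changes move from "the helper consumes the fb reference on success" to the simpler convention sketched below: the caller always drops its own reference, and the helper takes a new one only if it actually stores the pointer. The names here are illustrative, not the DRM API:

#include <stdio.h>
#include <stdlib.h>

struct fb { int refs; };

static struct fb *fb_get(struct fb *f) { f->refs++; return f; }

static void fb_put(struct fb *f)
{
	if (--f->refs == 0) {
		printf("fb freed\n");
		free(f);
	}
}

/* The helper takes its own reference only when it keeps the pointer;
 * it never consumes the reference the caller passed in. */
static int plane_set_fb(struct fb **slot, struct fb *new_fb, int fail)
{
	if (fail)
		return -1;
	if (*slot)
		fb_put(*slot);
	*slot = fb_get(new_fb);
	return 0;
}

int main(void)
{
	struct fb *fb = calloc(1, sizeof(*fb));
	struct fb *slot = NULL;

	fb_get(fb);			/* caller's reference from lookup */
	plane_set_fb(&slot, fb, 0);	/* success: slot holds its own reference */
	fb_put(fb);			/* caller always drops its own reference */

	fb_put(slot);			/* later teardown drops the stored reference */
	return 0;
}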
index f776fc1cc543abf8e752a5133aaf1ca63fb2d8ff..cb4d09c70fd44647f30b6d10244f25e90db0835f 100644 (file)
@@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = {
        .release = drm_syncobj_file_release,
 };
 
-static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
-{
-       struct file *file = anon_inode_getfile("syncobj_file",
-                                              &drm_syncobj_file_fops,
-                                              syncobj, 0);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       drm_syncobj_get(syncobj);
-       if (cmpxchg(&syncobj->file, NULL, file)) {
-               /* lost the race */
-               fput(file);
-       }
-
-       return 0;
-}
-
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
 {
-       int ret;
+       struct file *file;
        int fd;
 
        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
                return fd;
 
-       if (!syncobj->file) {
-               ret = drm_syncobj_alloc_file(syncobj);
-               if (ret) {
-                       put_unused_fd(fd);
-                       return ret;
-               }
+       file = anon_inode_getfile("syncobj_file",
+                                 &drm_syncobj_file_fops,
+                                 syncobj, 0);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               return PTR_ERR(file);
        }
-       fd_install(fd, syncobj->file);
+
+       drm_syncobj_get(syncobj);
+       fd_install(fd, file);
+
        *p_fd = fd;
        return 0;
 }
@@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
        return ret;
 }
 
-static struct drm_syncobj *drm_syncobj_fdget(int fd)
-{
-       struct file *file = fget(fd);
-
-       if (!file)
-               return NULL;
-       if (file->f_op != &drm_syncobj_file_fops)
-               goto err;
-
-       return file->private_data;
-err:
-       fput(file);
-       return NULL;
-};
-
 static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
                                    int fd, u32 *handle)
 {
-       struct drm_syncobj *syncobj = drm_syncobj_fdget(fd);
+       struct drm_syncobj *syncobj;
+       struct file *file;
        int ret;
 
-       if (!syncobj)
+       file = fget(fd);
+       if (!file)
                return -EINVAL;
 
+       if (file->f_op != &drm_syncobj_file_fops) {
+               fput(file);
+               return -EINVAL;
+       }
+
        /* take a reference to put in the idr */
+       syncobj = file->private_data;
        drm_syncobj_get(syncobj);
 
        idr_preload(GFP_KERNEL);
@@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
        spin_unlock(&file_private->syncobj_table_lock);
        idr_preload_end();
 
-       if (ret < 0) {
-               fput(syncobj->file);
-               return ret;
-       }
-       *handle = ret;
-       return 0;
+       if (ret > 0) {
+               *handle = ret;
+               ret = 0;
+       } else
+               drm_syncobj_put(syncobj);
+
+       fput(file);
+       return ret;
 }
 
 static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
index 355120865efd14873726e8eae2e1ec6d6fb31b9f..309f3fa6794a92554c3e7da74429f162090da098 100644 (file)
@@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
        /* Clear host CRT status, so guest couldn't detect this host CRT. */
        if (IS_BROADWELL(dev_priv))
                vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
+
+       vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
 }
 
 static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
@@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
 static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
                                    int type, unsigned int resolution)
 {
-       struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
 
        if (WARN_ON(resolution >= GVT_EDID_NUM))
@@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
        port->type = type;
 
        emulate_monitor_status_change(vgpu);
-       vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
+
        return 0;
 }
 
index ad4050f7ab3b6965db1ce1b8d150036354464078..18de6569d04aef46aad4af88edb20f0a94d91b53 100644 (file)
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
         * must wait for all rendering to complete to the object (as unbinding
         * must anyway), and retire the requests.
         */
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+       ret = i915_gem_object_set_to_cpu_domain(obj, false);
        if (ret)
                return ret;
 
-       i915_gem_retire_requests(to_i915(obj->base.dev));
-
        while ((vma = list_first_entry_or_null(&obj->vma_list,
                                               struct i915_vma,
                                               obj_link))) {
index e8ca67a129d28da6ef5b9d1de9d8357f7dc02f7e..ac236b88c99ca0fb07c49ec5d9487b1e0b38adfe 100644 (file)
@@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb {
        struct dma_fence *dma;
        struct timer_list timer;
        struct irq_work work;
+       struct rcu_head rcu;
 };
 
 static void timer_i915_sw_fence_wake(struct timer_list *t)
@@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
        del_timer_sync(&cb->timer);
        dma_fence_put(cb->dma);
 
-       kfree(cb);
+       kfree_rcu(cb, rcu);
 }
 
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
index 5f8b9f1f40f19e84968c18e5fbd229731b392dea..bcbc7abe66935eef46c9799451b939dc6d516639 100644 (file)
@@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
        struct intel_wait *wait, *n, *first;
 
        if (!b->irq_armed)
-               return;
+               goto wakeup_signaler;
 
        /* We only disarm the irq when we are idle (all requests completed),
         * so if the bottom-half remains asleep, it missed the request
@@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
        b->waiters = RB_ROOT;
 
        spin_unlock_irq(&b->rb_lock);
+
+       /*
+        * The signaling thread may be asleep holding a reference to a request
+        * that had its signaling cancelled prior to being preempted. We need
+        * to kick the signaler, just in case, to release any such reference.
+        */
+wakeup_signaler:
+       wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg)
                }
 
                if (unlikely(do_schedule)) {
-                       DEFINE_WAIT(exec);
-
                        if (kthread_should_park())
                                kthread_parkme();
 
-                       if (kthread_should_stop()) {
-                               GEM_BUG_ON(request);
+                       if (unlikely(kthread_should_stop())) {
+                               i915_gem_request_put(request);
                                break;
                        }
 
-                       if (request)
-                               add_wait_queue(&request->execute, &exec);
-
                        schedule();
-
-                       if (request)
-                               remove_wait_queue(&request->execute, &exec);
                }
                i915_gem_request_put(request);
        } while (1);
index e0843bb991699d0c81ec8242d1809a3e26527af8..58a3755544b292dfdd3c7e089f9179c51861d76d 100644 (file)
@@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
        if (WARN_ON(!pll))
                return;
 
+        mutex_lock(&dev_priv->dpll_lock);
+
        if (IS_CANNONLAKE(dev_priv)) {
                /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
                val = I915_READ(DPCLKA_CFGCR0);
@@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
        } else if (INTEL_INFO(dev_priv)->gen < 9) {
                I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
        }
+
+       mutex_unlock(&dev_priv->dpll_lock);
 }
 
 static void intel_ddi_clk_disable(struct intel_encoder *encoder)
index e8ccf89cb17b6843b878ec0ef0641055d68e1cf8..30cf273d57aa5a7b91b77928a9a971cba478b8c0 100644 (file)
@@ -9944,11 +9944,10 @@ found:
        }
 
        ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0);
+       drm_framebuffer_put(fb);
        if (ret)
                goto fail;
 
-       drm_framebuffer_put(fb);
-
        ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
        if (ret)
                goto fail;
@@ -13195,7 +13194,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
        primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe);
        primary->check_plane = intel_check_primary_plane;
 
-       if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
+       if (INTEL_GEN(dev_priv) >= 10) {
                intel_primary_formats = skl_primary_formats;
                num_formats = ARRAY_SIZE(skl_primary_formats);
                modifiers = skl_format_modifiers_ccs;
index 3bf65288ffffd51719d0c4e8ce934ccd2d2f59e3..5809b29044fc573401f6116bb45762fc07e71596 100644 (file)
@@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
                };
 
                if (!pci_dev_present(atom_hdaudio_ids)) {
-                       DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n");
+                       DRM_INFO("HDaudio controller not detected, using LPE audio instead\n");
                        lpe_present = true;
                }
        }
index 2615912430cc97098f0fe806e95e5e40c1ee96f7..435ff8662cfa823a56f5d84a8fe66d4bc8929230 100644 (file)
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                /* Determine if we can get a cache-coherent map, forcing
                 * uncached mapping if we can't.
                 */
-               if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
+               if (!nouveau_drm_use_coherent_gpu_mapping(drm))
                        nvbo->force_coherent = true;
        }
 
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
                    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
                        continue;
-               if ((flags & TTM_PL_FLAG_TT  ) && !vmm->page[i].host)
+               if ((flags & TTM_PL_FLAG_TT) &&
+                   (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
                        continue;
 
                /* Select this page size if it's the first that supports
index 8d4a5be3b913016c410c8226ad5b05b0bf75299b..56fe261b62683a8690ac8f3439426ca1e4c269ef 100644 (file)
@@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
        work->cli = cli;
        mutex_lock(&cli->lock);
        list_add_tail(&work->head, &cli->worker);
-       mutex_unlock(&cli->lock);
        if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence))
                nouveau_cli_work_fence(fence, &work->cb);
+       mutex_unlock(&cli->lock);
 }
 
 static void
index 3331e82ae9e7130b18f4a6f307cc284519f873d3..96f6bd8aee5d3a248d76c683b6146ebb8ef673c7 100644 (file)
@@ -157,8 +157,8 @@ struct nouveau_drm {
                struct nvif_object copy;
                int mtrr;
                int type_vram;
-               int type_host;
-               int type_ncoh;
+               int type_host[2];
+               int type_ncoh[2];
        } ttm;
 
        /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
        return dev->dev_private;
 }
 
+static inline bool
+nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
+{
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
+}
+
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 bool nouveau_pmops_runtime(void);
index c533d8e04afc0f1fc4708d85e069323291c428c3..be7357bf2246e6ae326c9b6750c2c183cb0974d9 100644 (file)
@@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
        drm_fb_helper_unregister_fbi(&fbcon->helper);
        drm_fb_helper_fini(&fbcon->helper);
 
-       if (nouveau_fb->nvbo) {
+       if (nouveau_fb && nouveau_fb->nvbo) {
                nouveau_vma_del(&nouveau_fb->vma);
                nouveau_bo_unmap(nouveau_fb->nvbo);
                nouveau_bo_unpin(nouveau_fb->nvbo);
index 589a9621db763f98454485081a3f80e2324e717c..c002f896850739b343624247e7d52d94e34bf99d 100644 (file)
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
        u8 type;
        int ret;
 
-       if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
-               type = drm->ttm.type_ncoh;
+       if (!nouveau_drm_use_coherent_gpu_mapping(drm))
+               type = drm->ttm.type_ncoh[!!mem->kind];
        else
-               type = drm->ttm.type_host;
+               type = drm->ttm.type_host[0];
 
        if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
                mem->comp = mem->kind = 0;
index 08b974b3048279813e2d67ad0d5b0055e68998c2..dff51a0ee0281e8f5924ffc0135d8b4baf8542f9 100644 (file)
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
        drm->ttm.mem_global_ref.release = NULL;
 }
 
-int
-nouveau_ttm_init(struct nouveau_drm *drm)
+static int
+nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
 {
-       struct nvkm_device *device = nvxx_device(&drm->client.device);
-       struct nvkm_pci *pci = device->pci;
        struct nvif_mmu *mmu = &drm->client.mmu;
-       struct drm_device *dev = drm->dev;
-       int typei, ret;
+       int typei;
 
        typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
-                                                  NVIF_MEM_COHERENT);
+                                           kind | NVIF_MEM_COHERENT);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_host = typei;
+       drm->ttm.type_host[!!kind] = typei;
 
-       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE);
+       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_ncoh = typei;
+       drm->ttm.type_ncoh[!!kind] = typei;
+       return 0;
+}
+
+int
+nouveau_ttm_init(struct nouveau_drm *drm)
+{
+       struct nvkm_device *device = nvxx_device(&drm->client.device);
+       struct nvkm_pci *pci = device->pci;
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       struct drm_device *dev = drm->dev;
+       int typei, ret;
+
+       ret = nouveau_ttm_init_host(drm, 0);
+       if (ret)
+               return ret;
+
+       if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+           drm->client.device.info.chipset != 0x50) {
+               ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
+               if (ret)
+                       return ret;
+       }
 
        if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
            drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
index 9e2628dd8e4d6734c2d7c5d073012bbb95b4fa4c..f5371d96b003c23cac9e1f34cf3deca3b54b06a6 100644 (file)
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
                        nvif_vmm_put(&vma->vmm->vmm, &tmp);
                }
                list_del(&vma->head);
-               *pvma = NULL;
                kfree(*pvma);
+               *pvma = NULL;
        }
 }
 
index e146436156985a534fa14e0829db4560d6eb1459..00eeaaffeae565a04044fc55e52990eb71d1063b 100644 (file)
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
        .imem = gk20a_instmem_new,
        .ltc = gp100_ltc_new,
        .mc = gp10b_mc_new,
-       .mmu = gf100_mmu_new,
+       .mmu = gp10b_mmu_new,
        .secboot = gp10b_secboot_new,
        .pmu = gm20b_pmu_new,
        .timer = gk20a_timer_new,
index 972370ed36f090d0c0323253b79735edd355db07..7c7efa4ea0d0edb391a27db2c6e99179799070f1 100644 (file)
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
                        if (data) {
                                *ver = nvbios_rd08(bios, data + 0x00);
                                switch (*ver) {
+                               case 0x20:
                                case 0x21:
                                case 0x30:
                                case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
        if (data && idx < *cnt) {
                u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
                switch (*ver * !!outp) {
+               case 0x20:
                case 0x21:
                case 0x30:
                        *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
                info->type = nvbios_rd16(bios, data + 0x00);
                info->mask = nvbios_rd16(bios, data + 0x02);
                switch (*ver) {
+               case 0x20:
+                       info->mask |= 0x00c0; /* match any link */
+                       /* fall-through */
                case 0x21:
                case 0x30:
                        info->flags     = nvbios_rd08(bios, data + 0x05);
                        info->script[0] = nvbios_rd16(bios, data + 0x06);
                        info->script[1] = nvbios_rd16(bios, data + 0x08);
-                       info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
+                       if (*len >= 0x0c)
+                               info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
                        if (*len >= 0x0f) {
                                info->script[2] = nvbios_rd16(bios, data + 0x0c);
                                info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
        memset(info, 0x00, sizeof(*info));
        if (data) {
                switch (*ver) {
+               case 0x20:
                case 0x21:
                        info->dc    = nvbios_rd08(bios, data + 0x02);
                        info->pe    = nvbios_rd08(bios, data + 0x03);
index 1ba7289684aa2116b6fcc4d05869f0d2b8322a39..db48a1daca0c7a3d786332ce25435839fcc10760 100644 (file)
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
                        iobj->base.memory.ptrs = &nv50_instobj_fast;
                else
                        iobj->base.memory.ptrs = &nv50_instobj_slow;
-               refcount_inc(&iobj->maps);
+               refcount_set(&iobj->maps, 1);
        }
 
        mutex_unlock(&imem->subdev.mutex);
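
The one-line change above follows the usual refcount_t rule: refcount_inc() warns on a 0 -> 1 transition because that normally signals a use-after-free, so a path that legitimately revives a count that dropped to zero re-arms it with refcount_set(). A minimal sketch of that rule, with an illustrative helper name:

#include <linux/refcount.h>

/* Illustrative helper: take the first reference again after the count has
 * legitimately dropped to zero while the object itself stayed alive. */
static void reacquire_first_ref(refcount_t *r)
{
        if (refcount_inc_not_zero(r))
                return;         /* later users: a plain increment is fine */
        refcount_set(r, 1);     /* first user again: avoids the 0 -> 1 WARN */
}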
index b1b1f3626b96298fcdb76f1819fd7b97801d5b37..deb96de54b0030244ec88014bce526119c3fae91 100644 (file)
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
                return ret;
 
        pci->irq = pdev->irq;
+
+       /* Ensure MSI interrupts are armed, for the case where there are
+        * already interrupts pending (for whatever reason) at load time.
+        */
+       if (pci->msi)
+               pci->func->msi_rearm(pci);
+
        return ret;
 }
 
index dda904ec0534cd9d84d3967b94bf5fa4f444df9e..500b6fb3e0284d2fdfc71265a64f0d5b51fe4f99 100644 (file)
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
        writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
 }
 
+static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
+                                       const struct drm_display_mode *mode)
+{
+       struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
+       unsigned long rate = mode->clock * 1000;
+       unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
+       long rounded_rate;
+
+       /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
+       if (rate > 165000000)
+               return MODE_CLOCK_HIGH;
+       rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
+       if (rounded_rate > 0 &&
+           max_t(unsigned long, rounded_rate, rate) -
+           min_t(unsigned long, rounded_rate, rate) < diff)
+               return MODE_OK;
+       return MODE_NOCLOCK;
+}
+
 static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
        .atomic_check   = sun4i_hdmi_atomic_check,
        .disable        = sun4i_hdmi_disable,
        .enable         = sun4i_hdmi_enable,
        .mode_set       = sun4i_hdmi_mode_set,
+       .mode_valid     = sun4i_hdmi_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
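
A small standalone sketch of the tolerance test the new mode_valid callback performs, with clk_round_rate() replaced by a caller-supplied value so the arithmetic can be checked in isolation; the 165 MHz ceiling mirrors the comment in the hunk, and the function name is illustrative.

#include <stdbool.h>

/* Accept a mode when the rounded TMDS rate is within +-0.5% of the request
 * and the request stays below the 165 MHz HDMI <= 1.2 pixel-clock limit. */
static bool tmds_rate_acceptable(unsigned long requested_hz, long rounded_hz)
{
        unsigned long diff = requested_hz / 200;   /* 0.5% of the request */

        if (requested_hz > 165000000UL)
                return false;                      /* MODE_CLOCK_HIGH */
        if (rounded_hz <= 0)
                return false;                      /* MODE_NOCLOCK */
        if ((unsigned long)rounded_hz > requested_hz)
                return (unsigned long)rounded_hz - requested_hz < diff;
        return requested_hz - (unsigned long)rounded_hz < diff;  /* MODE_OK */
}

int main(void)
{
        /* 74.25 MHz requested, clock rounds to ~74.242 MHz: accepted */
        return tmds_rate_acceptable(74250000UL, 74242424L) ? 0 : 1;
}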
index e122f5b2a395583cc14302a9bc4166fbba671071..f4284b51bdca99a04e8eda109a4d67bf5c9fac74 100644 (file)
@@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
        if (IS_ERR(tcon->crtc)) {
                dev_err(dev, "Couldn't create our CRTC\n");
                ret = PTR_ERR(tcon->crtc);
-               goto err_free_clocks;
+               goto err_free_dotclock;
        }
 
        ret = sun4i_rgb_init(drm, tcon);
        if (ret < 0)
-               goto err_free_clocks;
+               goto err_free_dotclock;
 
        if (tcon->quirks->needs_de_be_mux) {
                /*
index 44343a2bf55c65458a196b5968b0c494f1c569b0..b5ba6441489f6e4f28f6e71129dfede3361bd262 100644 (file)
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                freed += (nr_free_pool - shrink_pages) << pool->order;
                if (freed >= sc->nr_to_scan)
                        break;
+               shrink_pages <<= pool->order;
        }
        mutex_unlock(&lock);
        return freed;
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
        int r = 0;
        unsigned i, j, cpages;
        unsigned npages = 1 << order;
-       unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC);
+       unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
 
        /* allocate array for page caching change */
        caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
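
A tiny worked example of the entries-versus-pages accounting the two hunks above adjust: the pool stores compound allocations of 2^order pages while the shrinker counts single pages, so totals have to be shifted by the pool order when crossing that boundary. The numbers below are illustrative.

#include <stdio.h>

int main(void)
{
        unsigned int order = 2;              /* 4 pages per pool entry */
        unsigned int nr_free_pool = 8;       /* entries freed from this pool */
        unsigned int freed = nr_free_pool << order;

        printf("%u entries freed -> %u pages reported to the shrinker\n",
               nr_free_pool, freed);         /* 8 entries -> 32 pages */
        return 0;
}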
index f3fcb836a1f9edd0af5b1f31362987d22eff11b2..0c3f608131cff48fdbec637da27135d970534807 100644 (file)
@@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item)
                ret = hid_add_field(parser, HID_FEATURE_REPORT, data);
                break;
        default:
-               hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag);
+               hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag);
                ret = 0;
        }
 
index 68cdc962265b1051d7e399e0c6a181a4426673ed..271f31461da427d93459632b096c578d71f3ee44 100644 (file)
@@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
                                              (u8 *)&word, 2);
                break;
        case I2C_SMBUS_I2C_BLOCK_DATA:
-               size = I2C_SMBUS_BLOCK_DATA;
-               /* fallthrough */
+               if (read_write == I2C_SMBUS_READ) {
+                       read_length = data->block[0];
+                       count = cp2112_write_read_req(buf, addr, read_length,
+                                                     command, NULL, 0);
+               } else {
+                       count = cp2112_write_req(buf, addr, command,
+                                                data->block + 1,
+                                                data->block[0]);
+               }
+               break;
        case I2C_SMBUS_BLOCK_DATA:
                if (I2C_SMBUS_READ == read_write) {
                        count = cp2112_write_read_req(buf, addr,
@@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
        case I2C_SMBUS_WORD_DATA:
                data->word = le16_to_cpup((__le16 *)buf);
                break;
+       case I2C_SMBUS_I2C_BLOCK_DATA:
+               memcpy(data->block + 1, buf, read_length);
+               break;
        case I2C_SMBUS_BLOCK_DATA:
                if (read_length > I2C_SMBUS_BLOCK_MAX) {
                        ret = -EPROTO;
index 9325545fc3ae1cac4e3919c1120af6841590d4f4..edc0f64bb584806f080c1b6f682c53a20acc8466 100644 (file)
 
 #ifdef CONFIG_HOLTEK_FF
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
-MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
-
 /*
  * These commands and parameters are currently known:
  *
@@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = {
        .probe = holtek_probe,
 };
 module_hid_driver(holtek_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
+MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
index 76ed9a216f101e90ddaa3bb38a1e0e7175ea81d4..610223f0e94530635ac94ccf40b9f582c57d6d62 100644 (file)
@@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj)
        pr_debug("child device %s unregistered\n",
                dev_name(&device_obj->device));
 
+       kset_unregister(device_obj->channels_kset);
+
        /*
         * Kick off the process of unregistering the device.
         * This will call vmbus_remove() and eventually vmbus_device_release()
index c9790e2c344016e91c26d02f4a69e6a15035b62a..af5123042990281c694311da2819107dd8336afb 100644 (file)
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
                                    struct hwmon_device *hwdev, int index)
 {
        struct hwmon_thermal_data *tdata;
+       struct thermal_zone_device *tzd;
 
        tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
        if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
        tdata->hwdev = hwdev;
        tdata->index = index;
 
-       devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
-                                            &hwmon_thermal_ops);
+       tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
+                                                  &hwmon_thermal_ops);
+       /*
+        * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
+        * so ignore that error but forward any other error.
+        */
+       if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
+               return PTR_ERR(tzd);
 
        return 0;
 }
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
                                if (!chip->ops->is_visible(drvdata, hwmon_temp,
                                                           hwmon_temp_input, j))
                                        continue;
-                               if (info[i]->config[j] & HWMON_T_INPUT)
-                                       hwmon_thermal_add_sensor(dev, hwdev, j);
+                               if (info[i]->config[j] & HWMON_T_INPUT) {
+                                       err = hwmon_thermal_add_sensor(dev,
+                                                               hwdev, j);
+                                       if (err)
+                                               goto free_device;
+                               }
                        }
                }
        }
 
        return hdev;
 
+free_device:
+       device_unregister(hdev);
 free_hwmon:
        kfree(hwdev);
 ida_remove:
index feafdb961c485c61e3842d6a946d83b1bf7176b8..59b2f96d986aa2b86491d29b8d7c3e25f244ba6f 100644 (file)
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
        if (ret)
                return ret;
 
+       if (!qp->qp_sec)
+               return 0;
+
        mutex_lock(&real_qp->qp_sec->mutex);
        ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
                                          qp->qp_sec);
index d0202bb176a4a6a826b27f2b4327691e334ad4ea..840b24096690ddcdef6ce894c5ee21b6285bf238 100644 (file)
@@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
                return -EOPNOTSUPP;
 
        if (ucore->inlen > sizeof(cmd)) {
-               if (ib_is_udata_cleared(ucore, sizeof(cmd),
-                                       ucore->inlen - sizeof(cmd)))
+               if (!ib_is_udata_cleared(ucore, sizeof(cmd),
+                                        ucore->inlen - sizeof(cmd)))
                        return -EOPNOTSUPP;
        }
 
index 3fb8fb6cc824ef09f9c9c229e5a99a3e4801a65e..e36d27ed4daae3d46cb88b3ab4f747c34e6abb49 100644 (file)
@@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp)
        spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
 
        atomic_dec(&real_qp->usecnt);
-       ib_close_shared_qp_security(qp->qp_sec);
+       if (qp->qp_sec)
+               ib_close_shared_qp_security(qp->qp_sec);
        kfree(qp);
 
        return 0;
index b7bfc536e00fd8c7b241c0f56539d1394d235acf..6f2b26126c64a4503b6a3bf8b8c3991b65b65012 100644 (file)
@@ -395,7 +395,7 @@ next_cqe:
 
 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
 {
-       if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) {
+       if (DRAIN_CQE(cqe)) {
                WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
                return 0;
        }
@@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
        /*
         * Special cqe for drain WR completions...
         */
-       if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
+       if (DRAIN_CQE(hw_cqe)) {
                *cookie = CQE_DRAIN_COOKIE(hw_cqe);
                *cqe = *hw_cqe;
                goto skip_cqe;
@@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
-               if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+               if (unlikely(!CQE_STATUS(hw_cqe) &&
+                            CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
                        t4_set_wq_in_error(wq);
-                       hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
-                       goto proc_cqe;
+                       hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
                }
                goto proc_cqe;
        }
@@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                                c4iw_invalidate_mr(qhp->rhp,
                                                   CQE_WRID_FR_STAG(&cqe));
                        break;
-               case C4IW_DRAIN_OPCODE:
-                       wc->opcode = IB_WC_SEND;
-                       break;
                default:
                        pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
                               CQE_OPCODE(&cqe), CQE_QPID(&cqe));
index 470f97a79ebb7f90e649179ab34b30ad0c089df7..65dd3726ca024db4e0fabff5c4527c676757065e 100644 (file)
@@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
        return IB_QPS_ERR;
 }
 
-#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
-
 static inline u32 c4iw_ib_to_tpt_access(int a)
 {
        return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
index 38bddd02a9437470e0f3ed98a7e55afbc8cc7384..d5c92fc520d6471f5c1f41d77dd15b280a8a651b 100644 (file)
@@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
        return 0;
 }
 
-static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
+static int ib_to_fw_opcode(int ib_opcode)
+{
+       int opcode;
+
+       switch (ib_opcode) {
+       case IB_WR_SEND_WITH_INV:
+               opcode = FW_RI_SEND_WITH_INV;
+               break;
+       case IB_WR_SEND:
+               opcode = FW_RI_SEND;
+               break;
+       case IB_WR_RDMA_WRITE:
+               opcode = FW_RI_RDMA_WRITE;
+               break;
+       case IB_WR_RDMA_READ:
+       case IB_WR_RDMA_READ_WITH_INV:
+               opcode = FW_RI_READ_REQ;
+               break;
+       case IB_WR_REG_MR:
+               opcode = FW_RI_FAST_REGISTER;
+               break;
+       case IB_WR_LOCAL_INV:
+               opcode = FW_RI_LOCAL_INV;
+               break;
+       default:
+               opcode = -EINVAL;
+       }
+       return opcode;
+}
+
+static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
 {
        struct t4_cqe cqe = {};
        struct c4iw_cq *schp;
        unsigned long flag;
        struct t4_cq *cq;
+       int opcode;
 
        schp = to_c4iw_cq(qhp->ibqp.send_cq);
        cq = &schp->cq;
 
+       opcode = ib_to_fw_opcode(wr->opcode);
+       if (opcode < 0)
+               return opcode;
+
        cqe.u.drain_cookie = wr->wr_id;
        cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
-                                CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+                                CQE_OPCODE_V(opcode) |
                                 CQE_TYPE_V(1) |
                                 CQE_SWCQE_V(1) |
+                                CQE_DRAIN_V(1) |
                                 CQE_QPID_V(qhp->wq.sq.qid));
 
        spin_lock_irqsave(&schp->lock, flag);
@@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
                                           schp->ibcq.cq_context);
                spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
        }
+       return 0;
+}
+
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
+                               struct ib_send_wr **bad_wr)
+{
+       int ret = 0;
+
+       while (wr) {
+               ret = complete_sq_drain_wr(qhp, wr);
+               if (ret) {
+                       *bad_wr = wr;
+                       break;
+               }
+               wr = wr->next;
+       }
+       return ret;
 }
 
 static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
@@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 
        cqe.u.drain_cookie = wr->wr_id;
        cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
-                                CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+                                CQE_OPCODE_V(FW_RI_SEND) |
                                 CQE_TYPE_V(0) |
                                 CQE_SWCQE_V(1) |
+                                CQE_DRAIN_V(1) |
                                 CQE_QPID_V(qhp->wq.sq.qid));
 
        spin_lock_irqsave(&rchp->lock, flag);
@@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
        }
 }
 
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+{
+       while (wr) {
+               complete_rq_drain_wr(qhp, wr);
+               wr = wr->next;
+       }
+}
+
 int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                   struct ib_send_wr **bad_wr)
 {
@@ -875,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         */
        if (qhp->wq.flushed) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               complete_sq_drain_wr(qhp, wr);
+               err = complete_sq_drain_wrs(qhp, wr, bad_wr);
                return err;
        }
        num_wrs = t4_sq_avail(&qhp->wq);
@@ -1023,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         */
        if (qhp->wq.flushed) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               complete_rq_drain_wr(qhp, wr);
+               complete_rq_drain_wrs(qhp, wr);
                return err;
        }
        num_wrs = t4_rq_avail(&qhp->wq);
index e9ea94268d51545b07400ed9cba095d8461fb2a1..79e8ee12c391cf6d800911d4b2f0e16063c6de39 100644 (file)
@@ -197,6 +197,11 @@ struct t4_cqe {
 #define CQE_SWCQE_G(x)    ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
 #define CQE_SWCQE_V(x)   ((x)<<CQE_SWCQE_S)
 
+#define CQE_DRAIN_S       10
+#define CQE_DRAIN_M       0x1
+#define CQE_DRAIN_G(x)    ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
+#define CQE_DRAIN_V(x)   ((x)<<CQE_DRAIN_S)
+
 #define CQE_STATUS_S      5
 #define CQE_STATUS_M      0x1F
 #define CQE_STATUS_G(x)   ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -213,6 +218,7 @@ struct t4_cqe {
 #define CQE_OPCODE_V(x)   ((x)<<CQE_OPCODE_S)
 
 #define SW_CQE(x)         (CQE_SWCQE_G(be32_to_cpu((x)->header)))
+#define DRAIN_CQE(x)      (CQE_DRAIN_G(be32_to_cpu((x)->header)))
 #define CQE_QPID(x)       (CQE_QPID_G(be32_to_cpu((x)->header)))
 #define CQE_TYPE(x)       (CQE_TYPE_G(be32_to_cpu((x)->header)))
 #define SQ_TYPE(x)       (CQE_TYPE((x)))
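
A standalone sketch of the _S/_M/_G/_V macro convention the new CQE_DRAIN definitions follow: _S is the bit position, _M the field mask, _V() packs a value into the header word and _G() extracts it again. The be32_to_cpu() conversion that DRAIN_CQE() applies to the on-wire header is left out here.

#include <stdint.h>
#include <stdio.h>

#define CQE_DRAIN_S    10
#define CQE_DRAIN_M    0x1
#define CQE_DRAIN_V(x) ((x) << CQE_DRAIN_S)
#define CQE_DRAIN_G(x) (((x) >> CQE_DRAIN_S) & CQE_DRAIN_M)

int main(void)
{
        uint32_t header = 0;

        header |= CQE_DRAIN_V(1);                   /* mark a software drain CQE */
        printf("drain=%u\n", CQE_DRAIN_G(header));  /* prints drain=1 */
        return 0;
}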
index 4a9b4d7efe6362f31457672882e882af3bfdfc46..8ce9118d4a7fbac52494de9e5b489dc44365706b 100644 (file)
@@ -1131,7 +1131,6 @@ struct hfi1_devdata {
        u16 pcie_lnkctl;
        u16 pcie_devctl2;
        u32 pci_msix0;
-       u32 pci_lnkctl3;
        u32 pci_tph2;
 
        /*
index 09e50fd2a08f07bf7b2d42d3d4b4a1a00644b2ce..8c7e7a60b71584d5e587f6ef664cc773ab991991 100644 (file)
@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
        if (ret)
                goto error;
 
-       ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                                    dd->pci_lnkctl3);
-       if (ret)
-               goto error;
-
-       ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
-       if (ret)
-               goto error;
-
+       if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+               ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+                                            dd->pci_tph2);
+               if (ret)
+                       goto error;
+       }
        return 0;
 
 error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
        if (ret)
                goto error;
 
-       ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                                   &dd->pci_lnkctl3);
-       if (ret)
-               goto error;
-
-       ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
-       if (ret)
-               goto error;
-
+       if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+               ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+                                           &dd->pci_tph2);
+               if (ret)
+                       goto error;
+       }
        return 0;
 
 error:
index 470995fa38d23ee9c5917f9986c53c582db8d542..6f6712f87a730de3cc8c05aeb73f50de73c07487 100644 (file)
@@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
        return err;
 }
 
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-                               bool reset, void *out, int out_size)
-{
-       u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
-       MLX5_SET(query_cong_statistics_in, in, opcode,
-                MLX5_CMD_OP_QUERY_CONG_STATISTICS);
-       MLX5_SET(query_cong_statistics_in, in, clear, reset);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
                               void *out, int out_size)
 {
index af4c24596274deabd07ea0d95b719f4fd5d990e1..78ffded7cc2c59e2744d6279d96adc6e10cd1348 100644 (file)
@@ -37,8 +37,6 @@
 #include <linux/mlx5/driver.h>
 
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-                               bool reset, void *out, int out_size);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
                               void *out, int out_size);
 int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
index 543d0a4c8bf36eb43af6d9047c7eaa6cd75d9638..8ac50de2b2421ea66cb23eff88c09f8ecf3a0f94 100644 (file)
@@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        }
 
        INIT_LIST_HEAD(&context->vma_private_list);
+       mutex_init(&context->vma_private_list_mutex);
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
 
@@ -1624,7 +1625,9 @@ static void  mlx5_ib_vma_close(struct vm_area_struct *area)
         * mlx5_ib_disassociate_ucontext().
         */
        mlx5_ib_vma_priv_data->vma = NULL;
+       mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
        list_del(&mlx5_ib_vma_priv_data->list);
+       mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
        kfree(mlx5_ib_vma_priv_data);
 }
 
@@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
                return -ENOMEM;
 
        vma_prv->vma = vma;
+       vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
        vma->vm_private_data = vma_prv;
        vma->vm_ops =  &mlx5_ib_vm_ops;
 
+       mutex_lock(&ctx->vma_private_list_mutex);
        list_add(&vma_prv->list, vma_head);
+       mutex_unlock(&ctx->vma_private_list_mutex);
 
        return 0;
 }
@@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
         * mlx5_ib_vma_close.
         */
        down_write(&owning_mm->mmap_sem);
+       mutex_lock(&context->vma_private_list_mutex);
        list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
                                 list) {
                vma = vma_private->vma;
@@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
                list_del(&vma_private->list);
                kfree(vma_private);
        }
+       mutex_unlock(&context->vma_private_list_mutex);
        up_write(&owning_mm->mmap_sem);
        mmput(owning_mm);
        put_task_struct(owning_process);
@@ -3737,34 +3745,6 @@ free:
        return ret;
 }
 
-static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
-                                      struct mlx5_ib_port *port,
-                                      struct rdma_hw_stats *stats)
-{
-       int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
-       void *out;
-       int ret, i;
-       int offset = port->cnts.num_q_counters;
-
-       out = kvzalloc(outlen, GFP_KERNEL);
-       if (!out)
-               return -ENOMEM;
-
-       ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
-       if (ret)
-               goto free;
-
-       for (i = 0; i < port->cnts.num_cong_counters; i++) {
-               stats->value[i + offset] =
-                       be64_to_cpup((__be64 *)(out +
-                                    port->cnts.offsets[i + offset]));
-       }
-
-free:
-       kvfree(out);
-       return ret;
-}
-
 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                struct rdma_hw_stats *stats,
                                u8 port_num, int index)
@@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
        num_counters = port->cnts.num_q_counters;
 
        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
-               ret = mlx5_ib_query_cong_counters(dev, port, stats);
+               ret = mlx5_lag_query_cong_counters(dev->mdev,
+                                                  stats->value +
+                                                  port->cnts.num_q_counters,
+                                                  port->cnts.num_cong_counters,
+                                                  port->cnts.offsets +
+                                                  port->cnts.num_q_counters);
                if (ret)
                        return ret;
                num_counters += port->cnts.num_cong_counters;
index 6dd8cac78de2c44854f1d5006dd058d2bc81e170..2c5f3533bbc9cbf08ce4fc816ecb86bc5ff04612 100644 (file)
@@ -115,6 +115,8 @@ enum {
 struct mlx5_ib_vma_private_data {
        struct list_head list;
        struct vm_area_struct *vma;
+       /* protect vma_private_list add/del */
+       struct mutex *vma_private_list_mutex;
 };
 
 struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
        /* Transport Domain number */
        u32                     tdn;
        struct list_head        vma_private_list;
+       /* protect vma_private_list add/del */
+       struct mutex            vma_private_list_mutex;
 
        unsigned long           upd_xlt_page;
        /* protect ODP/KSM */
index ee0ee1f9994b4fae933d8590b72ba138d29e176d..d109fe8290a70964d71e44ceb1408215f24d2058 100644 (file)
@@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
        MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
        MLX5_SET(mkc, mkc, umr_en, 1);
 
+       mr->ibmr.device = pd->device;
        err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
        if (err)
                goto err_destroy_psv;
index 63bc2efc34eb57e37f364e5b73e5a157e6040e47..4f7bd3b6a3152937a8ec2fa4de8637e3d9391a9a 100644 (file)
@@ -94,7 +94,7 @@ struct pvrdma_cq {
        u32 cq_handle;
        bool is_kernel;
        atomic_t refcnt;
-       wait_queue_head_t wait;
+       struct completion free;
 };
 
 struct pvrdma_id_table {
@@ -175,7 +175,7 @@ struct pvrdma_srq {
        u32 srq_handle;
        int npages;
        refcount_t refcnt;
-       wait_queue_head_t wait;
+       struct completion free;
 };
 
 struct pvrdma_qp {
@@ -197,7 +197,7 @@ struct pvrdma_qp {
        bool is_kernel;
        struct mutex mutex; /* QP state mutex. */
        atomic_t refcnt;
-       wait_queue_head_t wait;
+       struct completion free;
 };
 
 struct pvrdma_dev {
index 3562c0c30492d07a7d1eddacb033ef769a1f9264..e529622cefad6a501492dc165458b738636842b3 100644 (file)
@@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
                pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
 
        atomic_set(&cq->refcnt, 1);
-       init_waitqueue_head(&cq->wait);
+       init_completion(&cq->free);
        spin_lock_init(&cq->cq_lock);
 
        memset(cmd, 0, sizeof(*cmd));
@@ -230,8 +230,9 @@ err_cq:
 
 static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
 {
-       atomic_dec(&cq->refcnt);
-       wait_event(cq->wait, !atomic_read(&cq->refcnt));
+       if (atomic_dec_and_test(&cq->refcnt))
+               complete(&cq->free);
+       wait_for_completion(&cq->free);
 
        if (!cq->is_kernel)
                ib_umem_release(cq->umem);
index 1f4e18717a006da9cd3566318123f0fff1038df9..e92681878c93f4b0d4f29724700cc7ba474d7de4 100644 (file)
@@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
                ibqp->event_handler(&e, ibqp->qp_context);
        }
        if (qp) {
-               atomic_dec(&qp->refcnt);
-               if (atomic_read(&qp->refcnt) == 0)
-                       wake_up(&qp->wait);
+               if (atomic_dec_and_test(&qp->refcnt))
+                       complete(&qp->free);
        }
 }
 
@@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
                ibcq->event_handler(&e, ibcq->cq_context);
        }
        if (cq) {
-               atomic_dec(&cq->refcnt);
-               if (atomic_read(&cq->refcnt) == 0)
-                       wake_up(&cq->wait);
+               if (atomic_dec_and_test(&cq->refcnt))
+                       complete(&cq->free);
        }
 }
 
@@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
        }
        if (srq) {
                if (refcount_dec_and_test(&srq->refcnt))
-                       wake_up(&srq->wait);
+                       complete(&srq->free);
        }
 }
 
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
                if (cq && cq->ibcq.comp_handler)
                        cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                if (cq) {
-                       atomic_dec(&cq->refcnt);
-                       if (atomic_read(&cq->refcnt))
-                               wake_up(&cq->wait);
+                       if (atomic_dec_and_test(&cq->refcnt))
+                               complete(&cq->free);
                }
                pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
        }
index 10420a18d02f46dc07f1698387e11105ab76c004..4059308e1454a5bfda72a31d9931afaa50d39441 100644 (file)
@@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                spin_lock_init(&qp->rq.lock);
                mutex_init(&qp->mutex);
                atomic_set(&qp->refcnt, 1);
-               init_waitqueue_head(&qp->wait);
+               init_completion(&qp->free);
 
                qp->state = IB_QPS_RESET;
 
@@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
 
        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 
-       atomic_dec(&qp->refcnt);
-       wait_event(qp->wait, !atomic_read(&qp->refcnt));
+       if (atomic_dec_and_test(&qp->refcnt))
+               complete(&qp->free);
+       wait_for_completion(&qp->free);
+
+       if (!qp->is_kernel) {
+               if (qp->rumem)
+                       ib_umem_release(qp->rumem);
+               if (qp->sumem)
+                       ib_umem_release(qp->sumem);
+       }
 
        pvrdma_page_dir_cleanup(dev, &qp->pdir);
 
index 826ccb864596dc19879211f9cdeefee47e536d5c..5acebb1ef631ae0070d28917530345689a2654de 100644 (file)
@@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
 
        spin_lock_init(&srq->lock);
        refcount_set(&srq->refcnt, 1);
-       init_waitqueue_head(&srq->wait);
+       init_completion(&srq->free);
 
        dev_dbg(&dev->pdev->dev,
                "create shared receive queue from user space\n");
@@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
        dev->srq_tbl[srq->srq_handle] = NULL;
        spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
 
-       refcount_dec(&srq->refcnt);
-       wait_event(srq->wait, !refcount_read(&srq->refcnt));
+       if (refcount_dec_and_test(&srq->refcnt))
+               complete(&srq->free);
+       wait_for_completion(&srq->free);
 
        /* There is no support for kernel clients, so this is safe. */
        ib_umem_release(srq->umem);
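
A minimal kernel-style sketch of the teardown pattern the pvrdma hunks converge on, with illustrative object and helper names: each user drops its reference through a helper that fires the completion when the count reaches zero, and the destroy path drops the creator's reference and then sleeps on the completion, closing the lost-wakeup window of the old wait_event()-on-atomic scheme.

#include <linux/atomic.h>
#include <linux/completion.h>

struct obj {
        atomic_t refcnt;          /* atomic_set(&o->refcnt, 1) at creation */
        struct completion free;   /* init_completion(&o->free) at creation */
};

static void obj_put(struct obj *o)
{
        if (atomic_dec_and_test(&o->refcnt))
                complete(&o->free);
}

static void obj_destroy(struct obj *o)
{
        obj_put(o);                     /* drop the creator's reference */
        wait_for_completion(&o->free);  /* wait until the last user is gone */
        /* safe to tear down and free o here */
}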
index 3b96cdaf9a835d0faebad2b6727dde1132f109f1..e6151a29c412a36d7d9db66833ad659ab853656b 100644 (file)
@@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
                ipoib_ib_dev_down(dev);
 
        if (level == IPOIB_FLUSH_HEAVY) {
-               rtnl_lock();
                if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                        ipoib_ib_dev_stop(dev);
 
-               result = ipoib_ib_dev_open(dev);
-               rtnl_unlock();
-               if (result)
+               if (ipoib_ib_dev_open(dev))
                        return;
 
                if (netif_queue_stopped(dev))
@@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
        struct ipoib_dev_priv *priv =
                container_of(work, struct ipoib_dev_priv, flush_heavy);
 
+       rtnl_lock();
        __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
+       rtnl_unlock();
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
index 7d5eb004091d1d64f6956825e9dd14bf975a8b68..97baf88d950589afa9a712005d903fa1603eea14 100644 (file)
@@ -4184,7 +4184,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
                               struct irq_cfg *cfg);
 
 static int irq_remapping_activate(struct irq_domain *domain,
-                                 struct irq_data *irq_data, bool early)
+                                 struct irq_data *irq_data, bool reserve)
 {
        struct amd_ir_data *data = irq_data->chip_data;
        struct irq_2_irte *irte_info = &data->irq_2_irte;
index 76a193c7fcfc69b012d3e6e1f1dd246ab8d5acc8..66f69af2c2191f6a247a82acc227ed64b85c6bff 100644 (file)
@@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
 }
 
 static int intel_irq_remapping_activate(struct irq_domain *domain,
-                                       struct irq_data *irq_data, bool early)
+                                       struct irq_data *irq_data, bool reserve)
 {
        intel_ir_reconfigure_irte(irq_data, true);
        return 0;
index 4039e64cd34211db8fac8ebc2f993c5e081e9c83..06f025fd5726f6b230d51c880e7b8accf9e8c738 100644 (file)
@@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 }
 
 static int its_irq_domain_activate(struct irq_domain *domain,
-                                  struct irq_data *d, bool early)
+                                  struct irq_data *d, bool reserve)
 {
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
        u32 event = its_get_event_id(d);
@@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
 }
 
 static int its_vpe_irq_domain_activate(struct irq_domain *domain,
-                                      struct irq_data *d, bool early)
+                                      struct irq_data *d, bool reserve)
 {
        struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
        struct its_node *its;
index 06f29cf5018a151f7d35641d90b8d043aec85516..cee59fe1321c44f9e37c9b6e5bcfc1a57fb98f41 100644 (file)
@@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id)
  */
 static struct lock_class_key intc_irqpin_irq_lock_class;
 
+/* And this is for the request mutex */
+static struct lock_class_key intc_irqpin_irq_request_class;
+
 static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
                                      irq_hw_number_t hw)
 {
@@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
 
        intc_irqpin_dbg(&p->irq[hw], "map");
        irq_set_chip_data(virq, h->host_data);
-       irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class);
+       irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class,
+                             &intc_irqpin_irq_request_class);
        irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq);
        return 0;
 }
index fd83c7f77a95d4998cff58be3d78ba467faddeab..f3654fd2eaf31b1fad6553c4f63ca04ebdbe0431 100644 (file)
@@ -186,7 +186,7 @@ void led_blink_set(struct led_classdev *led_cdev,
                   unsigned long *delay_on,
                   unsigned long *delay_off)
 {
-       del_timer_sync(&led_cdev->blink_timer);
+       led_stop_software_blink(led_cdev);
 
        clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
        clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
index 09cf3699e354415046af18c21eae3d98a445a53e..a307832d7e45fd90e046c7118d7fe505036d3cf6 100644 (file)
@@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = {
 };
 
 static struct lock_class_key arizona_irq_lock_class;
+static struct lock_class_key arizona_irq_request_class;
 
 static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
                              irq_hw_number_t hw)
@@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
        struct arizona *data = h->host_data;
 
        irq_set_chip_data(virq, data);
-       irq_set_lockdep_class(virq, &arizona_irq_lock_class);
+       irq_set_lockdep_class(virq, &arizona_irq_lock_class,
+               &arizona_irq_request_class);
        irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq);
        irq_set_nested_thread(virq, 1);
        irq_set_noprobe(virq);
index c9714072e22465d4b23d8101038f782b084b2dca..59c82cdcf48d8a508613dbc7b1c98654285de28f 100644 (file)
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        u8 *ptr;
        u8 *rx_buf;
        u8 sum;
+       u8 rx_byte;
        int ret = 0, final_ret;
 
        len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_packet(ec_dev,
-                                       ec_msg->insize + sizeof(*response));
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_packet(ec_dev,
+                               ec_msg->insize + sizeof(*response));
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        int i, len;
        u8 *ptr;
        u8 *rx_buf;
+       u8 rx_byte;
        int sum;
        int ret = 0, final_ret;
 
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_response(ec_dev,
-                                       ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_response(ec_dev,
+                               ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
                           sizeof(struct ec_response_get_protocol_info);
        ec_dev->dout_size = sizeof(struct ec_host_request);
 
+       ec_spi->last_transfer_ns = ktime_get_ns();
 
        err = cros_ec_register(ec_dev);
        if (err) {
index da16bf45fab43ee9a946beef340f4cd2a224156e..dc94ffc6321a84dd25ce08d0f1a9374d40d4cead 100644 (file)
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
 EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
 
 static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
-                             struct device_node *node)
+                             struct device_node *parent)
 {
+       struct device_node *node;
+
        if (pdata && pdata->codec)
                return true;
 
-       if (of_find_node_by_name(node, "codec"))
+       node = of_get_child_by_name(parent, "codec");
+       if (node) {
+               of_node_put(node);
                return true;
+       }
 
        return false;
 }
index d66502d36ba0b3202d1c15c08540fa8aade42a32..dd19f17a1b637543965dd94e64d0d44b9178f64c 100644 (file)
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
 };
 
 
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
 {
-#ifdef CONFIG_OF
-       if (of_find_node_by_name(node, "vibra"))
+       struct device_node *node;
+
+       node = of_get_child_by_name(parent, "vibra");
+       if (node) {
+               of_node_put(node);
                return true;
-#endif
+       }
+
        return false;
 }
 
index f80e911b8843819db8dcd1956c76ce2bf60b5ab8..73b6055774474e322b07cda4144c48b5b235a55c 100644 (file)
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
        if (!ops->oobbuf)
                ops->ooblen = 0;
 
-       if (offs < 0 || offs + ops->len >= mtd->size)
+       if (offs < 0 || offs + ops->len > mtd->size)
                return -EINVAL;
 
        if (ops->ooblen) {
index e0eb51d8c0129937b35157ccdc107e5ef54c038a..dd56a671ea4285af0f5079bc652ecf4a32410272 100644 (file)
@@ -1763,7 +1763,7 @@ try_dmaread:
                        err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
                                                              addr);
                        /* erased page bitflips corrected */
-                       if (err > 0)
+                       if (err >= 0)
                                return err;
                }
 
index 484f7fbc3f7d2d11cd66fc3416e64ab38d47f852..a8bde6665c24f7e20e6103959ceee16c5d3ec5c8 100644 (file)
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
                goto out_ce;
        }
 
-       gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
-       if (IS_ERR(gpiomtd->nwp)) {
-               ret = PTR_ERR(gpiomtd->nwp);
+       gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
+       if (IS_ERR(gpiomtd->ale)) {
+               ret = PTR_ERR(gpiomtd->ale);
                goto out_ce;
        }
 
index 50f8d4a1b9832326070045d0c294d22393001fbd..d4d824ef64e9fb395af3bc549daae72b96731e16 100644 (file)
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                return ret;
        }
 
-       /* handle the block mark swapping */
-       block_mark_swapping(this, payload_virt, auxiliary_virt);
-
        /* Loop over status bytes, accumulating ECC status. */
        status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                max_bitflips = max_t(unsigned int, max_bitflips, *status);
        }
 
+       /* handle the block mark swapping */
+       block_mark_swapping(this, buf, auxiliary_virt);
+
        if (oob_required) {
                /*
                 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
index 3c63b16d485f4bb3a7587e9e6d36dd0e121668d2..d9efbc8d783b84b128379e0ce58a43b005a8ab58 100644 (file)
@@ -159,6 +159,8 @@ struct arc_emac_priv {
        unsigned int link;
        unsigned int duplex;
        unsigned int speed;
+
+       unsigned int rx_missed_errors;
 };
 
 /**
index 3241af1ce7182824c09ee3ad774f122565f6c940..bd277b0dc615118a58b81dfba5b040e26fa667ba 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "emac.h"
 
+static void arc_emac_restart(struct net_device *ndev);
+
 /**
  * arc_emac_tx_avail - Return the number of available slots in the tx ring.
  * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
                        continue;
                }
 
-               pktlen = info & LEN_MASK;
-               stats->rx_packets++;
-               stats->rx_bytes += pktlen;
-               skb = rx_buff->skb;
-               skb_put(skb, pktlen);
-               skb->dev = ndev;
-               skb->protocol = eth_type_trans(skb, ndev);
-
-               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
-                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
-
-               /* Prepare the BD for next cycle */
-               rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
-                                                        EMAC_BUFFER_SIZE);
-               if (unlikely(!rx_buff->skb)) {
+               /* Prepare the BD for the next cycle. Call netif_receive_skb()
+                * only if a new skb was allocated and mapped, to avoid holes
+                * in the RX FIFO.
+                */
+               skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
+               if (unlikely(!skb)) {
+                       if (net_ratelimit())
+                               netdev_err(ndev, "cannot allocate skb\n");
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
-                       /* Because receive_skb is below, increment rx_dropped */
                        stats->rx_dropped++;
                        continue;
                }
 
-               /* receive_skb only if new skb was allocated to avoid holes */
-               netif_receive_skb(skb);
-
-               addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
+               addr = dma_map_single(&ndev->dev, (void *)skb->data,
                                      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
                if (dma_mapping_error(&ndev->dev, addr)) {
                        if (net_ratelimit())
-                               netdev_err(ndev, "cannot dma map\n");
-                       dev_kfree_skb(rx_buff->skb);
+                               netdev_err(ndev, "cannot map dma buffer\n");
+                       dev_kfree_skb(skb);
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
+                       stats->rx_dropped++;
                        continue;
                }
+
+               /* unmap the previously mapped skb */
+               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+               pktlen = info & LEN_MASK;
+               stats->rx_packets++;
+               stats->rx_bytes += pktlen;
+               skb_put(rx_buff->skb, pktlen);
+               rx_buff->skb->dev = ndev;
+               rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
+
+               netif_receive_skb(rx_buff->skb);
+
+               rx_buff->skb = skb;
                dma_unmap_addr_set(rx_buff, addr, addr);
                dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
 
@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
        return work_done;
 }
 
+/**
+ * arc_emac_rx_miss_handle - handle R_MISS register
+ * @ndev:      Pointer to the net_device structure.
+ */
+static void arc_emac_rx_miss_handle(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       unsigned int miss;
+
+       miss = arc_reg_get(priv, R_MISS);
+       if (miss) {
+               stats->rx_errors += miss;
+               stats->rx_missed_errors += miss;
+               priv->rx_missed_errors += miss;
+       }
+}
+
+/**
+ * arc_emac_rx_stall_check - check RX stall
+ * @ndev:      Pointer to the net_device structure.
+ * @budget:    How many BDs requested to process on 1 call.
+ * @work_done: How many BDs processed
+ *
+ * Under certain conditions the EMAC stops receiving incoming packets and
+ * continuously increments the R_MISS register instead of saving data into
+ * the provided buffer. This function detects that condition and restarts
+ * the EMAC.
+ */
+static void arc_emac_rx_stall_check(struct net_device *ndev,
+                                   int budget, unsigned int work_done)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct arc_emac_bd *rxbd;
+
+       if (work_done)
+               priv->rx_missed_errors = 0;
+
+       if (priv->rx_missed_errors && budget) {
+               rxbd = &priv->rxbd[priv->last_rx_bd];
+               if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
+                       arc_emac_restart(ndev);
+                       priv->rx_missed_errors = 0;
+               }
+       }
+}
+
 /**
  * arc_emac_poll - NAPI poll handler.
  * @napi:      Pointer to napi_struct structure.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
        unsigned int work_done;
 
        arc_emac_tx_clean(ndev);
+       arc_emac_rx_miss_handle(ndev);
 
        work_done = arc_emac_rx(ndev, budget);
        if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
                arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
        }
 
+       arc_emac_rx_stall_check(ndev, budget, work_done);
+
        return work_done;
 }
 
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
                if (status & MSER_MASK) {
                        stats->rx_missed_errors += 0x100;
                        stats->rx_errors += 0x100;
+                       priv->rx_missed_errors += 0x100;
+                       napi_schedule(&priv->napi);
                }
 
                if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 
+/**
+ * arc_emac_restart - Restart EMAC
+ * @ndev:      Pointer to net_device structure.
+ *
+ * This function performs a hardware reset of the EMAC in order to restore
+ * reception of network packets.
+ */
+static void arc_emac_restart(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       int i;
+
+       if (net_ratelimit())
+               netdev_warn(ndev, "restarting stalled EMAC\n");
+
+       netif_stop_queue(ndev);
+
+       /* Disable interrupts */
+       arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Disable EMAC */
+       arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+       /* Return the sk_buff to system */
+       arc_free_tx_queue(ndev);
+
+       /* Clean Tx BD's */
+       priv->txbd_curr = 0;
+       priv->txbd_dirty = 0;
+       memset(priv->txbd, 0, TX_RING_SZ);
+
+       for (i = 0; i < RX_BD_NUM; i++) {
+               struct arc_emac_bd *rxbd = &priv->rxbd[i];
+               unsigned int info = le32_to_cpu(rxbd->info);
+
+               if (!(info & FOR_EMAC)) {
+                       stats->rx_errors++;
+                       stats->rx_dropped++;
+               }
+               /* Return ownership to EMAC */
+               rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+       }
+       priv->last_rx_bd = 0;
+
+       /* Make sure info is visible to EMAC before enable */
+       wmb();
+
+       /* Enable interrupts */
+       arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Enable EMAC */
+       arc_reg_or(priv, R_CTRL, EN_MASK);
+
+       netif_start_queue(ndev);
+}
+
 static const struct net_device_ops arc_emac_netdev_ops = {
        .ndo_open               = arc_emac_open,
        .ndo_stop               = arc_emac_stop,
index 4c739d5355d2279a5f1c4cd23e8ccf3288788e8d..8ae269ec17a119b419afc92eeb66dde76d43d473 100644 (file)
@@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 
        del_timer_sync(&bp->timer);
 
-       if (IS_PF(bp)) {
+       if (IS_PF(bp) && !BP_NOMCP(bp)) {
                /* Set ALWAYS_ALIVE bit in shmem */
                bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
                bnx2x_drv_pulse(bp);
@@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
        bp->cnic_loaded = false;
 
        /* Clear driver version indication in shmem */
-       if (IS_PF(bp))
+       if (IS_PF(bp) && !BP_NOMCP(bp))
                bnx2x_update_mng_version(bp);
 
        /* Check if there are pending parity attentions. If there are - set
index 91e2a7560b48d572d26e8566c9a3b0667083d45d..ddd5d3ebd20111f667536aac61141b10e5bb2e7c 100644 (file)
@@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp)
 
        do {
                bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
+
+               /* If we read all 0xFFs, it means we are in a PCI error state
+                * and should bail out to avoid crashes on the adapter's FW
+                * reads.
+                */
+               if (bp->common.shmem_base == 0xFFFFFFFF) {
+                       bp->flags |= NO_MCP_FLAG;
+                       return -ENODEV;
+               }
+
                if (bp->common.shmem_base) {
                        val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]);
                        if (val & SHR_MEM_VALIDITY_MB)
@@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
                BNX2X_ERR("IO slot reset --> driver unload\n");
 
                /* MCP should have been reset; Need to wait for validity */
-               bnx2x_init_shmem(bp);
+               if (bnx2x_init_shmem(bp)) {
+                       rtnl_unlock();
+                       return PCI_ERS_RESULT_DISCONNECT;
+               }
 
                if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
                        u32 v;
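The first hunk above treats an all-ones read of the shared-memory base as a sign the adapter is in a PCI error state: config/memory reads from a surprise-removed or errored PCI device return 0xFFFFFFFF. A generic sketch of that pattern, independent of bnx2x and shown only as illustration:

#include <linux/io.h>
#include <linux/types.h>

/* Generic form of the "all Fs" check: a 32-bit read that comes back as
 * 0xFFFFFFFF from a register that can never legitimately hold that value
 * usually means the PCI device has dropped off the bus.
 */
static bool device_seems_gone(void __iomem *reg)
{
	return readl(reg) == 0xFFFFFFFF;
}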
index de51c2177d03b3cf9e4653226bd8901b1d29834e..8995cfefbfcf1aa1a46f1cbd2f1bdb6722dca7bb 100644 (file)
@@ -4,11 +4,13 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2005-2014 Broadcom Corporation.
+ * Copyright (C) 2005-2016 Broadcom Corporation.
+ * Copyright (C) 2016-2017 Broadcom Limited.
  *
  * Firmware is:
  *     Derived from proprietary unpublished source code,
- *     Copyright (C) 2000-2003 Broadcom Corporation.
+ *     Copyright (C) 2000-2016 Broadcom Corporation.
+ *     Copyright (C) 2016-2017 Broadcom Ltd.
  *
  *     Permission is hereby granted for the distribution of this firmware
  *     data in hexadecimal or equivalent format, provided this copyright
@@ -10052,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
 
        tw32(GRC_MODE, tp->grc_mode | val);
 
+       /* On some AMD platforms, MRRS is restricted to 4000 because of a
+        * south bridge limitation. As a workaround, the driver sets MRRS
+        * to 2048 instead of the default 4096.
+        */
+       if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
+           tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) {
+               val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK;
+               tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048);
+       }
+
        /* Setup the timer prescalar register.  Clock is always 66Mhz. */
        val = tr32(GRC_MISC_CFG);
        val &= ~0xff;
@@ -14225,7 +14237,10 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
        /* Reset PHY, otherwise the read DMA engine will be in a mode that
         * breaks all requests to 256 bytes.
         */
-       if (tg3_asic_rev(tp) == ASIC_REV_57766)
+       if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+           tg3_asic_rev(tp) == ASIC_REV_5717 ||
+           tg3_asic_rev(tp) == ASIC_REV_5719 ||
+           tg3_asic_rev(tp) == ASIC_REV_5720)
                reset_phy = true;
 
        err = tg3_restart_hw(tp, reset_phy);
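The MRRS workaround above programs a device-specific register (TG3PCI_DEV_STATUS_CTRL, added in tg3.h just below) rather than going through the PCI core. For reference only, capping the Max Read Request Size via the generic helper would look roughly like this sketch — an alternative illustration, not what the driver does here:

#include <linux/pci.h>

/* Illustration only: tg3 writes its own mapped copy of the PCIe device
 * control register; the generic way to cap MRRS is pcie_set_readrq().
 */
static int cap_mrrs_to_2048(struct pci_dev *pdev)
{
	/* 2048 bytes; valid values are powers of two from 128 to 4096 */
	return pcie_set_readrq(pdev, 2048);
}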
index c2d02d02d1e6f4558365fe53f152e13e045e0277..1f0271fa7c74026edbc68c7880a44287eded7066 100644 (file)
@@ -5,7 +5,8 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2007-2014 Broadcom Corporation.
+ * Copyright (C) 2007-2016 Broadcom Corporation.
+ * Copyright (C) 2016-2017 Broadcom Limited.
  */
 
 #ifndef _T3_H
@@ -96,6 +97,7 @@
 #define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR                0x0106
 #define TG3PCI_SUBDEVICE_ID_DELL_MERLOT                0x0109
 #define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT   0x010a
+#define TG3PCI_SUBDEVICE_ID_DELL_5762          0x07f0
 #define TG3PCI_SUBVENDOR_ID_COMPAQ             PCI_VENDOR_ID_COMPAQ
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE     0x007c
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2   0x009a
 #define TG3PCI_STD_RING_PROD_IDX       0x00000098 /* 64-bit */
 #define TG3PCI_RCV_RET_RING_CON_IDX    0x000000a0 /* 64-bit */
 /* 0xa8 --> 0xb8 unused */
+#define TG3PCI_DEV_STATUS_CTRL         0x000000b4
+#define  MAX_READ_REQ_SIZE_2048                 0x00004000
+#define  MAX_READ_REQ_MASK              0x00007000
 #define TG3PCI_DUAL_MAC_CTRL           0x000000b8
 #define  DUAL_MAC_CTRL_CH_MASK          0x00000003
 #define  DUAL_MAC_CTRL_ID               0x00000004
index 6105738552134809fc0d0abf8caadfa76f5e1f8a..8184d2fca9be017b4d374e1ba5b3fab6e7d77e9e 100644 (file)
@@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
                for (i = 0; i < txq->bd.ring_size; i++) {
                        /* Initialize the BD for every fragment in the page. */
                        bdp->cbd_sc = cpu_to_fec16(0);
+                       if (bdp->cbd_bufaddr &&
+                           !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                               dma_unmap_single(&fep->pdev->dev,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
+                                                fec16_to_cpu(bdp->cbd_datlen),
+                                                DMA_TO_DEVICE);
                        if (txq->tx_skbuff[i]) {
                                dev_kfree_skb_any(txq->tx_skbuff[i]);
                                txq->tx_skbuff[i] = NULL;
index bc93b69cfd1edcf62d11cd24d41a9ca74b8f0dcc..a539263cd79ce4be8fcc0cbfe6bfdd196336cd38 100644 (file)
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
        val &= ~MVNETA_GMAC0_PORT_ENABLE;
        mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
 
+       pp->link = 0;
+       pp->duplex = -1;
+       pp->speed = 0;
+
        udelay(200);
 }
 
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 
                if (!mvneta_rxq_desc_is_first_last(rx_status) ||
                    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+                       mvneta_rx_error(pp, rx_desc);
 err_drop_frame:
                        dev->stats.rx_errors++;
-                       mvneta_rx_error(pp, rx_desc);
                        /* leave the descriptor untouched */
                        continue;
                }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
        int queue;
 
-       for (queue = 0; queue < txq_number; queue++)
+       for (queue = 0; queue < rxq_number; queue++)
                mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
index 54adfd96785846f9e60a2ded11ab96bc0c196c7e..fc67e35b253e4e59c12227c3e24da9c0f5bae311 100644 (file)
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
        /* set GE2 TUNE */
        regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
 
-       /* GE1, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
-
-       /* GE2, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
+       /* Set link-down as the default for each GMAC. Each MAC's MCR will be
+        * set to a more appropriate value later, when mtk_phy_link_adjust()
+        * is invoked.
+        */
+       for (i = 0; i < MTK_MAC_COUNT; i++)
+               mtk_w32(eth, 0, MTK_MAC_MCR(i));
 
        /* Indicates CDM to parse the MTK special tag from CPU
         * which also is working out for untag packets.
index 1fffdebbc9e8994c70a19f4982f26d1de98be5f2..e9a1fbcc4adfa6e692902b551d0c535bfe019a9a 100644 (file)
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
-       case MLX5_CMD_OP_SET_RATE_LIMIT:
+       case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
        case MLX5_CMD_OP_QUERY_RATE_LIMIT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
-       MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
        MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
index c0872b3284cb405583642d71a0e2e540d4804b6f..543060c305a073c0457cc31ae7318f425a0e7c49 100644 (file)
@@ -82,6 +82,9 @@
        max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+       (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ                  18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
        struct mlx5_core_dev      *mdev;
        struct hwtstamp_config    *tstamp;
        int                        ix;
+       int                        cpu;
 };
 
 struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params,
+                              u8 rq_type);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
index c6d90b6dd80efa9a1ee82958adf5ef4dd3b4522d..9bcf38f4123b504637c080413078c23304d9e49e 100644 (file)
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
                                    struct ieee_ets *ets)
 {
+       bool have_ets_tc = false;
        int bw_sum = 0;
        int i;
 
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
        }
 
        /* Validate Bandwidth Sum */
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+                       have_ets_tc = true;
                        bw_sum += ets->tc_tx_bw[i];
+               }
+       }
 
-       if (bw_sum != 0 && bw_sum != 100) {
+       if (have_ets_tc && bw_sum != 100) {
                netdev_err(netdev,
                           "Failed to validate ETS: BW sum is illegal\n");
                return -EINVAL;
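The have_ets_tc flag introduced above changes the semantics of the bandwidth check: a configuration with ETS traffic classes whose bandwidths sum to 0 is now rejected (the old `bw_sum != 0 && bw_sum != 100` test let it through), while a configuration with no ETS TCs at all is still accepted. A condensed, standalone sketch of the corrected validation:

#include <linux/dcbnl.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Condensed form of the validation above: only when at least one traffic
 * class uses the ETS TSA must the advertised bandwidths sum to exactly 100%.
 */
static int validate_ets_bw(const struct ieee_ets *ets)
{
	bool have_ets_tc = false;
	int bw_sum = 0;
	int i;

	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
			have_ets_tc = true;
			bw_sum += ets->tc_tx_bw[i];
		}
	}

	return (have_ets_tc && bw_sum != 100) ? -EINVAL : 0;
}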
index 23425f02840581f6be591bc48cf8cccc8cc26443..8f05efa5c829bccb67ddd8b24dc2997adfe4a6c8 100644 (file)
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
        new_channels.params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-       mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
-                                new_channels.params.rq_wq_type);
+       new_channels.params.mpwqe_log_stride_sz =
+               MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
+       new_channels.params.mpwqe_log_num_strides =
+               MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
                return err;
 
        mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+       mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+                 MLX5E_GET_PFLAG(&priv->channels.params,
+                                 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
        return 0;
 }
 
index d2b057a3e512c1144d741ccffd5bf47b5f138a01..d9d8227f195f0e151ba948e0622ea90a411817c4 100644 (file)
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
        struct mlx5e_cq_param      icosq_cq;
 };
 
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-       return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
        return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
                MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type)
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params, u8 rq_type)
 {
        params->rq_wq_type = rq_type;
        params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                params->log_rq_size = is_kdump_kernel() ?
                        MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
                        MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-               params->mpwqe_log_stride_sz =
-                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-                       MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
-                       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+               params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
+                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
                params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
                        params->mpwqe_log_stride_sz;
                break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
+                               struct mlx5e_params *params)
 {
        u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
                    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
                    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                    MLX5_WQ_TYPE_LINKED_LIST;
-       mlx5e_set_rq_type_params(mdev, params, rq_type);
+       mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
        int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        int mtt_sz = mlx5e_get_wqe_mtt_sz();
        int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-       int node = mlx5e_get_node(c->priv, c->ix);
        int i;
 
        rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-                                       GFP_KERNEL, node);
+                                     GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->mpwqe.info)
                goto err_out;
 
        /* We allocate more than mtt_sz as we will align the pointer */
-       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-                                       GFP_KERNEL, node);
+       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+                                       cpu_to_node(c->cpu));
        if (unlikely(!rq->mpwqe.mtt_no_align))
                goto err_free_wqe_info;
 
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        int err;
        int i;
 
-       rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
        err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
                                &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                rq->wqe.frag_info =
                        kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-                                    GFP_KERNEL,
-                                    mlx5e_get_node(c->priv, c->ix));
+                                    GFP_KERNEL, cpu_to_node(c->cpu));
                if (!rq->wqe.frag_info) {
                        err = -ENOMEM;
                        goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
        sq->channel   = c;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
        struct mlx5_core_dev *mdev = c->priv->mdev;
        int err;
 
-       param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-       param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
+       param->wq.buf_numa_node = cpu_to_node(c->cpu);
+       param->wq.db_numa_node  = cpu_to_node(c->cpu);
        param->eq_ix   = c->ix;
 
        err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
        mlx5e_free_cq(cq);
 }
 
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+       return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 {
        struct mlx5e_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
+       int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
        unsigned int irq;
        int err;
        int eqn;
 
-       c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+       c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
        if (!c)
                return -ENOMEM;
 
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->mdev     = priv->mdev;
        c->tstamp   = &priv->tstamp;
        c->ix       = ix;
+       c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
        for (tc = 0; tc < c->num_tc; tc++)
                mlx5e_activate_txqsq(&c->sq[tc]);
        mlx5e_activate_rq(&c->rq);
-       netif_set_xps_queue(c->netdev,
-               mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+       netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                                                     struct sk_buff *skb,
                                                     netdev_features_t features)
 {
+       unsigned int offset = 0;
        struct udphdr *udph;
        u8 proto;
        u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                proto = ip_hdr(skb)->protocol;
                break;
        case htons(ETH_P_IPV6):
-               proto = ipv6_hdr(skb)->nexthdr;
+               proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
                break;
        default:
                goto out;
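The en_main.c changes above replace the pci_irq_get_node()-based lookups with a per-channel CPU taken from the driver's own irq_info masks (populated by the affinity-hint code added to main.c later in this diff), and then use cpu_to_node() for NUMA-local allocations. The core pattern, as a minimal sketch with an assumed caller-provided mask and size:

#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/topology.h>

/* Minimal sketch of the allocation pattern used throughout the hunks above:
 * pick the CPU a channel's completion IRQ is pinned to, then allocate the
 * channel's data structures on that CPU's NUMA node.
 */
static void *alloc_on_channel_node(const struct cpumask *irq_mask, size_t size)
{
	int cpu = cpumask_first(irq_mask);

	return kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
}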
index 60771865c99c9bf4402d042a760887c4497e0036..e7e7cef2bde402be23b191873a5790ed23fd7843 100644 (file)
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                        break;
                case MLX5_EVENT_TYPE_CQ_ERROR:
                        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                                       cqn, eqe->data.cq_err.syndrome);
                        mlx5_cq_event(dev, cqn, eqe->type);
                        break;
@@ -775,7 +775,7 @@ err1:
        return err;
 }
 
-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_GEN(dev, pg)) {
                err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
                if (err)
-                       return err;
+                       mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+                                     err);
        }
 #endif
 
        err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
        if (err)
-               return err;
+               mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+                             err);
 
-       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       if (err)
+               mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+                             err);
        mlx5_cmd_use_polling(dev);
 
        err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
        if (err)
-               mlx5_cmd_use_events(dev);
-
-       return err;
+               mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+                             err);
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
index 3c11d6e2160abeef5a893b7b81274a7ce315368c..14962969c5ba8c4462662eeb30ef10cbe1c27fa6 100644 (file)
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
index c70fd663a63301e7e89ef9ee00d37c7075fe1a0b..dfaad9ecb2b8f155c5cdf30451c572b2d10f1d37 100644 (file)
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 static void del_sw_flow_table(struct fs_node *node);
 static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
 /* Delete rule (destination) is a special case that
  * requires locking the FTE for the whole deletion process.
  */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
        return NULL;
 }
 
+static void del_sw_ns(struct fs_node *node)
+{
+       kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+       kfree(node);
+}
+
 static void del_hw_flow_table(struct fs_node *node)
 {
        struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
                return ERR_PTR(-ENOMEM);
 
        fs_prio->node.type = FS_TYPE_PRIO;
-       tree_init_node(&fs_prio->node, NULL, NULL);
+       tree_init_node(&fs_prio->node, NULL, del_sw_prio);
        tree_add_node(&fs_prio->node, &ns->node);
        fs_prio->num_levels = num_levels;
        fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
                return ERR_PTR(-ENOMEM);
 
        fs_init_namespace(ns);
-       tree_init_node(&ns->node, NULL, NULL);
+       tree_init_node(&ns->node, NULL, del_sw_ns);
        tree_add_node(&ns->node, &prio->node);
        list_add_tail(&ns->node.list, &prio->node.children);
 
index 1a0e797ad001ad672c954c228350ff9bcdea125b..21d29f7936f6c5d1e26c6e0d3f10644fd0f096c8 100644 (file)
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
        u32 fw;
        int i;
 
-       /* If the syndrom is 0, the device is OK and no need to print buffer */
+       /* If the syndrome is 0, the device is OK and no need to print buffer */
        if (!ioread8(&h->synd))
                return;
 
index d2a66dc4adc6d2933cfbc60c28cd49c67716a010..8812d7208e8f3522500b3f3e971b4a7341b22c8f 100644 (file)
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params)
 {
        /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-       mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+       mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
 
        /* RQ size in ipoib by default is 512 */
        params->log_rq_size = is_kdump_kernel() ?
index f26f97fe46666ff7a3f2ce6c496cb8936ee5cbb3..582b2f18010a317600f0238163557077d4c48a39 100644 (file)
@@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+                                      bool reset, void *out, int out_size)
+{
+       u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+       MLX5_SET(query_cong_statistics_in, in, opcode,
+                MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+       MLX5_SET(query_cong_statistics_in, in, clear, reset);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
 static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
 {
        return dev->priv.lag;
@@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
        /* If bonded, we do not add an IB device for PF1. */
        return false;
 }
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+                                u64 *values,
+                                int num_counters,
+                                size_t *offsets)
+{
+       int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+       struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+       struct mlx5_lag *ldev;
+       int num_ports;
+       int ret, i, j;
+       void *out;
+
+       out = kvzalloc(outlen, GFP_KERNEL);
+       if (!out)
+               return -ENOMEM;
+
+       memset(values, 0, sizeof(*values) * num_counters);
+
+       mutex_lock(&lag_mutex);
+       ldev = mlx5_lag_dev_get(dev);
+       if (ldev && mlx5_lag_is_bonded(ldev)) {
+               num_ports = MLX5_MAX_PORTS;
+               mdev[0] = ldev->pf[0].dev;
+               mdev[1] = ldev->pf[1].dev;
+       } else {
+               num_ports = 1;
+               mdev[0] = dev;
+       }
+
+       for (i = 0; i < num_ports; ++i) {
+               ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+               if (ret)
+                       goto unlock;
+
+               for (j = 0; j < num_counters; ++j)
+                       values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+       }
+
+unlock:
+       mutex_unlock(&lag_mutex);
+       kvfree(out);
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
index 5f323442cc5ac009d5006438d93183e96b85d0d9..8a89c7e8cd631f2e14cb7cbac99a8983964b7eda 100644 (file)
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
        struct mlx5_priv *priv = &dev->priv;
        struct mlx5_eq_table *table = &priv->eq_table;
-       struct irq_affinity irqdesc = {
-               .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
-       };
        int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
        int nvec;
 
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
        if (!priv->irq_info)
                goto err_free_msix;
 
-       nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
+       nvec = pci_alloc_irq_vectors(dev->pdev,
                        MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-                       PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
-                       &irqdesc);
+                       PCI_IRQ_MSIX);
        if (nvec < 0)
                return nvec;
 
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
        return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       priv->irq_info[i].mask);
+
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+       return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       irq_set_affinity_hint(irq, NULL);
+       free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+               err = mlx5_irq_set_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn)
 {
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_stop_eqs;
        }
 
+       err = mlx5_irq_set_affinity_hints(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+               goto err_affinity_hints;
+       }
+
        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
        mlx5_cleanup_fs(dev);
 
 err_fs:
+       mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
        free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
+       mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_put_uars_page(dev, priv->uar);
index db9e665ab10474f934131b5c2a8fa2173fa5feb6..889130edb71525ecd1f46e88a11b2d3fa0ef843f 100644 (file)
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
-       MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-       MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+       MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+       MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
        mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
        return err;
 }
index e651e4c02867740d35c07bfcf485860f26ad6409..d3c33e9eea7292412974802c4c38ded8898ed55c 100644 (file)
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
        return ret_entry;
 }
 
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
                                   u32 rate, u16 index)
 {
-       u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
 
-       MLX5_SET(set_rate_limit_in, in, opcode,
-                MLX5_CMD_OP_SET_RATE_LIMIT);
-       MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
-       MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+       MLX5_SET(set_pp_rate_limit_in, in, opcode,
+                MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
                entry->refcount++;
        } else {
                /* new rate limit */
-               err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+               err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
                if (err) {
                        mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
                                      rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
        entry->refcount--;
        if (!entry->refcount) {
                /* need to remove rate */
-               mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+               mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
                entry->rate = 0;
        }
 
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
        /* Clear all configured rates */
        for (i = 0; i < table->max_size; i++)
                if (table->rl_entry[i].rate)
-                       mlx5_set_rate_limit_cmd(dev, 0,
-                                               table->rl_entry[i].index);
+                       mlx5_set_pp_rate_limit_cmd(dev, 0,
+                                                  table->rl_entry[i].index);
 
        kfree(dev->priv.rl_table.rl_entry);
 }
index 07a9ba6cfc70a11f7b4c05c73c1b32a704b7e6ba..2f74953e4561511e23d8fe3219db89104e3dd9e3 100644 (file)
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
        struct mlx5e_vxlan *vxlan;
 
-       spin_lock(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-       spin_unlock(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
 
        return vxlan;
 }
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
        struct mlx5e_vxlan *vxlan;
        int err;
 
-       if (mlx5e_vxlan_lookup_port(priv, port))
+       mutex_lock(&priv->state_lock);
+       vxlan = mlx5e_vxlan_lookup_port(priv, port);
+       if (vxlan) {
+               atomic_inc(&vxlan->refcount);
                goto free_work;
+       }
 
        if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
                goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
                goto err_delete_port;
 
        vxlan->udp_port = port;
+       atomic_set(&vxlan->refcount, 1);
 
-       spin_lock_irq(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-       spin_unlock_irq(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
        if (err)
                goto err_free;
 
@@ -113,35 +118,39 @@ err_free:
 err_delete_port:
        mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 free_work:
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
 {
+       struct mlx5e_vxlan_work *vxlan_work =
+               container_of(work, struct mlx5e_vxlan_work, work);
+       struct mlx5e_priv *priv         = vxlan_work->priv;
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+       u16 port = vxlan_work->port;
        struct mlx5e_vxlan *vxlan;
+       bool remove = false;
 
-       spin_lock_irq(&vxlan_db->lock);
-       vxlan = radix_tree_delete(&vxlan_db->tree, port);
-       spin_unlock_irq(&vxlan_db->lock);
-
+       mutex_lock(&priv->state_lock);
+       spin_lock_bh(&vxlan_db->lock);
+       vxlan = radix_tree_lookup(&vxlan_db->tree, port);
        if (!vxlan)
-               return;
-
-       mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
-       kfree(vxlan);
-}
+               goto out_unlock;
 
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-       struct mlx5e_vxlan_work *vxlan_work =
-               container_of(work, struct mlx5e_vxlan_work, work);
-       struct mlx5e_priv *priv = vxlan_work->priv;
-       u16 port = vxlan_work->port;
+       if (atomic_dec_and_test(&vxlan->refcount)) {
+               radix_tree_delete(&vxlan_db->tree, port);
+               remove = true;
+       }
 
-       __mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+       spin_unlock_bh(&vxlan_db->lock);
 
+       if (remove) {
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
+       }
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
        struct mlx5e_vxlan *vxlan;
        unsigned int port = 0;
 
-       spin_lock_irq(&vxlan_db->lock);
+       /* Lockless since we are the only radix-tree consumers, wq is disabled */
        while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
                port = vxlan->udp_port;
-               spin_unlock_irq(&vxlan_db->lock);
-               __mlx5e_vxlan_core_del_port(priv, (u16)port);
-               spin_lock_irq(&vxlan_db->lock);
+               radix_tree_delete(&vxlan_db->tree, port);
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
        }
-       spin_unlock_irq(&vxlan_db->lock);
 }
index 5def12c048e38992e7edd9f870233e369ef4580e..5ef6ae7d568abcd1410bc403b526628a634799cb 100644 (file)
@@ -36,6 +36,7 @@
 #include "en.h"
 
 struct mlx5e_vxlan {
+       atomic_t refcount;
        u16 udp_port;
 };
 
index 72ef4f8025f00ff8810c2955b25b7f3baec49be1..be657b8533f04922a61a2f3a4b1aeddf3137cdf5 100644 (file)
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
        rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-                                   const struct mlxsw_sp_rif *rif)
-{
-       char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-       mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-                            rif->rif_index, rif->addr);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *rif)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-       mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-                                rif_list_node)
+                                rif_list_node) {
+               mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+       }
 }
 
 enum mlxsw_sp_nexthop_type {
index e379b78e86efa7c02dca2bc95afc1f79afc7800a..13190aa09faf748c16e1f00f7aee9097442aef85 100644 (file)
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
        return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
+static int
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+       int err;
+
+       nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
+       if (!nn->app_priv)
+               return -ENOMEM;
+
+       err = nfp_app_nic_vnic_alloc(app, nn, id);
+       if (err)
+               goto err_free_priv;
+
+       return 0;
+err_free_priv:
+       kfree(nn->app_priv);
+       return err;
+}
+
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
+       struct nfp_bpf_vnic *bv = nn->app_priv;
+
        if (nn->dp.bpf_offload_xdp)
                nfp_bpf_xdp_offload(app, nn, NULL);
+       WARN_ON(bv->tc_prog);
+       kfree(bv);
 }
 
 static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 {
        struct tc_cls_bpf_offload *cls_bpf = type_data;
        struct nfp_net *nn = cb_priv;
+       struct bpf_prog *oldprog;
+       struct nfp_bpf_vnic *bv;
+       int err;
 
        if (type != TC_SETUP_CLSBPF ||
            !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
            cls_bpf->common.protocol != htons(ETH_P_ALL) ||
            cls_bpf->common.chain_index)
                return -EOPNOTSUPP;
-       if (nn->dp.bpf_offload_xdp)
-               return -EBUSY;
 
        /* Only support TC direct action */
        if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
                return -EOPNOTSUPP;
        }
 
-       switch (cls_bpf->command) {
-       case TC_CLSBPF_REPLACE:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-       case TC_CLSBPF_ADD:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-       case TC_CLSBPF_DESTROY:
-               return nfp_net_bpf_offload(nn, NULL, true);
-       default:
+       if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
                return -EOPNOTSUPP;
+
+       bv = nn->app_priv;
+       oldprog = cls_bpf->oldprog;
+
+       /* Don't remove if oldprog doesn't match driver's state */
+       if (bv->tc_prog != oldprog) {
+               oldprog = NULL;
+               if (!cls_bpf->prog)
+                       return 0;
        }
+
+       err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+       if (err)
+               return err;
+
+       bv->tc_prog = cls_bpf->prog;
+       return 0;
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
 
        .extra_cap      = nfp_bpf_extra_cap,
 
-       .vnic_alloc     = nfp_app_nic_vnic_alloc,
+       .vnic_alloc     = nfp_bpf_vnic_alloc,
        .vnic_free      = nfp_bpf_vnic_free,
 
        .setup_tc       = nfp_bpf_setup_tc,
index 082a15f6dfb5b9ba806316bd4f272006c93749fb..57b6043177a3891c49096ab85906ee539b3d5ecb 100644 (file)
@@ -172,6 +172,14 @@ struct nfp_prog {
        struct list_head insns;
 };
 
+/**
+ * struct nfp_bpf_vnic - per-vNIC BPF priv structure
+ * @tc_prog:   currently loaded cls_bpf program
+ */
+struct nfp_bpf_vnic {
+       struct bpf_prog *tc_prog;
+};
+
 int nfp_bpf_jit(struct nfp_prog *prog);
 
 extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
index 70c92b649b299a1a16195e144e3da77cff25855c..38c924bdd32e46f3586eac77d5a63c361993fd09 100644 (file)
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
                return ret;
        }
 
-       ret = emac_mac_up(adpt);
+       ret = adpt->phy.open(adpt);
        if (ret) {
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
                return ret;
        }
 
-       ret = adpt->phy.open(adpt);
+       ret = emac_mac_up(adpt);
        if (ret) {
-               emac_mac_down(adpt);
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
+               adpt->phy.close(adpt);
                return ret;
        }
 
index e1e5ac0537606f2192d553c85795428b18fd615d..ce2ea2d491acac195eefe3f01f8ec9df08d3e77c 100644 (file)
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
        /* get timestamp value */
         u64(*get_timestamp) (void *desc, u32 ats);
        /* get rx timestamp status */
-       int (*get_rx_timestamp_status) (void *desc, u32 ats);
+       int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
        /* Display ring */
        void (*display_ring)(void *head, unsigned int size, bool rx);
        /* set MSS via context descriptor */
index 4b286e27c4ca5cdbbb7c457e31bef1b2e9e7bd94..7e089bf906b4f316034403f9a44fbfd191ee09eb 100644 (file)
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
        return ret;
 }
 
-static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
+                                                u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
        int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 
                        /* Check if timestamp is OK from context descriptor */
                        do {
-                               ret = dwmac4_rx_check_timestamp(desc);
+                               ret = dwmac4_rx_check_timestamp(next_desc);
                                if (ret < 0)
                                        goto exit;
                                i++;
index 7546b3664113a3d776fe19094df71b2adfb99e98..2a828a31281423082995bc332ec51a3f20989804 100644 (file)
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
+static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
+                                           u32 ats)
 {
        if (ats) {
                struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
index f817f8f365696d3388e73f85710d30dde43a7d41..db4cee57bb2465eb98fe38cb947624e779da4673 100644 (file)
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
+static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
 
index 721b616552611aa74ea077e744ec9a0c4836a48f..08c19ebd530674972ceb9ebcb41cd7af4b3fb58d 100644 (file)
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 {
        u32 value = readl(ioaddr + PTP_TCR);
        unsigned long data;
+       u32 reg_value;
 
        /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
         *      formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 
        data &= PTP_SSIR_SSINC_MASK;
 
+       reg_value = data;
        if (gmac4)
-               data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+               reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
 
-       writel(data, ioaddr + PTP_SSIR);
+       writel(reg_value, ioaddr + PTP_SSIR);
 
        return data;
 }
index d7250539d0bd0c61c92fc9460c9e1197bb57ac8f..337d53d12e94b3acfe745e48422b44d1939ad2c0 100644 (file)
@@ -482,7 +482,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
                desc = np;
 
        /* Check if timestamp is available */
-       if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
+       if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
                ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
                netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
index b5a8f750e4337ce04fc46b3cf784cf84a2c42c77..82104edca393b9b6662a18ef8ea0bdd8d3bb057d 100644 (file)
@@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
 
        /* SGMII-to-Copper mode initialization */
        if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+               u32 pause;
+
                /* Select page 18 */
                err = marvell_set_page(phydev, 18);
                if (err < 0)
@@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
                err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
                if (err < 0)
                        return err;
+
+               /* There appears to be a bug in the 88e1512 when used in
+                * SGMII to copper mode, where the AN advertisement register
+                * clears the pause bits each time a negotiation occurs.
+                * This means we can never be truly sure what was advertised,
+                * so disable Pause support.
+                */
+               pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+               phydev->supported &= ~pause;
+               phydev->advertising &= ~pause;
        }
 
        return m88e1121_config_init(phydev);
@@ -2073,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
                .config_init = &m88e1145_config_init,
-               .config_aneg = &marvell_config_aneg,
+               .config_aneg = &m88e1101_config_aneg,
                .read_status = &genphy_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
                .config_intr = &marvell_config_intr,
index bfd3090fb055bac4c40924205036119da5b6ce61..07c6048200c6164ac77a649468063e2fedd404c6 100644 (file)
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
        }
 
        ret = xgene_enet_ecc_init(pdata);
-       if (ret)
+       if (ret) {
+               if (pdata->dev->of_node)
+                       clk_disable_unprepare(pdata->clk);
                return ret;
+       }
        xgene_gmac_reset(pdata);
 
        return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                return ret;
 
        mdio_bus = mdiobus_alloc();
-       if (!mdio_bus)
-               return -ENOMEM;
+       if (!mdio_bus) {
+               ret = -ENOMEM;
+               goto out_clk;
+       }
 
        mdio_bus->name = "APM X-Gene MDIO bus";
 
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                mdio_bus->phy_mask = ~0;
                ret = mdiobus_register(mdio_bus);
                if (ret)
-                       goto out;
+                       goto out_mdiobus;
 
                acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
                                    acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
        }
 
        if (ret)
-               goto out;
+               goto out_mdiobus;
 
        pdata->mdio_bus = mdio_bus;
        xgene_mdio_status = true;
 
        return 0;
 
-out:
+out_mdiobus:
        mdiobus_free(mdio_bus);
 
+out_clk:
+       if (dev->of_node)
+               clk_disable_unprepare(pdata->clk);
+
        return ret;
 }
 
index ab4614113403455c1eee1c2ad69c7cebc6da5c9d..422ff6333c52da8c4a212123bdaf443945613e02 100644 (file)
@@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
                phydev->link = 0;
                if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
                        phydev->drv->config_intr(phydev);
+               return genphy_config_aneg(phydev);
        }
 
        return 0;
index 5dc9668dde34fe6b810c48f2bcd63e8609caa74e..827f3f92560e711a17514055cb96adfbc05708f7 100644 (file)
@@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
        pl->link_config.pause = MLO_PAUSE_AN;
        pl->link_config.speed = SPEED_UNKNOWN;
        pl->link_config.duplex = DUPLEX_UNKNOWN;
+       pl->link_config.an_enabled = true;
        pl->ops = ops;
        __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
 
@@ -951,6 +952,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
        mutex_lock(&pl->state_mutex);
        /* Configure the MAC to match the new settings */
        linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
+       pl->link_config.interface = config.interface;
        pl->link_config.speed = our_kset.base.speed;
        pl->link_config.duplex = our_kset.base.duplex;
        pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
index 19b9cc51079e75346af766c91786d66eaa92c3f2..31f4b7911ef84c85789011332e37c5314099d82c 100644 (file)
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                }
 
                ndst = &rt->dst;
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
                err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                goto out_unlock;
                }
 
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
 
                max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
                                           VXLAN_HEADROOM);
+               if (max_mtu < ETH_MIN_MTU)
+                       max_mtu = ETH_MIN_MTU;
+
+               if (!changelink && !conf->mtu)
+                       dev->mtu = max_mtu;
        }
 
        if (dev->mtu > max_mtu)
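Both the IPv4 and IPv6 transmit paths above now propagate the tunnel's reduced MTU to the inner flow's cached route via update_pmtu, so the sending socket learns about the VXLAN encapsulation overhead. A compact sketch of that step, factored into a helper purely for illustration:

#include <linux/skbuff.h>
#include <net/dst.h>

/* Sketch of the PMTU propagation added above: subtract the tunnel headroom
 * from the outer route's MTU and feed it back to the inner dst, if any.
 */
static void tunnel_update_inner_pmtu(struct sk_buff *skb,
				     struct dst_entry *outer_dst,
				     unsigned int headroom)
{
	struct dst_entry *inner = skb_dst(skb);

	if (inner)
		inner->ops->update_pmtu(inner, NULL, skb,
					dst_mtu(outer_dst) - headroom);
}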
index 10b075a46b266218c53d1e5674c1789e1e0f3d80..e8189c07b41f6b450f135ef703ec4e01568e311d 100644 (file)
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
        hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
        hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
                                         IEEE80211_STYPE_NULLFUNC |
+                                        IEEE80211_FCTL_TODS |
                                         (ps ? IEEE80211_FCTL_PM : 0));
        hdr->duration_id = cpu_to_le16(0);
        memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
                if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
                        continue;
 
-               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
                if (!skb) {
                        res = -ENOMEM;
                        goto out_err;
index e949e3302af4743472ac26c68e9f4d54a27bb11a..c586bcdb5190b1c9f6447cb9a380568a03840988 100644 (file)
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
        return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-                       struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+                       struct log_group *log)
 {
        return arena_read_bytes(arena,
-                       arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-                       2 * LOG_ENT_SIZE, 0);
+                       arena->logoff + (lane * LOG_GRP_SIZE), log,
+                       LOG_GRP_SIZE, 0);
 }
 
 static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
        debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
        debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
        debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+       debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+       debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
        }
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+       return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+       int idx0 = a->log_index[0];
+       int idx1 = a->log_index[1];
        int old;
 
        /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
         * the next time, the following logic works out to put this
         * (next) entry into [1]
         */
-       if (ent[0].seq == 0) {
-               ent[0].seq = cpu_to_le32(1);
+       if (log_seq(log, idx0) == 0) {
+               log->ent[idx0].seq = cpu_to_le32(1);
                return 0;
        }
 
-       if (ent[0].seq == ent[1].seq)
+       if (log_seq(log, idx0) == log_seq(log, idx1))
                return -EINVAL;
-       if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+       if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
                return -EINVAL;
 
-       if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-               if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+       if (log_seq(log, idx0) < log_seq(log, idx1)) {
+               if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
                        old = 0;
                else
                        old = 1;
        } else {
-               if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+               if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
                        old = 1;
                else
                        old = 0;
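
As a side note, the sequence comparison above can be restated on plain integers. A minimal standalone sketch (illustrative only, not driver code; seq is assumed to cycle 1 -> 2 -> 3 -> 1, which is what the equal/sum/difference checks imply):

#include <stdint.h>
#include <stdio.h>

/*
 * Same decision logic as btt_log_get_old() above, on plain host-order values:
 * the "older" slot is the one whose seq is one step behind in the cycle.
 */
static int older_slot(uint32_t seq0, uint32_t seq1)
{
        if (seq0 == 0)
                return 0;                       /* fresh lane: slot 0 is "old" */
        if (seq0 == seq1 || seq0 + seq1 > 5)
                return -1;                      /* corrupt pair */
        if (seq0 < seq1)
                return (seq1 - seq0 == 1) ? 0 : 1;
        return (seq0 - seq1 == 1) ? 1 : 0;
}

int main(void)
{
        /* (1,2) -> 0, (2,3) -> 0, (3,1) -> 0 (wrap-around), (2,1) -> 1 */
        printf("%d %d %d %d\n", older_slot(1, 2), older_slot(2, 3),
               older_slot(3, 1), older_slot(2, 1));
        return 0;
}
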
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 {
        int ret;
        int old_ent, ret_ent;
-       struct log_entry log[2];
+       struct log_group log;
 
-       ret = btt_log_read_pair(arena, lane, log);
+       ret = btt_log_group_read(arena, lane, &log);
        if (ret)
                return -EIO;
 
-       old_ent = btt_log_get_old(log);
+       old_ent = btt_log_get_old(arena, &log);
        if (old_ent < 0 || old_ent > 1) {
                dev_err(to_dev(arena),
                                "log corruption (%d): lane %d seq [%d, %d]\n",
-                       old_ent, lane, log[0].seq, log[1].seq);
+                               old_ent, lane, log.ent[arena->log_index[0]].seq,
+                               log.ent[arena->log_index[1]].seq);
                /* TODO set error state? */
                return -EIO;
        }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
        ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
        if (ent != NULL)
-               memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+               memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
        return ret_ent;
 }
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
                        u32 sub, struct log_entry *ent, unsigned long flags)
 {
        int ret;
-       /*
-        * Ignore the padding in log_entry for calculating log_half.
-        * The entry is 'committed' when we write the sequence number,
-        * and we want to ensure that that is the last thing written.
-        * We don't bother writing the padding as that would be extra
-        * media wear and write amplification
-        */
-       unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-       u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+       u32 group_slot = arena->log_index[sub];
+       unsigned int log_half = LOG_ENT_SIZE / 2;
        void *src = ent;
+       u64 ns_off;
 
+       ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+               (group_slot * LOG_ENT_SIZE);
        /* split the 16B write into atomic, durable halves */
        ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
        if (ret)
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
 {
        size_t logsize = arena->info2off - arena->logoff;
        size_t chunk_size = SZ_4K, offset = 0;
-       struct log_entry log;
+       struct log_entry ent;
        void *zerobuf;
        int ret;
        u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
        }
 
        for (i = 0; i < arena->nfree; i++) {
-               log.lba = cpu_to_le32(i);
-               log.old_map = cpu_to_le32(arena->external_nlba + i);
-               log.new_map = cpu_to_le32(arena->external_nlba + i);
-               log.seq = cpu_to_le32(LOG_SEQ_INIT);
-               ret = __btt_log_write(arena, i, 0, &log, 0);
+               ent.lba = cpu_to_le32(i);
+               ent.old_map = cpu_to_le32(arena->external_nlba + i);
+               ent.new_map = cpu_to_le32(arena->external_nlba + i);
+               ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+               ret = __btt_log_write(arena, i, 0, &ent, 0);
                if (ret)
                        goto free;
        }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
        return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+       return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+               && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of the four slots with
+ * zeroes. We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+       bool idx_set = false, initial_state = true;
+       int ret, log_index[2] = {-1, -1};
+       u32 i, j, next_idx = 0;
+       struct log_group log;
+       u32 pad_count = 0;
+
+       for (i = 0; i < arena->nfree; i++) {
+               ret = btt_log_group_read(arena, i, &log);
+               if (ret < 0)
+                       return ret;
+
+               for (j = 0; j < 4; j++) {
+                       if (!idx_set) {
+                               if (ent_is_padding(&log.ent[j])) {
+                                       pad_count++;
+                                       continue;
+                               } else {
+                                       /* Skip if index has been recorded */
+                                       if ((next_idx == 1) &&
+                                               (j == log_index[0]))
+                                               continue;
+                                       /* valid entry, record index */
+                                       log_index[next_idx] = j;
+                                       next_idx++;
+                               }
+                               if (next_idx == 2) {
+                                       /* two valid entries found */
+                                       idx_set = true;
+                               } else if (next_idx > 2) {
+                                       /* too many valid indices */
+                                       return -ENXIO;
+                               }
+                       } else {
+                               /*
+                                * once the indices have been set, just verify
+                                * that all subsequent log groups are either in
+                                * their initial state or follow the same
+                                * indices.
+                                */
+                               if (j == log_index[0]) {
+                                       /* entry must be 'valid' */
+                                       if (ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               } else if (j == log_index[1]) {
+                                       ;
+                                       /*
+                                        * log_index[1] can be padding if the
+                                        * lane never got used and it is still
+                                        * in the initial state (three 'padding'
+                                        * entries)
+                                        */
+                               } else {
+                                       /* entry must be invalid (padding) */
+                                       if (!ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               }
+                       }
+               }
+               /*
+                * If any of the log_groups have more than one valid,
+                * non-padding entry, then we are no longer in the
+                * initial_state
+                */
+               if (pad_count < 3)
+                       initial_state = false;
+               pad_count = 0;
+       }
+
+       if (!initial_state && !idx_set)
+               return -ENXIO;
+
+       /*
+        * If all the entries in the log were in the initial state,
+        * assume new padding scheme
+        */
+       if (initial_state)
+               log_index[1] = 1;
+
+       /*
+        * Only allow the known permutations of log/padding indices,
+        * i.e. (0, 1), and (0, 2)
+        */
+       if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
+               ; /* known index possibilities */
+       else {
+               dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+               return -ENXIO;
+       }
+
+       arena->log_index[0] = log_index[0];
+       arena->log_index[1] = log_index[1];
+       dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+       dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+       return 0;
+}
+
 static int btt_rtt_init(struct arena_info *arena)
 {
        arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        available -= 2 * BTT_PG_SIZE;
 
        /* The log takes a fixed amount of space based on nfree */
-       logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-                               BTT_PG_SIZE);
+       logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
        available -= logsize;
 
        /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        arena->mapoff = arena->dataoff + datasize;
        arena->logoff = arena->mapoff + mapsize;
        arena->info2off = arena->logoff + logsize;
+
+       /* Default log indices are (0,1) */
+       arena->log_index[0] = 0;
+       arena->log_index[1] = 1;
        return arena;
 }
 
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
                arena->external_lba_start = cur_nlba;
                parse_arena_meta(arena, super, cur_off);
 
+               ret = log_set_indices(arena);
+               if (ret) {
+                       dev_err(to_dev(arena),
+                               "Unable to deduce log/padding indices\n");
+                       goto out;
+               }
+
                mutex_init(&arena->err_lock);
                ret = btt_freelist_init(arena);
                if (ret)
index 578c2057524d396fbf7c2eee88804b58c1f17cfe..db3cb6d4d0d495df8978494ff6619e2923478d32 100644 (file)
@@ -27,6 +27,7 @@
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)     /* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)    /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
        INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, a 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[0] and arena->log_index[1] to the detected indices.
+ */
+
 struct log_entry {
        __le32 lba;
        __le32 old_map;
        __le32 new_map;
        __le32 seq;
-       __le64 padding[2];
+};
+
+struct log_group {
+       struct log_entry ent[4];
 };
 
 struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
  * @list:              List head for list of arenas
  * @debugfs_dir:       Debugfs dentry
  * @flags:             Arena flags - may signify error states.
+ * @err_lock:          Mutex for synchronizing error clearing.
+ * @log_index:         Indices of the valid log entries in a log_group
  *
  * arena_info is a per-arena handle. Once an arena is narrowed down for an
  * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
        /* Arena flags */
        u32 flags;
        struct mutex err_lock;
+       int log_index[2];
 };
 
 /**
@@ -176,6 +220,7 @@ struct arena_info {
  * @init_lock:         Mutex used for the BTT initialization
  * @init_state:                Flag describing the initialization state for the BTT
  * @num_arenas:                Number of arenas in the BTT instance
+ * @phys_bb:           Pointer to the namespace's badblocks structure
  */
 struct btt {
        struct gendisk *btt_disk;
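
For reference, the slot layout described above boils down to simple offset arithmetic. The standalone userspace sketch below (not part of the patch) computes where a log slot lands for the old (0, 2) and new (0, 1) index schemes; the 16-byte entry and 64-byte group sizes follow from the struct definitions in this hunk.

#include <stdint.h>
#include <stdio.h>

#define LOG_ENT_SIZE 16u                  /* sizeof(struct log_entry): 4 x __le32 */
#define LOG_GRP_SIZE (4u * LOG_ENT_SIZE)  /* sizeof(struct log_group): 4 slots */

/* Byte offset of valid slot 'sub' (0 or 1) for a lane, as in __btt_log_write(). */
static uint64_t log_slot_off(uint64_t logoff, uint32_t lane,
                             const int log_index[2], int sub)
{
        return logoff + (uint64_t)lane * LOG_GRP_SIZE +
               (uint64_t)log_index[sub] * LOG_ENT_SIZE;
}

int main(void)
{
        int old_fmt[2] = { 0, 2 };      /* pre-4.15: padding in slots 1 and 3 */
        int new_fmt[2] = { 0, 1 };      /* fixed layout: padding in slots 2 and 3 */

        /* lane 3, second valid entry: 3*64 + 2*16 = 224 vs. 3*64 + 1*16 = 208 */
        printf("old: %llu new: %llu\n",
               (unsigned long long)log_slot_off(0, 3, old_fmt, 1),
               (unsigned long long)log_slot_off(0, 3, new_fmt, 1));
        return 0;
}
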
index 65cc171c721de8774baf05f9650f3bbacf511eec..2adada1a58551776186d6f6928a437d462734a48 100644 (file)
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
        u64 checksum, offset;
-       unsigned long align;
        enum nd_pfn_mode mode;
        struct nd_namespace_io *nsio;
+       unsigned long align, start_pad;
        struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
        struct nd_namespace_common *ndns = nd_pfn->ndns;
        const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 
        align = le32_to_cpu(pfn_sb->align);
        offset = le64_to_cpu(pfn_sb->dataoff);
+       start_pad = le32_to_cpu(pfn_sb->start_pad);
        if (align == 0)
                align = 1UL << ilog2(offset);
        mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                return -EBUSY;
        }
 
-       if ((align && !IS_ALIGNED(offset, align))
+       if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
                        || !IS_ALIGNED(offset, PAGE_SIZE)) {
                dev_err(&nd_pfn->dev,
                                "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
        return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+       return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+                       ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
        u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
        start = nsio->res.start;
        size = PHYS_SECTION_ALIGN_UP(start + size) - start;
        if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-                               IORES_DESC_NONE) == REGION_MIXED) {
+                               IORES_DESC_NONE) == REGION_MIXED
+                       || !IS_ALIGNED(start + resource_size(&nsio->res),
+                               nd_pfn->align)) {
                size = resource_size(&nsio->res);
-               end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+               end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+                               start + size);
        }
 
        if (start_pad + end_trunc)
-               dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+               dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
                                dev_name(&ndns->dev), start_pad + end_trunc);
 
        /*
index f837d666cbd499c8e33a1514f55344a1796005a1..1e46e60b8f1080e339ebe81c1710dabb23afef75 100644 (file)
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       queue->limits.discard_alignment = size;
+       queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
 
        blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1705,7 +1705,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
-       if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+       if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+           is_power_of_2(ctrl->max_hw_sectors))
                blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
        blk_queue_virt_boundary(q, ctrl->page_size - 1);
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2870,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
-       nvme_setup_streams_ns(ctrl, ns);
 
        id = nvme_identify_ns(ctrl, nsid);
        if (!id)
@@ -2880,6 +2880,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        if (nvme_init_ns_head(ns, nsid, id, &new))
                goto out_free_id;
+       nvme_setup_streams_ns(ctrl, ns);
        
 #ifdef CONFIG_NVME_MULTIPATH
        /*
@@ -2965,8 +2966,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                return;
 
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-               if (blk_get_integrity(ns->disk))
-                       blk_integrity_unregister(ns->disk);
                nvme_mpath_remove_disk_links(ns);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_id_attr_group);
@@ -2974,6 +2973,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                        nvme_nvm_unregister_sysfs(ns);
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
+               if (blk_get_integrity(ns->disk))
+                       blk_integrity_unregister(ns->disk);
        }
 
        mutex_lock(&ns->ctrl->subsys->lock);
index 0a8af4daef8903f8ba983d345f1044498c57a975..794e66e4aa20115f4dc3a6b5fc12f706b2040bf4 100644 (file)
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
                /* initiate nvme ctrl ref counting teardown */
                nvme_uninit_ctrl(&ctrl->ctrl);
-               nvme_put_ctrl(&ctrl->ctrl);
 
                /* Remove core ctrl ref. */
                nvme_put_ctrl(&ctrl->ctrl);
index a346b4923550829eca8c7a8176c4ad39dbd54f7c..41d3a3c1104ec7f810c7f9c33a2cab414c475299 100644 (file)
@@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset,
                                 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE,
                                 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE);
 
-       for (i = offset; i < offset + bytes; i += efuse->config.word_size) {
-               addr = i / efuse->config.word_size;
+       for (i = 0; i < bytes; i += efuse->config.word_size) {
+               addr = (offset + i) / efuse->config.word_size;
 
                err = meson_mx_efuse_read_addr(efuse, addr, &tmp);
                if (err)
index a25fed52f7e94de4bd3dd5cb8b0922e1df8e81bf..41b740aed3a346e4bbc610959281649447f83bd4 100644 (file)
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
        iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed, so that many built-in components simply don't work.
+ * The following quirks disable the serial AUX port and the built-in ATI RV100
+ * Radeon 7000 graphics card, which both lack external connectors and are
+ * therefore useless; worse, the AUX port occupies ttyS0 and so makes these
+ * machines the only PARISC machines on which we can't use ttyS0 as the
+ * boot console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1292)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+       quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1291)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+       quirk_diva_aux_disable);
index 945099d49f8f9b08c2b159d2cf4899b1fdc4cea4..14fd865a512096393149fd63a3707305648f276f 100644 (file)
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
        if (pci_has_legacy_pm_support(pci_dev))
                return pci_legacy_resume_early(dev);
 
-       pci_update_current_state(pci_dev, PCI_D0);
+       /*
+        * pci_restore_state() requires the device to be in D0 (because of MSI
+        * restoration among other things), so force it into D0 in case the
+        * driver's "freeze" callbacks put it into a low-power state directly.
+        */
+       pci_set_power_state(pci_dev, PCI_D0);
        pci_restore_state(pci_dev);
 
        if (drv && drv->pm && drv->pm->thaw_noirq)
index accaaaccb662fd24b1b3cf5f8e22154f5f46f9eb..6601ad0dfb3ad23c77865138b3db7bd0c4f55ae8 100644 (file)
@@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev,
        int irq, error;
 
        irq = platform_get_irq_byname(pdev, name);
-       if (!irq)
+       if (irq < 0)
                return -ENODEV;
 
        error = devm_request_threaded_irq(ddata->dev, irq, NULL,
index cb09245e9b4c730528a7019b44f89c5abd20c7d9..c845facacb063e9a0bd2b244869d3c22c3409fbf 100644 (file)
@@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2
        tristate "Renesas R-Car generation 3 USB 2.0 PHY driver"
        depends on ARCH_RENESAS
        depends on EXTCON
+       depends on USB_SUPPORT
        select GENERIC_PHY
+       select USB_COMMON
        help
          Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs.
 
index ee85fa0ca4b05bac340121cd798291708091e59e..7492c8978217f45e04f591898bfb0ddbaac2a217 100644 (file)
@@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
                if (IS_ERR(phy)) {
                        dev_err(dev, "failed to create phy: %s\n",
                                child_np->name);
+                       pm_runtime_disable(dev);
                        return PTR_ERR(phy);
                }
 
@@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
        phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
        if (IS_ERR(phy_provider)) {
                dev_err(dev, "Failed to register phy provider\n");
+               pm_runtime_disable(dev);
                return PTR_ERR(phy_provider);
        }
 
index 4307bf0013e186cd859b779aaaf4c61ddd662a6c..63e916d4d0696156b244fb2c7e0e5f6de606c8e3 100644 (file)
@@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match);
 static struct device_node *
 tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 {
-       /*
-        * of_find_node_by_name() drops a reference, so make sure to grab one.
-        */
-       struct device_node *np = of_node_get(padctl->dev->of_node);
+       struct device_node *pads, *np;
+
+       pads = of_get_child_by_name(padctl->dev->of_node, "pads");
+       if (!pads)
+               return NULL;
 
-       np = of_find_node_by_name(np, "pads");
-       if (np)
-               np = of_find_node_by_name(np, name);
+       np = of_get_child_by_name(pads, name);
+       of_node_put(pads);
 
        return np;
 }
@@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 static struct device_node *
 tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index)
 {
-       /*
-        * of_find_node_by_name() drops a reference, so make sure to grab one.
-        */
-       struct device_node *np = of_node_get(pad->dev.of_node);
+       struct device_node *np, *lanes;
 
-       np = of_find_node_by_name(np, "lanes");
-       if (!np)
+       lanes = of_get_child_by_name(pad->dev.of_node, "lanes");
+       if (!lanes)
                return NULL;
 
-       return of_find_node_by_name(np, pad->soc->lanes[index].name);
+       np = of_get_child_by_name(lanes, pad->soc->lanes[index].name);
+       of_node_put(lanes);
+
+       return np;
 }
 
 static int
@@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad,
        unsigned int i;
        int err;
 
-       children = of_find_node_by_name(pad->dev.of_node, "lanes");
+       children = of_get_child_by_name(pad->dev.of_node, "lanes");
        if (!children)
                return -ENODEV;
 
@@ -444,21 +444,21 @@ static struct device_node *
 tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
                          unsigned int index)
 {
-       /*
-        * of_find_node_by_name() drops a reference, so make sure to grab one.
-        */
-       struct device_node *np = of_node_get(padctl->dev->of_node);
+       struct device_node *ports, *np;
+       char *name;
 
-       np = of_find_node_by_name(np, "ports");
-       if (np) {
-               char *name;
+       ports = of_get_child_by_name(padctl->dev->of_node, "ports");
+       if (!ports)
+               return NULL;
 
-               name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
-               if (!name)
-                       return ERR_PTR(-ENOMEM);
-               np = of_find_node_by_name(np, name);
-               kfree(name);
+       name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
+       if (!name) {
+               of_node_put(ports);
+               return ERR_PTR(-ENOMEM);
        }
+       np = of_get_child_by_name(ports, name);
+       kfree(name);
+       of_node_put(ports);
 
        return np;
 }
@@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
 
 static int tegra_xusb_padctl_probe(struct platform_device *pdev)
 {
-       struct device_node *np = of_node_get(pdev->dev.of_node);
+       struct device_node *np = pdev->dev.of_node;
        const struct tegra_xusb_padctl_soc *soc;
        struct tegra_xusb_padctl *padctl;
        const struct of_device_id *match;
@@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev)
        int err;
 
        /* for backwards compatibility with old device trees */
-       np = of_find_node_by_name(np, "pads");
+       np = of_get_child_by_name(np, "pads");
        if (!np) {
                dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n");
                return tegra_xusb_padctl_legacy_probe(pdev);
index bdedb6325c72a5fa0966377d53678722d18f07eb..4471fd94e1fe1f48b953360ad76e638e88f1a7ff 100644 (file)
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                        clear_bit(i, chip->irq.valid_mask);
        }
 
+       /*
+        * The same set of machines in chv_no_valid_mask[] has incorrectly
+        * configured GPIOs that generate spurious interrupts, so we use
+        * this same list to apply another quirk for them.
+        *
+        * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+        */
+       if (!need_valid_mask) {
+               /*
+                * Mask all interrupts the community is able to generate
+                * but leave the ones that can only generate GPEs unmasked.
+                */
+               chv_writel(GENMASK(31, pctrl->community->nirqs),
+                          pctrl->regs + CHV_INTMASK);
+       }
+
        /* Clear all interrupts */
        chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
index e6cd8de793e2ae66c791389ff686b42ad8084d84..3501491e5bfc8a5547c3ac820e01ad338dbb097b 100644 (file)
@@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = {
  */
 static struct lock_class_key pcs_lock_class;
 
+/* Class for the IRQ request mutex */
+static struct lock_class_key pcs_request_class;
+
 /*
  * REVISIT: Reads and writes could eventually use regmap or something
  * generic. But at least on omaps, some mux registers are performance
@@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq,
        irq_set_chip_data(irq, pcs_soc);
        irq_set_chip_and_handler(irq, &pcs->chip,
                                 handle_level_irq);
-       irq_set_lockdep_class(irq, &pcs_lock_class);
+       irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class);
        irq_set_noprobe(irq);
 
        return 0;
index a276c61be217b41b1ce35d33875d8a532033eff0..e62ab087bfd8afd788236d11137405d4abfca3f5 100644 (file)
@@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d,
 }
 
 static int stm32_gpio_domain_activate(struct irq_domain *d,
-                                     struct irq_data *irq_data, bool early)
+                                     struct irq_data *irq_data, bool reserve)
 {
        struct stm32_gpio_bank *bank = d->host_data;
        struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
index 6c815207f4f50470c1a0fea8992183aa67f69e37..3614df68830f8f6a4abd756c52ca4e1e72e8e1d1 100644 (file)
@@ -5386,6 +5386,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
 
+static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
+{
+       if (!cmd->hdr.return_code)
+               cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+       return cmd->hdr.return_code;
+}
+
 int qeth_setassparms_cb(struct qeth_card *card,
                        struct qeth_reply *reply, unsigned long data)
 {
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
                                (struct qeth_checksum_cmd *)reply->param;
 
        QETH_CARD_TEXT(card, 4, "chkdoccb");
-       if (cmd->hdr.return_code)
+       if (qeth_setassparms_inspect_rc(cmd))
                return 0;
 
        memset(chksum_cb, 0, sizeof(*chksum_cb));
index 6e3d81969a77cc895580f79fa8e3aaa3b8bb4fee..d52265416da2af0da11cca770304f33ab203ad20 100644 (file)
@@ -1725,6 +1725,7 @@ struct aac_dev
 #define FIB_CONTEXT_FLAG_NATIVE_HBA            (0x00000010)
 #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF        (0x00000020)
 #define FIB_CONTEXT_FLAG_SCSI_CMD      (0x00000040)
+#define FIB_CONTEXT_FLAG_EH_RESET      (0x00000080)
 
 /*
  *     Define the command values
index bdf127aaab41d814e2337d2944166a0498bf1a66..d55332de08f91ad8e54e1296867569a8fa109a34 100644 (file)
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
                        info = &aac->hba_map[bus][cid];
                        if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
                            info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
-                               fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+                               fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
                                cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
                        }
                }
index a4f28b7e4c65df81ef583eab878a3aa9fc45e0e4..e18877177f1b52d9c43ad3b991b858c80a6cc079 100644 (file)
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
                return req;
 
        for_each_bio(bio) {
-               ret = blk_rq_append_bio(req, bio);
+               struct bio *bounce_bio = bio;
+
+               ret = blk_rq_append_bio(req, &bounce_bio);
                if (ret)
                        return ERR_PTR(ret);
        }
index 449ef5adbb2bc3719b3ba72953ebbecef9e4d086..dfb8da83fa504c979e9ba0639b32a4cae2c8969c 100644 (file)
@@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
                            model, compatible);
 
        if (strflags)
-               devinfo->flags = simple_strtoul(strflags, NULL, 0);
-       else
-               devinfo->flags = flags;
-
+               flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+       devinfo->flags = flags;
        devinfo->compatible = compatible;
 
        if (compatible)
index be5e919db0e8cd9e713727a91bc46923673ea556..0880d975eed3a56c58d27172bfd18c1a59da5d4b 100644 (file)
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
-               int *bflags, int async)
+               blist_flags_t *bflags, int async)
 {
        int ret;
 
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
  *   - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_probe_and_add_lun(struct scsi_target *starget,
-                                 u64 lun, int *bflagsp,
+                                 u64 lun, blist_flags_t *bflagsp,
                                  struct scsi_device **sdevp,
                                  enum scsi_scan_mode rescan,
                                  void *hostdata)
 {
        struct scsi_device *sdev;
        unsigned char *result;
-       int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
+       blist_flags_t bflags;
+       int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
        struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
        /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
  *     Modifies sdevscan->lun.
  **/
 static void scsi_sequential_lun_scan(struct scsi_target *starget,
-                                    int bflags, int scsi_level,
+                                    blist_flags_t bflags, int scsi_level,
                                     enum scsi_scan_mode rescan)
 {
        uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
  *     0: scan completed (or no memory, so further scanning is futile)
  *     1: could not scan with REPORT LUN
  **/
-static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
+static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
                                enum scsi_scan_mode rescan)
 {
        unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
                unsigned int id, u64 lun, enum scsi_scan_mode rescan)
 {
        struct Scsi_Host *shost = dev_to_shost(parent);
-       int bflags = 0;
+       blist_flags_t bflags = 0;
        int res;
        struct scsi_target *starget;
 
index 50e7d7e4a86179b9a47d18569bb759be0b674b88..26ce17178401b645bd9548e49dd11fecb3babf8c 100644 (file)
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
-#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name
+#define BLIST_FLAG_NAME(name)                                  \
+       [ilog2((__force unsigned int)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
        for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
                const char *name = NULL;
 
-               if (!(sdev->sdev_bflags & BIT(i)))
+               if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
                        continue;
                if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
                        name = sdev_bflags_name[i];
@@ -1414,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget)
                 * check.
                 */
                if (sdev->channel != starget->channel ||
-                   sdev->id != starget->id ||
+                   sdev->id != starget->id)
+                       continue;
+               if (sdev->sdev_state == SDEV_DEL ||
+                   sdev->sdev_state == SDEV_CANCEL ||
                    !get_device(&sdev->sdev_gendev))
                        continue;
                spin_unlock_irqrestore(shost->host_lock, flags);
index d0219e36080c3b79109ac405eb0cd726545585fc..10ebb213ddb33e2920e2fe83e60cc712a50c3002 100644 (file)
 
 /* Our blacklist flags */
 enum {
-       SPI_BLIST_NOIUS = 0x1,
+       SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
 };
 
 /* blacklist table, modelled on scsi_devinfo.c */
 static struct {
        char *vendor;
        char *model;
-       unsigned flags;
+       blist_flags_t flags;
 } spi_static_device_list[] __initdata = {
        {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
        {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
 {
        struct scsi_device *sdev = to_scsi_device(dev);
        struct scsi_target *starget = sdev->sdev_target;
-       unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
-                                                     &sdev->inquiry[16],
-                                                     SCSI_DEVINFO_SPI);
+       blist_flags_t bflags;
+
+       bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
+                                            &sdev->inquiry[16],
+                                            SCSI_DEVINFO_SPI);
 
        /* Populate the target capability fields with the values
         * gleaned from the device inquiry */
index 1b06cf0375dcdbd6f1780ab7535bbaa4c5742916..3b3d1d050cacaa3d83dc615e29691ccc2c5de87f 100644 (file)
@@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
                case TEST_UNIT_READY:
                        break;
                default:
-                       set_host_byte(scmnd, DID_TARGET_FAILURE);
+                       set_host_byte(scmnd, DID_ERROR);
                }
                break;
        case SRB_STATUS_INVALID_LUN:
+               set_host_byte(scmnd, DID_NO_CONNECT);
                do_work = true;
                process_err_fn = storvsc_remove_lun;
                break;
index 77fe55ce790c61a8835c4e2338a36be43dafcbac..d65345312527ce450b539964aa0465e1e6787b44 100644 (file)
@@ -79,6 +79,7 @@
 #define A3700_SPI_BYTE_LEN             BIT(5)
 #define A3700_SPI_CLK_PRESCALE         BIT(0)
 #define A3700_SPI_CLK_PRESCALE_MASK    (0x1f)
+#define A3700_SPI_CLK_EVEN_OFFS                (0x10)
 
 #define A3700_SPI_WFIFO_THRS_BIT       28
 #define A3700_SPI_RFIFO_THRS_BIT       24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
 
        prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
 
+       /* For prescaler values over 15, we can only set it by steps of 2.
+        * Starting from A3700_SPI_CLK_EVEN_OFFS, we set values from 0 up to
+        * 30. We only use this range from 16 to 30.
+        */
+       if (prescale > 15)
+               prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
+
        val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
        val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
 
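
A quick worked example of the even-step prescaler encoding added above; this is a standalone sketch that mirrors the constants and arithmetic of the hunk, not the driver itself.

#include <stdio.h>

#define A3700_SPI_CLK_EVEN_OFFS         0x10
#define A3700_SPI_CLK_PRESCALE_MASK     0x1f
#define DIV_ROUND_UP(n, d)              (((n) + (d) - 1) / (d))

/* Prescalers above 15 are encoded as an even-step value offset by 0x10. */
static unsigned int a3700_encode_prescale(unsigned int prescale)
{
        if (prescale > 15)
                prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
        return prescale & A3700_SPI_CLK_PRESCALE_MASK;
}

int main(void)
{
        /* 15 stays 0x0f; 20 becomes 0x10 + 10 = 0x1a; 30 becomes 0x10 + 15 = 0x1f */
        printf("15 -> 0x%02x, 20 -> 0x%02x, 30 -> 0x%02x\n",
               a3700_encode_prescale(15), a3700_encode_prescale(20),
               a3700_encode_prescale(30));
        return 0;
}
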
index f95da364c2832b0142158e12c97648aab81b7165..66947097102370d0f54ba2559abddab1ac812a5d 100644 (file)
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
        pm_runtime_get_sync(&pdev->dev);
 
        /* reset the hardware and block queue progress */
-       spin_lock_irq(&as->lock);
        if (as->use_dma) {
                atmel_spi_stop_dma(master);
                atmel_spi_release_dma(master);
        }
 
+       spin_lock_irq(&as->lock);
        spi_writel(as, CR, SPI_BIT(SWRST));
        spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
        spi_readl(as, SR);
index 2ce875764ca646a2bdfb803cae33465ab8fa1786..0835a8d88fb8f85ab5ae44a4aa74d94121d19d87 100644 (file)
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
        /* Sets SPCMD */
        rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
-       /* Enables SPI function in master mode */
-       rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+       /* Sets RSPI mode */
+       rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
 
        return 0;
 }
index c5cd635c28f388bec2cfd47b9a6c6c9dcec9e046..41410031f8e99e6a1d54b8f94990df0133356ced 100644 (file)
@@ -525,7 +525,7 @@ err_free_master:
 
 static int sun4i_spi_remove(struct platform_device *pdev)
 {
-       pm_runtime_disable(&pdev->dev);
+       pm_runtime_force_suspend(&pdev->dev);
 
        return 0;
 }
index bc7100b93dfcf0c24213f479f9a5fffc41666315..e0b9fe1d0e37d98a7243ca35a56b1d62e024e8b3 100644 (file)
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
        while (remaining_words) {
                int n_words, tx_words, rx_words;
                u32 sr;
+               int stalled;
 
                n_words = min(remaining_words, xspi->buffer_size);
 
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 
                /* Read out all the data from the Rx FIFO */
                rx_words = n_words;
+               stalled = 10;
                while (rx_words) {
+                       if (rx_words == n_words && !(stalled--) &&
+                           !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+                           (sr & XSPI_SR_RX_EMPTY_MASK)) {
+                               dev_err(&spi->dev,
+                                       "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+                               xspi_init_hw(xspi);
+                               return -EIO;
+                       }
+
                        if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
                                xilinx_spi_rx(xspi);
                                rx_words--;
index a517b2d29f1bb6efce2f22a5520a009c1375bcfb..8f6494158d3d018b847ad989f6fa1c2648848094 100644 (file)
@@ -37,7 +37,7 @@ config ION_CHUNK_HEAP
 
 config ION_CMA_HEAP
        bool "Ion CMA heap support"
-       depends on ION && CMA
+       depends on ION && DMA_CMA
        help
          Choose this option to enable CMA heaps with Ion. This heap is backed
          by the Contiguous Memory Allocator (CMA). If your system has these
index a7d9b0e9857225abf7a6c88885d15e76afc6627b..f480885e346b69cb6ec881abefca0ee3aaf9fd15 100644 (file)
@@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
        mutex_lock(&buffer->lock);
        list_for_each_entry(a, &buffer->attachments, list) {
                dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
-                                   DMA_BIDIRECTIONAL);
+                                   direction);
        }
        mutex_unlock(&buffer->lock);
 
@@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
        mutex_lock(&buffer->lock);
        list_for_each_entry(a, &buffer->attachments, list) {
                dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
-                                      DMA_BIDIRECTIONAL);
+                                      direction);
        }
        mutex_unlock(&buffer->lock);
 
index dd5545d9990a0f61ac2a1ba6249ec851602d1b3b..86196ffd2faf9a7733cd931fa48e24e5ace3fd22 100644 (file)
@@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
        struct ion_cma_heap *cma_heap = to_cma_heap(heap);
        struct sg_table *table;
        struct page *pages;
+       unsigned long size = PAGE_ALIGN(len);
+       unsigned long nr_pages = size >> PAGE_SHIFT;
+       unsigned long align = get_order(size);
        int ret;
 
-       pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL);
+       if (align > CONFIG_CMA_ALIGNMENT)
+               align = CONFIG_CMA_ALIGNMENT;
+
+       pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
        if (!pages)
                return -ENOMEM;
 
@@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
        if (ret)
                goto free_mem;
 
-       sg_set_page(table->sgl, pages, len, 0);
+       sg_set_page(table->sgl, pages, size, 0);
 
        buffer->priv_virt = pages;
        buffer->sg_table = table;
@@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 free_mem:
        kfree(table);
 err:
-       cma_release(cma_heap->cma, pages, buffer->size);
+       cma_release(cma_heap->cma, pages, nr_pages);
        return -ENOMEM;
 }
 
@@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer)
 {
        struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
        struct page *pages = buffer->priv_virt;
+       unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
 
        /* release memory */
-       cma_release(cma_heap->cma, pages, buffer->size);
+       cma_release(cma_heap->cma, pages, nr_pages);
        /* release sg table */
        sg_free_table(buffer->sg_table);
        kfree(buffer->sg_table);
index 986c2a40d9780ecbbc2226d9be6fa74876e85681..8267119ccc8e73108bc8e4f2edb777861192f353 100644 (file)
@@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
                              ksocknal_nid2peerlist(id.nid));
        }
 
-       route2 = NULL;
        list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
-               if (route2->ksnr_ipaddr == ipaddr)
-                       break;
-
-               route2 = NULL;
-       }
-       if (!route2) {
-               ksocknal_add_route_locked(peer, route);
-               route->ksnr_share_count++;
-       } else {
-               ksocknal_route_decref(route);
-               route2->ksnr_share_count++;
+               if (route2->ksnr_ipaddr == ipaddr) {
+                       /* Route already exists, use the old one */
+                       ksocknal_route_decref(route);
+                       route2->ksnr_share_count++;
+                       goto out;
+               }
        }
-
+       /* Route doesn't already exist, add the new one */
+       ksocknal_add_route_locked(peer, route);
+       route->ksnr_share_count++;
+out:
        write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
        return 0;
index 7c69b4a9694d2016a8aac3b63a4b7d4399146688..0d99b242e82e3f84da25a47564f96db60be4b5f5 100644 (file)
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                        " %d i: %d bio: %p, allocating another"
                                        " bio\n", bio->bi_vcnt, i, bio);
 
-                               rc = blk_rq_append_bio(req, bio);
+                               rc = blk_rq_append_bio(req, &bio);
                                if (rc) {
                                        pr_err("pSCSI: failed to append bio\n");
                                        goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
        }
 
        if (bio) {
-               rc = blk_rq_append_bio(req, bio);
+               rc = blk_rq_append_bio(req, &bio);
                if (rc) {
                        pr_err("pSCSI: failed to append bio\n");
                        goto fail;
index 419a7a90bce0e21c40afc165506420aa9146fb72..f45bcbc63738ffb3598e958e1af529c443b98e1d 100644 (file)
@@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring)
                return;
 
        if (ring->start_poll) {
-               __ring_interrupt_mask(ring, false);
+               __ring_interrupt_mask(ring, true);
                ring->start_poll(ring->poll_data);
        } else {
                schedule_work(&ring->work);
index 427e0d5d8f135e56249c120fdfb50aca1f65021d..539b49adb6afd41190eee97f6eaf1950c8a1ac3f 100644 (file)
@@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 {
        struct n_tty_data *ldata = tty->disc_data;
 
-       if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+       if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
                bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
                ldata->line_start = ldata->read_tail;
                if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
                return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
        case TIOCINQ:
                down_write(&tty->termios_rwsem);
-               if (L_ICANON(tty))
+               if (L_ICANON(tty) && !L_EXTPROC(tty))
                        retval = inq_canon(ldata);
                else
                        retval = read_cnt(ldata);
index 3593ce0ec641d848f1a15438ffb76295558ab9a6..880009987460affefa3aac2bf61aee6174195f81 100644 (file)
@@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev)
        if (ret)
                goto err_mux;
 
-       ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi");
+       ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi");
        if (ulpi_node) {
                phy_node = of_get_next_available_child(ulpi_node, NULL);
                ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy");
index 78e92d29f8d98777c1294292808b1a868dcfcb7f..c821b4b9647e357468335fca83b3aa980e600315 100644 (file)
@@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
                case USB_SSP_CAP_TYPE:
                        ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
                        ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
-                               USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1;
+                               USB_SSP_SUBLINK_SPEED_ATTRIBS);
                        if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
                                dev->bos->ssp_cap = ssp_cap;
                        break;
index a10b346b9777dba58abe8346cb4926e42d8bb7aa..4024926c1d68c93e97e40f822c4a690a4c113987 100644 (file)
@@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* Microsoft LifeCam-VX700 v2.0 */
        { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
 
-       /* Logitech HD Pro Webcams C920, C920-C and C930e */
+       /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
        { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
        { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
        { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
+       { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
 
        /* Logitech ConferenceCam CC3000e */
        { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -149,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
        { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
 
+       /* ELSA MicroLink 56K */
+       { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
        /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
        { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
 
index 4f7895dbcf880ef2a1e3b863b7002825e3cb752c..e26e685d8a578fddffaa7ff220a0c4442ba4f5ce 100644 (file)
@@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id,
 static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 {
        dma_addr_t              dma;
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
 
        dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue);
        seq_printf(s, "%pad\n", &dma);
@@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 {
        dma_addr_t              dma;
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
 
        dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue);
        seq_printf(s, "%pad\n", &dma);
@@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 
 static int xhci_ring_cycle_show(struct seq_file *s, void *unused)
 {
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
 
        seq_printf(s, "%d\n", ring->cycle_state);
 
@@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci,
 }
 
 static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci,
-                                                  struct xhci_ring *ring,
+                                                  struct xhci_ring **ring,
                                                   const char *name,
                                                   struct dentry *parent)
 {
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
 
        snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
        epriv->root = xhci_debugfs_create_ring_dir(xhci,
-                                                  dev->eps[ep_index].new_ring,
+                                                  &dev->eps[ep_index].new_ring,
                                                   epriv->name,
                                                   spriv->root);
        spriv->eps[ep_index] = epriv;
@@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id)
        priv->dev = dev;
        dev->debugfs_private = priv;
 
-       xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring,
+       xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring,
                                     "ep00", priv->root);
 
        xhci_debugfs_create_context_files(xhci, priv->root, slot_id);
@@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci)
                                   ARRAY_SIZE(xhci_extcap_dbc),
                                   "reg-ext-dbc");
 
-       xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring,
+       xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring,
                                     "command-ring",
                                     xhci->debugfs_root);
 
-       xhci_debugfs_create_ring_dir(xhci, xhci->event_ring,
+       xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring,
                                     "event-ring",
                                     xhci->debugfs_root);
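
Sketch (not from the patch): the xhci-debugfs hunks above switch the files' private data from a cached struct xhci_ring * to a struct xhci_ring **, so each read dereferences whatever ring pointer the driver currently holds rather than a copy that can go stale once a ring is freed and reallocated. Below is a minimal userspace C illustration of that double-indirection pattern, assuming that is the intent of the change; all names (fake_ring, ring_show) are invented for the example.

        /* Sketch only -- plain userspace C, not xHCI code. */
        #include <stdio.h>
        #include <stdlib.h>

        struct fake_ring { int cycle_state; };

        /* Like xhci_ring_cycle_show(): "private" stores &owner->ring. */
        static void ring_show(void *private)
        {
                struct fake_ring *ring = *(struct fake_ring **)private;

                printf("cycle state: %d\n", ring->cycle_state);
        }

        int main(void)
        {
                struct fake_ring *ring = malloc(sizeof(*ring));
                void *private = &ring;          /* registered once, at file creation */

                ring->cycle_state = 1;
                ring_show(private);             /* prints 1 */

                /* The owner later replaces its ring, e.g. after reconfiguration. */
                free(ring);
                ring = malloc(sizeof(*ring));
                ring->cycle_state = 0;

                ring_show(private);             /* still valid: follows the live pointer */
                free(ring);
                return 0;
        }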
 
index 7ef1274ef7f7f245a03e539982da07f6242343b1..1aad89b8aba0b5b5b4610aa161bc7379ba45fbb7 100644 (file)
@@ -177,6 +177,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                xhci->quirks |= XHCI_TRUST_TX_LENGTH;
                xhci->quirks |= XHCI_BROKEN_STREAMS;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+                       pdev->device == 0x0014)
+               xhci->quirks |= XHCI_TRUST_TX_LENGTH;
        if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
                        pdev->device == 0x0015)
                xhci->quirks |= XHCI_RESET_ON_RESUME;
index 2424d3020ca364b22792376e36c21462af3b2f62..da6dbe3ebd8be9c8f137c6f500663be857184080 100644 (file)
@@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
        struct xhci_slot_ctx *slot_ctx;
        int i, ret;
 
-       xhci_debugfs_remove_slot(xhci, udev->slot_id);
-
 #ifndef CONFIG_USB_DEFAULT_PERSIST
        /*
         * We called pm_runtime_get_noresume when the device was attached.
@@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
        }
 
        ret = xhci_disable_slot(xhci, udev->slot_id);
-       if (ret)
+       if (ret) {
+               xhci_debugfs_remove_slot(xhci, udev->slot_id);
                xhci_free_virt_device(xhci, udev->slot_id);
+       }
 }
 
 int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
index 1aba9105b369678759a204780bb7b3d882b7d879..fc68952c994a5557bb8ca3e2de94f9d2c1b15791 100644 (file)
@@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = {
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
        { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
+       { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
        { }                                     /* Terminating entry */
 };
 
index 4faa09fe308ca0570cc9dcda6900ef74efa3e1d0..8b4ecd2bd297b84d5a86c9b02cd5c38d3a6e86a8 100644 (file)
 #define ICPDAS_I7561U_PID              0x0104
 #define ICPDAS_I7563U_PID              0x0105
 
+/*
+ * Airbus Defence and Space
+ */
+#define AIRBUS_DS_VID                  0x1e8e  /* Vendor ID */
+#define AIRBUS_DS_P8GR                 0x6001  /* Tetra P8GR */
+
 /*
  * RT Systems programming cables for various ham radios
  */
index 3b3513874cfd1e75a5380ee208f02c1144919cd1..b6320e3be42970c984cb86505251cde089437102 100644 (file)
@@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb);
 /* These Quectel products use Qualcomm's vendor ID */
 #define QUECTEL_PRODUCT_UC20                   0x9003
 #define QUECTEL_PRODUCT_UC15                   0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5                        0x9625
 
 #define QUECTEL_VENDOR_ID                      0x2c7c
 /* These Quectel products use Quectel's vendor ID */
@@ -280,6 +282,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE922_USBCFG3            0x1043
 #define TELIT_PRODUCT_LE922_USBCFG5            0x1045
 #define TELIT_PRODUCT_ME910                    0x1100
+#define TELIT_PRODUCT_ME910_DUAL_MODEM         0x1101
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4            0x1206
@@ -645,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = {
        .reserved = BIT(1) | BIT(3),
 };
 
+static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(3),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
        .sendsetup = BIT(0),
        .reserved = BIT(1) | BIT(2),
@@ -674,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
        .reserved = BIT(4) | BIT(5),
 };
 
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+       .reserved = BIT(1) | BIT(4),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1178,6 +1190,9 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
        { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+       /* Yuga products use Qualcomm vendor ID */
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+         .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
        /* Quectel products using Quectel vendor ID */
        { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
@@ -1244,6 +1259,8 @@ static const struct usb_device_id option_ids[] = {
                .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
                .driver_info = (kernel_ulong_t)&telit_me910_blacklist },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+               .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
                .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
index e3892541a489940f58bfa8f534ff0e765fc527f4..613f91add03da189c0fd5334cbccbbf720060079 100644 (file)
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
        {DEVICE_SWI(0x1199, 0x9079)},   /* Sierra Wireless EM74xx */
        {DEVICE_SWI(0x1199, 0x907a)},   /* Sierra Wireless EM74xx QDL */
        {DEVICE_SWI(0x1199, 0x907b)},   /* Sierra Wireless EM74xx */
+       {DEVICE_SWI(0x1199, 0x9090)},   /* Sierra Wireless EM7565 QDL */
+       {DEVICE_SWI(0x1199, 0x9091)},   /* Sierra Wireless EM7565 */
        {DEVICE_SWI(0x413c, 0x81a2)},   /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a3)},   /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a4)},   /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
                        break;
                case 2:
                        dev_dbg(dev, "NMEA GPS interface found\n");
+                       sendsetup = true;
                        break;
                case 3:
                        dev_dbg(dev, "Modem port found\n");
index a3df8ee82faff77d9c114ed605412ccfb0704ce6..e31a6f204397dba47b3356ec3bbfe420539dc05b 100644 (file)
@@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
         * step 1?
         */
        if (ud->tcp_socket) {
-               dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
-                       ud->tcp_socket);
+               dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd);
                kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
        }
 
index 4f48b306713f1a62942905122c9164beb134ce5f..c31c8402a0c55ddd2f4b463ebabd28be6438f490 100644 (file)
@@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
        struct stub_priv *priv;
        struct urb *urb;
 
-       dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+       dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
 
        while ((priv = stub_priv_pop(sdev))) {
                urb = priv->urb;
-               dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+               dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
+                       priv->seqnum);
                usb_kill_urb(urb);
 
                kmem_cache_free(stub_priv_cache, priv);
index 493ac2928391accc4984e3ceddbfdadd2690a26c..6c5a593139996fe11be9c2bf86eb8cc19a390ef1 100644 (file)
@@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
                if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
                        continue;
 
-               dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
-                        priv->urb);
-
                /*
                 * This matched urb is not completed yet (i.e., be in
                 * flight in usb hcd hardware/driver). Now we are
@@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
                ret = usb_unlink_urb(priv->urb);
                if (ret != -EINPROGRESS)
                        dev_err(&priv->urb->dev->dev,
-                               "failed to unlink a urb %p, ret %d\n",
-                               priv->urb, ret);
+                               "failed to unlink a urb # %lu, ret %d\n",
+                               priv->seqnum, ret);
 
                return 0;
        }
@@ -342,14 +339,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
 
        epd = &ep->desc;
 
-       /* validate transfer_buffer_length */
-       if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) {
-               dev_err(&sdev->udev->dev,
-                       "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n",
-                       pdu->u.cmd_submit.transfer_buffer_length);
-               return -1;
-       }
-
        if (usb_endpoint_xfer_control(epd)) {
                if (dir == USBIP_DIR_OUT)
                        return usb_sndctrlpipe(udev, epnum);
@@ -482,8 +471,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
        }
 
        /* allocate urb transfer buffer, if needed */
-       if (pdu->u.cmd_submit.transfer_buffer_length > 0 &&
-           pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) {
+       if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
                priv->urb->transfer_buffer =
                        kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
                                GFP_KERNEL);
index 53172b1f6257cf9f8d72dac57212c0ec939a8dc4..f0ec41a50cbc16f9814ca8b2a7590dbe3c465fab 100644 (file)
@@ -88,7 +88,7 @@ void stub_complete(struct urb *urb)
        /* link a urb to the queue of tx. */
        spin_lock_irqsave(&sdev->priv_lock, flags);
        if (sdev->ud.tcp_socket == NULL) {
-               usbip_dbg_stub_tx("ignore urb for closed connection %p", urb);
+               usbip_dbg_stub_tx("ignore urb for closed connection\n");
                /* It will be freed in stub_device_cleanup_urbs(). */
        } else if (priv->unlinking) {
                stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
@@ -190,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev)
 
                /* 1. setup usbip_header */
                setup_ret_submit_pdu(&pdu_header, urb);
-               usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-                                 pdu_header.base.seqnum, urb);
+               usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+                                 pdu_header.base.seqnum);
                usbip_header_correct_endian(&pdu_header, 1);
 
                iov[iovnum].iov_base = &pdu_header;
index f7978933b40290787ded82fd7b244783610d206e..7b219d9109b41282537363caa976e53c9b675332 100644 (file)
@@ -317,26 +317,20 @@ int usbip_recv(struct socket *sock, void *buf, int size)
        struct msghdr msg = {.msg_flags = MSG_NOSIGNAL};
        int total = 0;
 
+       if (!sock || !buf || !size)
+               return -EINVAL;
+
        iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
 
        usbip_dbg_xmit("enter\n");
 
-       if (!sock || !buf || !size) {
-               pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
-                      size);
-               return -EINVAL;
-       }
-
        do {
-               int sz = msg_data_left(&msg);
+               msg_data_left(&msg);
                sock->sk->sk_allocation = GFP_NOIO;
 
                result = sock_recvmsg(sock, &msg, MSG_WAITALL);
-               if (result <= 0) {
-                       pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
-                                sock, buf + total, sz, result, total);
+               if (result <= 0)
                        goto err;
-               }
 
                total += result;
        } while (msg_data_left(&msg));
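
Sketch (not from the patch): usbip_recv() above now rejects NULL/zero arguments before touching them and keeps calling sock_recvmsg() until msg_data_left() reports that the whole buffer has arrived. The same validate-then-receive-all shape in plain POSIX C; the function name and the -EINVAL/-errno conventions are chosen for the example only.

        #include <errno.h>
        #include <sys/socket.h>
        #include <sys/types.h>

        /* Receive exactly "size" bytes, or fail early on bad arguments. */
        static ssize_t recv_exact(int fd, void *buf, size_t size)
        {
                size_t total = 0;

                if (fd < 0 || !buf || !size)
                        return -EINVAL;

                while (total < size) {
                        ssize_t n = recv(fd, (char *)buf + total,
                                         size - total, MSG_WAITALL);

                        if (n < 0)
                                return -errno;          /* socket error */
                        if (n == 0)
                                break;                  /* peer closed early */
                        total += n;
                }
                return total;
        }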
index 6b3278c4b72a0d745a724b3ef93cce0dc3dccd7d..c3e1008aa491ee45071adab6215f7440aa6dfa13 100644 (file)
@@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
        struct vhci_device *vdev;
        unsigned long flags;
 
-       usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
-                         hcd, urb, mem_flags);
-
        if (portnum > VHCI_HC_PORTS) {
                pr_err("invalid port number %d\n", portnum);
                return -ENODEV;
@@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
        struct vhci_device *vdev;
        unsigned long flags;
 
-       pr_info("dequeue a urb %p\n", urb);
-
        spin_lock_irqsave(&vhci->lock, flags);
 
        priv = urb->hcpriv;
@@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                /* tcp connection is closed */
                spin_lock(&vdev->priv_lock);
 
-               pr_info("device %p seems to be disconnected\n", vdev);
                list_del(&priv->list);
                kfree(priv);
                urb->hcpriv = NULL;
@@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                 * vhci_rx will receive RET_UNLINK and give back the URB.
                 * Otherwise, we give back it here.
                 */
-               pr_info("gives back urb %p\n", urb);
-
                usb_hcd_unlink_urb_from_ep(hcd, urb);
 
                spin_unlock_irqrestore(&vhci->lock, flags);
@@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 
                unlink->unlink_seqnum = priv->seqnum;
 
-               pr_info("device %p seems to be still connected\n", vdev);
-
                /* send cmd_unlink and try to cancel the pending URB in the
                 * peer */
                list_add_tail(&unlink->list, &vdev->unlink_tx);
@@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 
        /* need this? see stub_dev.c */
        if (ud->tcp_socket) {
-               pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+               pr_debug("shutdown tcp_socket %d\n", ud->sockfd);
                kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
        }
 
index 90577e8b2282393014558da959125345b1f7008c..112ebb90d8c95e0eb8ba3aa800d3b66941cb7e6c 100644 (file)
@@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
                urb = priv->urb;
                status = urb->status;
 
-               usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
-                               urb, priv, seqnum);
+               usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
 
                switch (status) {
                case -ENOENT:
                        /* fall through */
                case -ECONNRESET:
-                       dev_info(&urb->dev->dev,
-                                "urb %p was unlinked %ssynchronuously.\n", urb,
-                                status == -ENOENT ? "" : "a");
+                       dev_dbg(&urb->dev->dev,
+                                "urb seq# %u was unlinked %ssynchronuously\n",
+                                seqnum, status == -ENOENT ? "" : "a");
                        break;
                case -EINPROGRESS:
                        /* no info output */
                        break;
                default:
-                       dev_info(&urb->dev->dev,
-                                "urb %p may be in a error, status %d\n", urb,
-                                status);
+                       dev_dbg(&urb->dev->dev,
+                                "urb seq# %u may be in a error, status %d\n",
+                                seqnum, status);
                }
 
                list_del(&priv->list);
@@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
        spin_unlock_irqrestore(&vdev->priv_lock, flags);
 
        if (!urb) {
-               pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
-               pr_info("max seqnum %d\n",
+               pr_err("cannot find a urb of seqnum %u max seqnum %d\n",
+                       pdu->base.seqnum,
                        atomic_read(&vhci_hcd->seqnum));
                usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
                return;
@@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
        if (usbip_dbg_flag_vhci_rx)
                usbip_dump_urb(urb);
 
-       usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+       usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum);
 
        spin_lock_irqsave(&vhci->lock, flags);
        usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb);
@@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
                pr_info("the urb (seqnum %d) was already given back\n",
                        pdu->base.seqnum);
        } else {
-               usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+               usbip_dbg_vhci_rx("now giveback urb %d\n", pdu->base.seqnum);
 
                /* If unlink is successful, status is -ECONNRESET */
                urb->status = pdu->u.ret_unlink.status;
index d625a2ff4b712f4112bd48b8115ea0104e005d15..9aed15a358b7b98b0fab9e6b02b6820cff9ff808 100644 (file)
@@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
                memset(&msg, 0, sizeof(msg));
                memset(&iov, 0, sizeof(iov));
 
-               usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+               usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
+                                 priv->seqnum);
 
                /* 1. setup usbip_header */
                setup_cmd_submit_pdu(&pdu_header, urb);
index f77e499afdddb02c2d7595459a70b15f3a8c56d5..065f0b607373402a0d4dd7520b9820503390fb0a 100644 (file)
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
        kfree(resource);
 }
 
+/*
+ * Host memory not allocated to dom0. We can use this range for hotplug-based
+ * ballooning.
+ *
+ * It's a type-less resource. Setting IORESOURCE_MEM will make resource
+ * management algorithms (arch_remove_reservations()) look into guest e820,
+ * which we don't want.
+ */
+static struct resource hostmem_resource = {
+       .name   = "Host RAM",
+};
+
+void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
+{}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-       struct resource *res;
-       int ret;
+       struct resource *res, *res_hostmem;
+       int ret = -ENOMEM;
 
        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
        res->name = "System RAM";
        res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-       ret = allocate_resource(&iomem_resource, res,
-                               size, 0, -1,
-                               PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-       if (ret < 0) {
-               pr_err("Cannot allocate new System RAM resource\n");
-               kfree(res);
-               return NULL;
+       res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
+       if (res_hostmem) {
+               /* Try to grab a range from hostmem */
+               res_hostmem->name = "Host memory";
+               ret = allocate_resource(&hostmem_resource, res_hostmem,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+       }
+
+       if (!ret) {
+               /*
+                * Insert this resource into iomem. Because hostmem_resource
+                * tracks portion of guest e820 marked as UNUSABLE noone else
+                * should try to use it.
+                */
+               res->start = res_hostmem->start;
+               res->end = res_hostmem->end;
+               ret = insert_resource(&iomem_resource, res);
+               if (ret < 0) {
+                       pr_err("Can't insert iomem_resource [%llx - %llx]\n",
+                               res->start, res->end);
+                       release_memory_resource(res_hostmem);
+                       res_hostmem = NULL;
+                       res->start = res->end = 0;
+               }
+       }
+
+       if (ret) {
+               ret = allocate_resource(&iomem_resource, res,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+               if (ret < 0) {
+                       pr_err("Cannot allocate new System RAM resource\n");
+                       kfree(res);
+                       return NULL;
+               }
        }
 
 #ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
                        pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
                               pfn, limit);
                        release_memory_resource(res);
+                       release_memory_resource(res_hostmem);
                        return NULL;
                }
        }
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
        set_online_page_callback(&xen_online_page);
        register_memory_notifier(&xen_memory_nb);
        register_sysctl_table(xen_root);
+
+       arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
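
Sketch (not from the patch): additional_memory_resource() above first tries to carve the new range out of the dedicated hostmem_resource pool and only falls back to allocating from iomem_resource when that fails. A toy userspace C version of that preferred-pool-with-fallback order; the pool structure, sizes and helpers are invented for the example.

        #include <stddef.h>
        #include <stdio.h>

        struct pool { const char *name; size_t free; };

        static long pool_alloc(struct pool *p, size_t size)
        {
                if (p->free < size)
                        return -1;              /* allocation failed */
                p->free -= size;
                return 0;
        }

        static long alloc_with_fallback(struct pool *preferred,
                                        struct pool *general, size_t size)
        {
                if (!pool_alloc(preferred, size)) {
                        printf("allocated %zu bytes from %s\n", size, preferred->name);
                        return 0;
                }
                if (!pool_alloc(general, size)) {
                        printf("allocated %zu bytes from %s\n", size, general->name);
                        return 0;
                }
                return -1;
        }

        int main(void)
        {
                struct pool hostmem = { "Host RAM", 4096 };
                struct pool iomem   = { "iomem",   65536 };

                alloc_with_fallback(&hostmem, &iomem, 2048);   /* from Host RAM */
                alloc_with_fallback(&hostmem, &iomem, 8192);   /* falls back to iomem */
                return 0;
        }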
index 0da80019a9173cba18811abc71e39ca4631a7449..83ed7715f856d2025509c308583436e63d0043a5 100644 (file)
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
        ASSERT(args->agbno % args->alignment == 0);
 
        /* if not file data, insert new block into the reverse map btree */
-       if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
                                       args->agbno, args->len, &args->oinfo);
                if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
        bno_cur = cnt_cur = NULL;
        mp = tp->t_mountp;
 
-       if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(oinfo)) {
                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
                if (error)
                        goto error0;
index 6249c92671debe20a45e3cb0577360552e36d523..a76914db72ef11094cd8d74e5bd6cd6add2a3fde 100644 (file)
@@ -212,6 +212,7 @@ xfs_attr_set(
        int                     flags)
 {
        struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *leaf_bp = NULL;
        struct xfs_da_args      args;
        struct xfs_defer_ops    dfops;
        struct xfs_trans_res    tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
                 * GROT: another possible req'mt for a double-split btree op.
                 */
                xfs_defer_init(args.dfops, args.firstblock);
-               error = xfs_attr_shortform_to_leaf(&args);
+               error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
                if (error)
                        goto out_defer_cancel;
+               /*
+                * Prevent the leaf buffer from being unlocked so that a
+                * concurrent AIL push cannot grab the half-baked leaf
+                * buffer and run into problems with the write verifier.
+                */
+               xfs_trans_bhold(args.trans, leaf_bp);
+               xfs_defer_bjoin(args.dfops, leaf_bp);
                xfs_defer_ijoin(args.dfops, dp);
                error = xfs_defer_finish(&args.trans, args.dfops);
                if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
 
                /*
                 * Commit the leaf transformation.  We'll need another (linked)
-                * transaction to add the new attribute to the leaf.
+                * transaction to add the new attribute to the leaf, which
+                * means that we have to hold & join the leaf buffer here too.
                 */
-
                error = xfs_trans_roll_inode(&args.trans, dp);
                if (error)
                        goto out;
-
+               xfs_trans_bjoin(args.trans, leaf_bp);
+               leaf_bp = NULL;
        }
 
        if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
 
 out_defer_cancel:
        xfs_defer_cancel(&dfops);
-       args.trans = NULL;
 out:
+       if (leaf_bp)
+               xfs_trans_brelse(args.trans, leaf_bp);
        if (args.trans)
                xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
index 53cc8b986eac45c4e5ec4afd319891b6e9f716c5..601eaa36f1ada22e2213f9178bcb5cdb5868034d 100644 (file)
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+       struct xfs_da_args      *args,
+       struct xfs_buf          **leaf_bp)
 {
        xfs_inode_t *dp;
        xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
                sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
        }
        error = 0;
-
+       *leaf_bp = bp;
 out:
        kmem_free(tmpbuffer);
        return error;
index f7dda0c237b044b166d6d178fca3178feff2b644..894124efb421e0d0674b0f39bf63dabfe7916937 100644 (file)
@@ -48,7 +48,8 @@ void  xfs_attr_shortform_create(struct xfs_da_args *args);
 void   xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int    xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int    xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+                       struct xfs_buf **leaf_bp);
 int    xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
index 1210f684d3c28f9af8d8403c1f0222ef06dc380b..1bddbba6b80c960bdcc10c9a30210c119e1b2f77 100644 (file)
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
         * blowing out the transaction with a mix of EFIs and reflink
         * adjustments.
         */
-       if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+       if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
        else
                max_len = len;
index 072ebfe1d6aeb3e00e306a06d71a1b478382f3ad..087fea02c3892c34e1e63df2839bd8d804ba1f3f 100644 (file)
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+       /* Hold the (previously bjoin'd) buffer locked across the roll. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+               xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
        trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
        /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
 
+       /* Rejoin the buffers and dirty them so the log moves forward. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+               xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+               xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+       }
+
        return error;
 }
 
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
                }
        }
 
+       ASSERT(0);
+       return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+       struct xfs_defer_ops            *dop,
+       struct xfs_buf                  *bp)
+{
+       int                             i;
+
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+               if (dop->dop_bufs[i] == bp)
+                       return 0;
+               else if (dop->dop_bufs[i] == NULL) {
+                       dop->dop_bufs[i] = bp;
+                       return 0;
+               }
+       }
+
+       ASSERT(0);
        return -EFSCORRUPTED;
 }
 
@@ -493,9 +528,7 @@ xfs_defer_init(
        struct xfs_defer_ops            *dop,
        xfs_fsblock_t                   *fbp)
 {
-       dop->dop_committed = false;
-       dop->dop_low = false;
-       memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+       memset(dop, 0, sizeof(struct xfs_defer_ops));
        *fbp = NULLFSBLOCK;
        INIT_LIST_HEAD(&dop->dop_intake);
        INIT_LIST_HEAD(&dop->dop_pending);
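
Sketch (not from the patch): xfs_defer_bjoin() above records a buffer in a small fixed array (XFS_DEFER_OPS_NR_BUFS slots) so it can be re-held and re-joined across every transaction roll; joining the same buffer twice is a no-op and a full table is an error. The same slot-claiming idiom in standalone C, with NR_SLOTS and the -ENOSPC return picked for the example (the kernel code asserts and returns -EFSCORRUPTED instead).

        #include <errno.h>
        #include <stddef.h>

        #define NR_SLOTS 2

        static int join_slot(void *slots[NR_SLOTS], void *item)
        {
                int i;

                for (i = 0; i < NR_SLOTS; i++) {
                        if (slots[i] == item)
                                return 0;       /* already joined */
                        if (slots[i] == NULL) {
                                slots[i] = item;
                                return 0;       /* claimed a free slot */
                        }
                }
                return -ENOSPC;                 /* table full */
        }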
index d4f046dd44bd4ae434d8104991d0327f2d2b9fc7..045beacdd37d81c9e01e0ab46ab2bbcbbb581fbb 100644 (file)
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
 };
 
 #define XFS_DEFER_OPS_NR_INODES        2       /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS  2       /* join up to two buffers */
 
 struct xfs_defer_ops {
        bool                    dop_committed;  /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
        struct list_head        dop_intake;     /* unlogged pending work */
        struct list_head        dop_pending;    /* logged pending work */
 
-       /* relog these inodes with each roll */
+       /* relog these with each roll */
        struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+       struct xfs_buf          *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
index 89bf16b4d9377293fa842c48f2b9b637f83c62c2..b0f31791c7e6137c0b7e46c35000e3c76e145ba1 100644 (file)
@@ -632,8 +632,6 @@ xfs_iext_insert(
        struct xfs_iext_leaf    *new = NULL;
        int                     nr_entries, i;
 
-       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
        if (ifp->if_height == 0)
                xfs_iext_alloc_root(ifp, cur);
        else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
        xfs_iext_set(cur_rec(cur), irec);
        ifp->if_bytes += sizeof(struct xfs_iext_rec);
 
+       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
        if (new)
                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 }
index 585b35d34142157863740d3b8cd437b37e49e05d..c40d26763075307b064d49bd3cb48dfce8dd5b67 100644 (file)
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Add refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-       if (error)
-               return error;
-
-       /* Add rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Remove refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-       if (error)
-               return error;
-
-       /* Remove rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
-       return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+       error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
                        fsb, len);
+       if (error)
+               return error;
+
+       /* Add rmap entry */
+       return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
+       /* Remove rmap entry */
+       error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+       if (error)
+               return error;
+
        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
                        fsb, len);
 }
index dd019cee1b3bdccf4e08b25fd70b2c59220859a1..50db920ceeebbf077c2b3b13690066173ba7cf4e 100644 (file)
@@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range(
        return error;
 }
 
+/*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+       struct xfs_mount        *mp,
+       uint64_t                ltoff,
+       struct xfs_rmap_irec    *rec,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags)
+{
+       int                     error = 0;
+
+       if (owner == XFS_RMAP_OWN_UNKNOWN)
+               return 0;
+
+       /* Make sure the unwritten flag matches. */
+       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+                       (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+       /* Make sure the owner matches what we expect to find in the tree. */
+       XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+       /* Check the offset, if necessary. */
+       if (XFS_RMAP_NON_INODE_OWNER(owner))
+               goto out;
+
+       if (flags & XFS_RMAP_BMBT_BLOCK) {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+                               out);
+       } else {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                               ltoff + rec->rm_blockcount >= offset + len,
+                               out);
+       }
+
+out:
+       return error;
+}
+
 /*
  * Find the extent in the rmap btree and remove it.
  *
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
                goto out_done;
        }
 
-       /* Make sure the unwritten flag matches. */
-       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-                       (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+       /*
+        * If we're doing an unknown-owner removal for EFI recovery, we expect
+        * to find the full range in the rmapbt or nothing at all.  If we
+        * don't find any rmaps overlapping either end of the range, we're
+        * done.  Hopefully this means that the EFI creator already queued
+        * (and finished) a RUI to remove the rmap.
+        */
+       if (owner == XFS_RMAP_OWN_UNKNOWN &&
+           ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+               struct xfs_rmap_irec    rtrec;
+
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto out_error;
+               if (i == 0)
+                       goto out_done;
+               error = xfs_rmap_get_rec(cur, &rtrec, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               if (rtrec.rm_startblock >= bno + len)
+                       goto out_done;
+       }
 
        /* Make sure the extent we found covers the entire freeing range. */
        XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-               ltrec.rm_startblock + ltrec.rm_blockcount >=
-               bno + len, out_error);
+                       ltrec.rm_startblock + ltrec.rm_blockcount >=
+                       bno + len, out_error);
 
-       /* Make sure the owner matches what we expect to find in the tree. */
-       XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-                                   XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-       /* Check the offset, if necessary. */
-       if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-               if (flags & XFS_RMAP_BMBT_BLOCK) {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-                                       out_error);
-               } else {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_offset <= offset, out_error);
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltoff + ltrec.rm_blockcount >= offset + len,
-                                       out_error);
-               }
-       }
+       /* Check owner information. */
+       error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+                       offset, flags);
+       if (error)
+               goto out_error;
 
        if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
                /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
                flags |= XFS_RMAP_UNWRITTEN;
        trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
                        unwritten, oinfo);
+       ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
        /*
         * For the initial lookup, look for an exact match or the left-adjacent
index 466ede637080e5832046a96d62789eb2e46ed03f..0fcd5b1ba7295379081e0c61d230324447a8ae56 100644 (file)
@@ -61,7 +61,21 @@ static inline void
 xfs_rmap_skip_owner_update(
        struct xfs_owner_info   *oi)
 {
-       oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
index 44f8c54512102577dbe768e5f137298ba1ee79c7..64da90655e957c3fd01331720aa32093909ddad6 100644 (file)
@@ -538,7 +538,7 @@ xfs_efi_recover(
                return error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-       xfs_rmap_skip_owner_update(&oinfo);
+       xfs_rmap_any_owner_update(&oinfo);
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &efip->efi_format.efi_extents[i];
                error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
index 8f22fc579dbba4abf9b609040802d24e9cf2f732..60a2e128cb6a59aa7181faa5c519d511a308bc5c 100644 (file)
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
                 * this doesn't actually exist in the rmap btree.
                 */
                xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+               error = xfs_rmap_free(tp, bp, agno,
+                               be32_to_cpu(agf->agf_length) - new,
+                               new, &oinfo);
+               if (error)
+                       goto error0;
                error = xfs_free_extent(tp,
                                XFS_AGB_TO_FSB(mp, agno,
                                        be32_to_cpu(agf->agf_length) - new),
index 43005fbe8b1eefabc84ee762a9427ec784889814..3861d61fb265f66a9d39723286d5adc9772cc15e 100644 (file)
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
  * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
  * (We'll just piggyback on the post-EOF prealloc space workqueue.)
  */
-STATIC void
+void
 xfs_queue_cowblocks(
        struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
        return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+       int             tag)
+{
+       switch (tag) {
+       case XFS_ICI_EOFBLOCKS_TAG:
+               return XFS_IEOFBLOCKS;
+       case XFS_ICI_COWBLOCKS_TAG:
+               return XFS_ICOWBLOCKS;
+       default:
+               ASSERT(0);
+               return 0;
+       }
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
        xfs_inode_t     *ip,
        void            (*execute)(struct xfs_mount *mp),
        void            (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
         * Don't bother locking the AG and looking up in the radix trees
         * if we already know that we have the tag set.
         */
-       if (ip->i_flags & XFS_IEOFBLOCKS)
+       if (ip->i_flags & xfs_iflag_for_tag(tag))
                return;
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags |= XFS_IEOFBLOCKS;
+       ip->i_flags |= xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_eofblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
                        trace_xfs_perag_set_eofblocks,
                        XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
        xfs_inode_t     *ip,
        void            (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
                                    int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
        struct xfs_perag *pag;
 
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags &= ~XFS_IEOFBLOCKS;
+       ip->i_flags &= ~xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_eofblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
 
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_cowblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
                        trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_cowblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
index bff4d85e54984ad84ca0741f801f536a99195055..d4a77588eca15b90639debc1ca6c5c62a3680de8 100644 (file)
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, int flags, void *args),
index b41952a4ddd851fe63a475be4f2b6bc8d7e47beb..6f95bdb408ced01b9471b931714d279003a22d92 100644 (file)
@@ -1487,6 +1487,24 @@ xfs_link(
        return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+       struct xfs_inode        *ip)
+{
+       struct xfs_ifork        *dfork;
+       struct xfs_ifork        *cfork;
+
+       if (!xfs_is_reflink_inode(ip))
+               return;
+       dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+       if (cfork->if_bytes == 0)
+               xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
  * Free up the underlying blocks past new_size.  The new size must be smaller
  * than the current size.  This routine can be used both for the attribute and
@@ -1583,15 +1601,7 @@ xfs_itruncate_extents(
        if (error)
                goto out;
 
-       /*
-        * Clear the reflink flag if there are no data fork blocks and
-        * there are no extents staged in the cow fork.
-        */
-       if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-               if (ip->i_d.di_nblocks == 0)
-                       ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-               xfs_inode_clear_cowblocks_tag(ip);
-       }
+       xfs_itruncate_clear_reflink_flags(ip);
 
        /*
         * Always re-log the inode so that our permanent transaction can keep
index b2136af9289f3d854f88a549ec32efa455cfc78b..d383e392ec9ddcca6f552dc8c4cfe5329373d6c9 100644 (file)
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * log recovery to replay a bmap operation on the inode.
  */
 #define XFS_IRECOVERY          (1 << 11)
+#define XFS_ICOWBLOCKS         (1 << 12)/* has the cowblocks tag set */
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
index cf7c8f81bebb566a486f0f732a0fe6a1040022c4..47aea2e82c268f4bbf9c25c1c1c6f3821c11caa3 100644 (file)
@@ -454,6 +454,8 @@ retry:
        if (error)
                goto out_bmap_cancel;
 
+       xfs_inode_set_cowblocks_tag(ip);
+
        /* Finish up. */
        error = xfs_defer_finish(&tp, &dfops);
        if (error)
@@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping(
        struct xfs_iext_cursor          icur;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-       ASSERT(xfs_is_reflink_inode(ip));
 
+       if (!xfs_is_reflink_inode(ip))
+               return false;
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
                return false;
@@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
 
                        /* Remove the mapping from the CoW fork. */
                        xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+               } else {
+                       /* Didn't do anything, push cursor back. */
+                       xfs_iext_prev(ifp, &icur);
                }
 next_extent:
                if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -725,7 +731,7 @@ xfs_reflink_end_cow(
                        (unsigned int)(end_fsb - offset_fsb),
                        XFS_DATA_FORK);
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                       resblks, 0, 0, &tp);
+                       resblks, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                goto out;
 
@@ -1291,6 +1297,17 @@ xfs_reflink_remap_range(
 
        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
+       /*
+        * Clear out post-eof preallocations because we don't have page cache
+        * backing the delayed allocations and they'll never get freed on
+        * their own.
+        */
+       if (xfs_can_free_eofblocks(dest, true)) {
+               ret = xfs_free_eofblocks(dest);
+               if (ret)
+                       goto out_unlock;
+       }
+
        /* Set flags and remap blocks. */
        ret = xfs_reflink_set_inode_flag(src, dest);
        if (ret)
index 5122d3021117f00e20d6dd1e195c28666cc71076..1dacccc367f81725a678ea3a6ed50528a731920d 100644 (file)
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        return error;
                }
+               xfs_queue_cowblocks(mp);
 
                /* Create the per-AG metadata reservation pool .*/
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
 
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+               /* Get rid of any leftover CoW reservations... */
+               cancel_delayed_work_sync(&mp->m_cowblocks_work);
+               error = xfs_icache_free_cowblocks(mp, NULL);
+               if (error) {
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                       return error;
+               }
+
                /* Free the per-AG metadata reservation pool. */
                error = xfs_fs_unreserve_ag_blocks(mp);
                if (error) {
index ea189d88a3cc761e0239043785e00d9c6d8402f5..8ac4e68a12f08e4e00c1fcf850f0f8c97188d390 100644 (file)
@@ -7,9 +7,10 @@
 #ifndef _ASM_GENERIC_MM_HOOKS_H
 #define _ASM_GENERIC_MM_HOOKS_H
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
index b234d54f2cb6e4c23a21db2af3b225264eccae2a..868e68561f913ecaec80ddf05f02816767ea5a17 100644 (file)
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 struct file;
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                        unsigned long size, pgprot_t *vma_prot);
+
+#ifndef CONFIG_X86_ESPFIX64
+static inline void init_espfix_bsp(void) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
index cceafa01f9073293cf4e813247b54bbe094728e3..b67404fc4b34bab495086b4b1e969f53a0921b39 100644 (file)
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
 
 struct mcryptd_cpu_queue {
        struct crypto_queue queue;
+       spinlock_t q_lock;
        struct work_struct work;
 };
 
index 6e45608b2399813329e2280e601c2465940def53..9da6ce22803f03fc318a7fdd33af380eae67d4e6 100644 (file)
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
        bool                    enabled;
 };
 
-int kvm_timer_hyp_init(void);
+int kvm_timer_hyp_init(bool);
 int kvm_timer_enable(struct kvm_vcpu *vcpu);
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
index 82f0c8fd7be8fd20951af806319f64f615f4ffc6..23d29b39f71e83e8a6a25540adc2e3f28702aec7 100644 (file)
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #define bio_set_dev(bio, bdev)                         \
 do {                                           \
+       if ((bio)->bi_disk != (bdev)->bd_disk)  \
+               bio_clear_flag(bio, BIO_THROTTLED);\
        (bio)->bi_disk = (bdev)->bd_disk;       \
        (bio)->bi_partno = (bdev)->bd_partno;   \
 } while (0)
index a1e628e032dad75bf1837a25e45b55a7f54ca2df..9e7d8bd776d227d2ba92b137af7230300f5b1d4a 100644 (file)
@@ -50,8 +50,6 @@ struct blk_issue_stat {
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct gendisk          *bi_disk;
-       u8                      bi_partno;
-       blk_status_t            bi_status;
        unsigned int            bi_opf;         /* bottom bits req flags,
                                                 * top bits REQ_OP. Use
                                                 * accessors.
@@ -59,8 +57,8 @@ struct bio {
        unsigned short          bi_flags;       /* status, etc and bvec pool number */
        unsigned short          bi_ioprio;
        unsigned short          bi_write_hint;
-
-       struct bvec_iter        bi_iter;
+       blk_status_t            bi_status;
+       u8                      bi_partno;
 
        /* Number of segments in this BIO after
         * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
        unsigned int            bi_seg_front_size;
        unsigned int            bi_seg_back_size;
 
-       atomic_t                __bi_remaining;
+       struct bvec_iter        bi_iter;
 
+       atomic_t                __bi_remaining;
        bio_end_io_t            *bi_end_io;
 
        void                    *bi_private;
index 8089ca17db9ac65998ec9cf82f65743bb5c5abb9..0ce8a372d5069a7aca7810429a968d20e923d3d1 100644 (file)
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
 struct request {
        struct list_head queuelist;
        union {
-               call_single_data_t csd;
+               struct __call_single_data csd;
                u64 fifo_time;
        };
 
@@ -241,14 +241,24 @@ struct request {
        struct request *next_rq;
 };
 
+static inline bool blk_op_is_scsi(unsigned int op)
+{
+       return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_op_is_private(unsigned int op)
+{
+       return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
+}
+
 static inline bool blk_rq_is_scsi(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+       return blk_op_is_scsi(req_op(rq));
 }
 
 static inline bool blk_rq_is_private(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+       return blk_op_is_private(req_op(rq));
 }
 
 static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
        return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 }
 
+static inline bool bio_is_passthrough(struct bio *bio)
+{
+       unsigned op = bio_op(bio);
+
+       return blk_op_is_scsi(op) || blk_op_is_private(op);
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
        return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
-extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
+extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
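Because the helper may now substitute a bounce bio, callers pass the bio by reference and must keep using whatever pointer comes back; a hedged sketch of the adjusted call pattern (my_build_bio() is a hypothetical constructor):

	struct bio *bio = my_build_bio();
	int ret = blk_rq_append_bio(rq, &bio);	/* may replace 'bio' with a bounce bio */
	if (ret)
		return ret;
	/* from here on, only the possibly-updated 'bio' pointer is valid */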
index c561b986bab0ebf886000ea34e377ea789138a3b..1632bb13ad8aed8cfeba2ccc69cfa458d02540bb 100644 (file)
  * In practice this is far bigger than any realistic pointer offset; this limit
  * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
  */
-#define BPF_MAX_VAR_OFF        (1ULL << 31)
+#define BPF_MAX_VAR_OFF        (1 << 29)
 /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
  * that converting umax_value to int cannot overflow.
  */
-#define BPF_MAX_VAR_SIZ        INT_MAX
+#define BPF_MAX_VAR_SIZ        (1 << 29)
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
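Keeping both limits at 1 << 29 means every value summed here stays below 2^29, so even umax_value + (int)off + (int)size < 3 * 2^29 < 2^31 fits a signed 32-bit int and is nowhere near wrapping a u64; the old pair (1ULL << 31 and INT_MAX) allowed such sums to overflow once narrowed to int.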
index 55e672592fa93cd94aef5dfab761b688df0a7235..7258cd676df42c3ea77700d1a6ed15e7957e4f37 100644 (file)
@@ -66,9 +66,10 @@ struct gpio_irq_chip {
        /**
         * @lock_key:
         *
-        * Per GPIO IRQ chip lockdep class.
+        * Per GPIO IRQ chip lockdep classes.
         */
        struct lock_class_key *lock_key;
+       struct lock_class_key *request_key;
 
        /**
         * @parent_handler:
@@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 
 /* add/remove chips */
 extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-                                     struct lock_class_key *lock_key);
+                                     struct lock_class_key *lock_key,
+                                     struct lock_class_key *request_key);
 
 /**
  * gpiochip_add_data() - register a gpio_chip
@@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
  */
 #ifdef CONFIG_LOCKDEP
 #define gpiochip_add_data(chip, data) ({               \
-               static struct lock_class_key key;       \
-               gpiochip_add_data_with_key(chip, data, &key);   \
+               static struct lock_class_key lock_key;  \
+               static struct lock_class_key request_key;         \
+               gpiochip_add_data_with_key(chip, data, &lock_key, \
+                                          &request_key);         \
        })
 #else
-#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL)
+#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL)
 #endif
 
 static inline int gpiochip_add(struct gpio_chip *chip)
@@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
                             irq_flow_handler_t handler,
                             unsigned int type,
                             bool threaded,
-                            struct lock_class_key *lock_key);
+                            struct lock_class_key *lock_key,
+                            struct lock_class_key *request_key);
 
 #ifdef CONFIG_LOCKDEP
 
@@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
                                       irq_flow_handler_t handler,
                                       unsigned int type)
 {
-       static struct lock_class_key key;
+       static struct lock_class_key lock_key;
+       static struct lock_class_key request_key;
 
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, false, &key);
+                                       handler, type, false,
+                                       &lock_key, &request_key);
 }
 
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
                          unsigned int type)
 {
 
-       static struct lock_class_key key;
+       static struct lock_class_key lock_key;
+       static struct lock_class_key request_key;
 
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, true, &key);
+                                       handler, type, true,
+                                       &lock_key, &request_key);
 }
 #else
 static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
@@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
                                       unsigned int type)
 {
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, false, NULL);
+                                       handler, type, false, NULL, NULL);
 }
 
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
                          unsigned int type)
 {
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, true, NULL);
+                                       handler, type, true, NULL, NULL);
 }
 #endif /* CONFIG_LOCKDEP */
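Driver call sites stay as they were; with CONFIG_LOCKDEP the wrappers above quietly supply both static keys, giving each chip its own lockdep class for the descriptor lock and for the new request mutex. Illustrative probe-path fragment:

	ret = gpiochip_add_data(&priv->gc, priv);	/* two static lock_class_keys injected by the macro */
	if (ret)
		return ret;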
 
index cb18c6290ca87290996e636f3eda14eb03d26316..8415bf1a9776245b810c8f92fa16c98eb038a45f 100644 (file)
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
                                                 * 100: prefer care-of address
                                                 */
                                dontfrag:1,
-                               autoflowlabel:1;
+                               autoflowlabel:1,
+                               autoflowlabel_set:1;
        __u8                    min_hopcount;
        __u8                    tclass;
        __be32                  rcv_flowinfo;
index e140f69163b693b386bdc709719b4efc3d8a30b0..a0231e96a578348c21596a3c9d2d0cbe4a9a1249 100644 (file)
@@ -212,6 +212,7 @@ struct irq_data {
  *                               mask. Applies only to affinity managed irqs.
  * IRQD_SINGLE_TARGET          - IRQ allows only a single affinity target
  * IRQD_DEFAULT_TRIGGER_SET    - Expected trigger already been set
+ * IRQD_CAN_RESERVE            - Can use reservation mode
  */
 enum {
        IRQD_TRIGGER_MASK               = 0xf,
@@ -233,6 +234,7 @@ enum {
        IRQD_MANAGED_SHUTDOWN           = (1 << 23),
        IRQD_SINGLE_TARGET              = (1 << 24),
        IRQD_DEFAULT_TRIGGER_SET        = (1 << 25),
+       IRQD_CAN_RESERVE                = (1 << 26),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
        return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
 }
 
+static inline void irqd_set_can_reserve(struct irq_data *d)
+{
+       __irqd_to_state(d) |= IRQD_CAN_RESERVE;
+}
+
+static inline void irqd_clr_can_reserve(struct irq_data *d)
+{
+       __irqd_to_state(d) &= ~IRQD_CAN_RESERVE;
+}
+
+static inline bool irqd_can_reserve(struct irq_data *d)
+{
+       return __irqd_to_state(d) & IRQD_CAN_RESERVE;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
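The flag is intended to be consulted from an irqdomain's ->activate() path so that reservation-capable interrupts are not given a real vector until request_irq() time; a loose sketch (my_assign_vector() is illustrative, not a real API):

	static int my_domain_activate(struct irq_domain *dom, struct irq_data *irqd, bool reserve)
	{
		if (reserve && irqd_can_reserve(irqd))
			return 0;		/* reserve only, assign later */
		return my_assign_vector(irqd);
	}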
index 39fb3700f7a92aae1a6c3d417f5effbfb93d6494..25b33b66453773cb01509725fa68664c555ffd3f 100644 (file)
@@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq)
 }
 
 static inline void
-irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
+irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+                     struct lock_class_key *request_class)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       if (desc)
-               lockdep_set_class(&desc->lock, class);
+       if (desc) {
+               lockdep_set_class(&desc->lock, lock_class);
+               lockdep_set_class(&desc->request_mutex, request_class);
+       }
 }
 
 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI
index a34355d195463f93d3235283978633bf60f0b59d..48c7e86bb5566798aba1b1b132ab242b75b05ea9 100644 (file)
@@ -113,7 +113,7 @@ struct irq_domain_ops {
                     unsigned int nr_irqs, void *arg);
        void (*free)(struct irq_domain *d, unsigned int virq,
                     unsigned int nr_irqs);
-       int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early);
+       int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
        void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
        int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
                         unsigned long *out_hwirq, unsigned int *out_type);
index a2a1318a3d0c8be0a1fb3d1a08fcf671ff9d8bee..c3d3f04d8cc689eddf217c0626e71d8c16530db5 100644 (file)
@@ -915,10 +915,10 @@ enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
 #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN        BIT(6)
 
 enum dev_aspm_mode {
-       DEV_ASPM_DISABLE = 0,
        DEV_ASPM_DYNAMIC,
        DEV_ASPM_BACKDOOR,
        DEV_ASPM_STATIC,
+       DEV_ASPM_DISABLE,
 };
 
 /*
index a886b51511abbf146c4c76309aa313e4bbf77dda..1f509d072026d3d62a9e3701ed72bd5ab2d5b5c4 100644 (file)
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
 };
 
 struct mlx5_irq_info {
+       cpumask_var_t mask;
        char name[MLX5_MAX_IRQ_NAME];
 };
 
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
-int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1164,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+                                u64 *values,
+                                int num_counters,
+                                size_t *offsets);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
index 38a7577a9ce71fbcf63c21e2911364795842daa8..d44ec5f41d4a04c72b25b4db1d6fb0217f8f1fa1 100644 (file)
@@ -147,7 +147,7 @@ enum {
        MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
        MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
        MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
-       MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
+       MLX5_CMD_OP_SET_PP_RATE_LIMIT             = 0x780,
        MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
        MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
        MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
        u8         vxlan_udp_port[0x10];
 };
 
-struct mlx5_ifc_set_rate_limit_out_bits {
+struct mlx5_ifc_set_pp_rate_limit_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
 
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
        u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_set_rate_limit_in_bits {
+struct mlx5_ifc_set_pp_rate_limit_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
 
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
        u8         reserved_at_60[0x20];
 
        u8         rate_limit[0x20];
+
+       u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_access_register_out_bits {
diff --git a/include/linux/pti.h b/include/linux/pti.h
new file mode 100644 (file)
index 0000000..0174883
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _INCLUDE_PTI_H
+#define _INCLUDE_PTI_H
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#include <asm/pti.h>
+#else
+static inline void pti_init(void) { }
+#endif
+
+#endif
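The stub keeps generic code free of #ifdef CONFIG_PAGE_TABLE_ISOLATION; the architecture header included above is expected to declare the real hook, roughly along these lines (sketch, x86 naming assumed):

	/* arch/x86/include/asm/pti.h, sketch */
	#ifndef _ASM_X86_PTI_H
	#define _ASM_X86_PTI_H

	extern void pti_init(void);

	#endif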
index 7b2170bfd6e7dae432478fffdbc70e1408740394..bc6bb325d1bf7c03db223c568b891e5d33dc93ca 100644 (file)
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  *     for that name.  This appears in the sysfs "modalias" attribute
  *     for driver coldplugging, and in uevents used for hotplugging
  * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
- *     when not using a GPIO line)
+ *     not using a GPIO line)
  *
  * @statistics: statistics for the spi_device
  *
index f442d1a42025925eb4446c6678e3a059a8cfbb39..7cc35921218ecb745634e29d0558749981b4089d 100644 (file)
@@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern unsigned long tick_nohz_get_idle_calls(void);
+extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
index 8b8118a7fadbc74a9aa879dc934de0442bb3a013..cb4d92b79cd932eda4e178861d8345683b329bdb 100644 (file)
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
  * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
  *     auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
- * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
  * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
  *     firmware.
  * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
index 0105445cab83d32008b3526794c077f4bfbd9816..8e08b6da72f325bd4a623191e886fb1b746644d7 100644 (file)
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
 };
 
 enum tc_clsbpf_command {
-       TC_CLSBPF_ADD,
-       TC_CLSBPF_REPLACE,
-       TC_CLSBPF_DESTROY,
+       TC_CLSBPF_OFFLOAD,
        TC_CLSBPF_STATS,
 };
 
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
        enum tc_clsbpf_command command;
        struct tcf_exts *exts;
        struct bpf_prog *prog;
+       struct bpf_prog *oldprog;
        const char *name;
        bool exts_integrated;
        u32 gen_flags;
index 9155da42269208b358df8535b14dfd3dba509365..7a7b14e9628a1174e18dda30cd0b5bfd5b32d30d 100644 (file)
@@ -1514,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
        return sk->sk_lock.owned;
 }
 
+static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
+{
+       return sk->sk_lock.owned;
+}
+
 /* no reclassification while locks are held */
 static inline bool sock_allow_reclassification(const struct sock *csk)
 {
index dc28a98ce97ca7d53d67809a0fc718f0b9d622f8..ae35991b5877029b217a2b49fba7f012f193148c 100644 (file)
@@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x);
 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb,
+                    int (*finish)(struct net *, struct sock *,
+                                  struct sk_buff *));
 int xfrm_output_resume(struct sk_buff *skb, int err);
 int xfrm_output(struct sock *sk, struct sk_buff *skb);
 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
index 758607226bfdd5269d33557b650a31628a59d22f..2cd449328aee37e55de94633d803d73fe3057f9e 100644 (file)
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
 
        TP_STRUCT__entry(
                __string(        name,           core->name                )
-               __string(        pname,          parent->name              )
+               __string(        pname, parent ? parent->name : "none"     )
        ),
 
        TP_fast_assign(
                __assign_str(name, core->name);
-               __assign_str(pname, parent->name);
+               __assign_str(pname, parent ? parent->name : "none");
        ),
 
        TP_printk("%s %s", __get_str(name), __get_str(pname))
index e4b0b8e099325f2801e4f3af168004c603c23794..2c735a3e66133fc08740b4df6d64919c491c9d1e 100644 (file)
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
        { KVM_TRACE_MMIO_WRITE, "write" }
 
 TRACE_EVENT(kvm_mmio,
-       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_PROTO(int type, int len, u64 gpa, void *val),
        TP_ARGS(type, len, gpa, val),
 
        TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
                __entry->type           = type;
                __entry->len            = len;
                __entry->gpa            = gpa;
-               __entry->val            = val;
+               __entry->val            = 0;
+               if (val)
+                       memcpy(&__entry->val, val,
+                              min_t(u32, sizeof(__entry->val), len));
        ),
 
        TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
index 07cccca6cbf1762684152146372e35e1cd758338..ab34c561f26bec42a8ff32cb6f7e911447f03af2 100644 (file)
                tcp_state_name(TCP_CLOSING),            \
                tcp_state_name(TCP_NEW_SYN_RECV))
 
+#define TP_STORE_V4MAPPED(__entry, saddr, daddr)               \
+       do {                                                    \
+               struct in6_addr *pin6;                          \
+                                                               \
+               pin6 = (struct in6_addr *)__entry->saddr_v6;    \
+               ipv6_addr_set_v4mapped(saddr, pin6);            \
+               pin6 = (struct in6_addr *)__entry->daddr_v6;    \
+               ipv6_addr_set_v4mapped(daddr, pin6);            \
+       } while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)          \
+       do {                                                            \
+               if (sk->sk_family == AF_INET6) {                        \
+                       struct in6_addr *pin6;                          \
+                                                                       \
+                       pin6 = (struct in6_addr *)__entry->saddr_v6;    \
+                       *pin6 = saddr6;                                 \
+                       pin6 = (struct in6_addr *)__entry->daddr_v6;    \
+                       *pin6 = daddr6;                                 \
+               } else {                                                \
+                       TP_STORE_V4MAPPED(__entry, saddr, daddr);       \
+               }                                                       \
+       } while (0)
+#else
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)  \
+       TP_STORE_V4MAPPED(__entry, saddr, daddr)
+#endif
+
 /*
  * tcp event with arguments sk and skb
  *
@@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 
        TP_fast_assign(
                struct inet_sock *inet = inet_sk(sk);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skbaddr = skb;
@@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
                p32 = (__be32 *) __entry->daddr;
                *p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = sk->sk_v6_rcv_saddr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = sk->sk_v6_daddr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+                             sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
 
        TP_fast_assign(
                struct inet_sock *inet = inet_sk(sk);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skaddr = sk;
@@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
                p32 = (__be32 *) __entry->daddr;
                *p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = sk->sk_v6_rcv_saddr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = sk->sk_v6_daddr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
 
        TP_fast_assign(
                struct inet_sock *inet = inet_sk(sk);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skaddr = sk;
@@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
                p32 = (__be32 *) __entry->daddr;
                *p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = sk->sk_v6_rcv_saddr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = sk->sk_v6_daddr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
@@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
 
        TP_fast_assign(
                struct inet_request_sock *ireq = inet_rsk(req);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skaddr = sk;
@@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
                p32 = (__be32 *) __entry->daddr;
                *p32 = ireq->ir_rmt_addr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = ireq->ir_v6_loc_addr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = ireq->ir_v6_rmt_addr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
+                             ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
index 4914b93a23f2bdeb066a4048d7ab749456ae3fe3..61f410fd74e4cf4180f7ad5ffa1d996cc1528c91 100644 (file)
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+struct resource;
+void arch_xen_balloon_init(struct resource *hostmem_resource);
+#endif
index e96e3a14533cda199963fe96b97dc78779c66037..a8100b9548398e8b102052f2c1418b21ea423825 100644 (file)
@@ -75,6 +75,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
+#include <linux/pti.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/sched_clock.h>
@@ -504,6 +505,10 @@ static void __init mm_init(void)
        pgtable_init();
        vmalloc_init();
        ioremap_huge_init();
+       /* Should be run before the first non-init thread is created */
+       init_espfix_bsp();
+       /* Should be run after espfix64 is set up. */
+       pti_init();
 }
 
 asmlinkage __visible void __init start_kernel(void)
@@ -678,10 +683,6 @@ asmlinkage __visible void __init start_kernel(void)
 #ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
                efi_enter_virtual_mode();
-#endif
-#ifdef CONFIG_X86_ESPFIX64
-       /* Should be run before the first non-init thread is created */
-       init_espfix_bsp();
 #endif
        thread_stack_cache_init();
        cred_init();
index d4593571c4049b8d046f53f81f8e17911a21e0c9..04b24876cd23c83c9502afc60853c871ee3fee13 100644 (file)
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                break;
        case PTR_TO_STACK:
                pointer_desc = "stack ";
+               /* The stack spill tracking logic in check_stack_write()
+                * and check_stack_read() relies on stack accesses being
+                * aligned.
+                */
+               strict = true;
                break;
        default:
                break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                                           strict);
 }
 
+/* truncate register to smaller size (in bytes)
+ * must be called with size < BPF_REG_SIZE
+ */
+static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
+{
+       u64 mask;
+
+       /* clear high bits in bit representation */
+       reg->var_off = tnum_cast(reg->var_off, size);
+
+       /* fix arithmetic bounds */
+       mask = ((u64)1 << (size * 8)) - 1;
+       if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
+               reg->umin_value &= mask;
+               reg->umax_value &= mask;
+       } else {
+               reg->umin_value = 0;
+               reg->umax_value = mask;
+       }
+       reg->smin_value = reg->umin_value;
+       reg->smax_value = reg->umax_value;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
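A worked example of the new helper for size = 4 (mask = 0xffffffff): with umin_value = 0x100000005 and umax_value = 0x1000000ff the bits above the mask agree, so the bounds simply truncate to [5, 0xff]; had the high bits differed, the range would be reset to the full [0, 0xffffffff]. The signed bounds are then copied from the unsigned ones, which is safe because a value masked to at most 32 bits cannot be negative as an s64.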
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
        if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
            regs[value_regno].type == SCALAR_VALUE) {
                /* b/h/w load zero-extends, mark upper bits as known 0 */
-               regs[value_regno].var_off =
-                       tnum_cast(regs[value_regno].var_off, size);
-               __update_reg_bounds(&regs[value_regno]);
+               coerce_reg_to_size(&regs[value_regno], size);
        }
        return err;
 }
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
                verbose(env, "invalid variable stack read R%d var_off=%s\n",
                        regno, tn_buf);
+               return -EACCES;
        }
        off = regs[regno].off + regs[regno].var_off.value;
        if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
                return -EINVAL;
        }
 
+       /* With LD_ABS/IND some JITs save/restore skb from r1. */
        changes_data = bpf_helper_changes_pkt_data(fn->func);
+       if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
+               verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
+                       func_id_name(func_id), func_id);
+               return -EINVAL;
+       }
 
        memset(&meta, 0, sizeof(meta));
        meta.pkt_access = fn->pkt_access;
@@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
        return 0;
 }
 
-static void coerce_reg_to_32(struct bpf_reg_state *reg)
-{
-       /* clear high 32 bits */
-       reg->var_off = tnum_cast(reg->var_off, 4);
-       /* Update bounds */
-       __update_reg_bounds(reg);
-}
-
 static bool signed_add_overflows(s64 a, s64 b)
 {
        /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
        return res > a;
 }
 
+static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+                                 const struct bpf_reg_state *reg,
+                                 enum bpf_reg_type type)
+{
+       bool known = tnum_is_const(reg->var_off);
+       s64 val = reg->var_off.value;
+       s64 smin = reg->smin_value;
+
+       if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+               verbose(env, "math between %s pointer and %lld is not allowed\n",
+                       reg_type_str[type], val);
+               return false;
+       }
+
+       if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "%s pointer offset %d is not allowed\n",
+                       reg_type_str[type], reg->off);
+               return false;
+       }
+
+       if (smin == S64_MIN) {
+               verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
+                       reg_type_str[type]);
+               return false;
+       }
+
+       if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "value %lld makes %s pointer be out of bounds\n",
+                       smin, reg_type_str[type]);
+               return false;
+       }
+
+       return true;
+}
+
 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
  * Caller should also handle BPF_MOV case separately.
  * If we return -EACCES, caller may want to try again treating pointer as a
@@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
        if (BPF_CLASS(insn->code) != BPF_ALU64) {
                /* 32-bit ALU ops on pointers produce (meaningless) scalars */
-               if (!env->allow_ptr_leaks)
-                       verbose(env,
-                               "R%d 32-bit pointer arithmetic prohibited\n",
-                               dst);
+               verbose(env,
+                       "R%d 32-bit pointer arithmetic prohibited\n",
+                       dst);
                return -EACCES;
        }
 
        if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == CONST_PTR_TO_MAP) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == PTR_TO_PACKET_END) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+                       dst);
                return -EACCES;
        }
 
@@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        dst_reg->type = ptr_reg->type;
        dst_reg->id = ptr_reg->id;
 
+       if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
+           !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
+               return -EINVAL;
+
        switch (opcode) {
        case BPF_ADD:
                /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_SUB:
                if (dst_reg == off_reg) {
                        /* scalar -= pointer.  Creates an unknown scalar */
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d tried to subtract pointer from scalar\n",
-                                       dst);
+                       verbose(env, "R%d tried to subtract pointer from scalar\n",
+                               dst);
                        return -EACCES;
                }
                /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                 * be able to deal with it.
                 */
                if (ptr_reg->type == PTR_TO_STACK) {
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d subtraction from stack pointer prohibited\n",
-                                       dst);
+                       verbose(env, "R%d subtraction from stack pointer prohibited\n",
+                               dst);
                        return -EACCES;
                }
                if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_AND:
        case BPF_OR:
        case BPF_XOR:
-               /* bitwise ops on pointers are troublesome, prohibit for now.
-                * (However, in principle we could allow some cases, e.g.
-                * ptr &= ~3 which would reduce min_value by 3.)
-                */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               /* bitwise ops on pointers are troublesome, prohibit. */
+               verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        default:
                /* other operators (e.g. MUL,LSH) produce non-pointer results */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        }
 
+       if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+               return -EINVAL;
+
        __update_reg_bounds(dst_reg);
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
 }
 
+/* WARNING: This function does calculations on 64-bit values, but the actual
+ * execution may occur on 32-bit values. Therefore, things like bitshifts
+ * need extra checks in the 32-bit case.
+ */
 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                                      struct bpf_insn *insn,
                                      struct bpf_reg_state *dst_reg,
@@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        bool src_known, dst_known;
        s64 smin_val, smax_val;
        u64 umin_val, umax_val;
+       u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 
-       if (BPF_CLASS(insn->code) != BPF_ALU64) {
-               /* 32-bit ALU ops are (32,32)->64 */
-               coerce_reg_to_32(dst_reg);
-               coerce_reg_to_32(&src_reg);
-       }
        smin_val = src_reg.smin_value;
        smax_val = src_reg.smax_value;
        umin_val = src_reg.umin_value;
@@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        src_known = tnum_is_const(src_reg.var_off);
        dst_known = tnum_is_const(dst_reg->var_off);
 
+       if (!src_known &&
+           opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+               __mark_reg_unknown(dst_reg);
+               return 0;
+       }
+
        switch (opcode) {
        case BPF_ADD:
                if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_LSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
@@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_RSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
                }
-               /* BPF_RSH is an unsigned shift, so make the appropriate casts */
-               if (dst_reg->smin_value < 0) {
-                       if (umin_val) {
-                               /* Sign bit will be cleared */
-                               dst_reg->smin_value = 0;
-                       } else {
-                               /* Lost sign bit information */
-                               dst_reg->smin_value = S64_MIN;
-                               dst_reg->smax_value = S64_MAX;
-                       }
-               } else {
-                       dst_reg->smin_value =
-                               (u64)(dst_reg->smin_value) >> umax_val;
-               }
+               /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+                * be negative, then either:
+                * 1) src_reg might be zero, so the sign bit of the result is
+                *    unknown, so we lose our signed bounds
+                * 2) it's known negative, thus the unsigned bounds capture the
+                *    signed bounds
+                * 3) the signed bounds cross zero, so they tell us nothing
+                *    about the result
+                * If the value in dst_reg is known nonnegative, then again the
+                *    unsigned bounds capture the signed bounds.
+                * Thus, in all cases it suffices to blow away our signed bounds
+                * and rely on inferring new ones from the unsigned bounds and
+                * var_off of the result.
+                */
+               dst_reg->smin_value = S64_MIN;
+               dst_reg->smax_value = S64_MAX;
                if (src_known)
                        dst_reg->var_off = tnum_rshift(dst_reg->var_off,
                                                       umin_val);
@@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                break;
        }
 
+       if (BPF_CLASS(insn->code) != BPF_ALU64) {
+               /* 32-bit ALU ops are (32,32)->32 */
+               coerce_reg_to_size(dst_reg, 4);
+               coerce_reg_to_size(&src_reg, 4);
+       }
+
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
@@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
        struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
        struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
        u8 opcode = BPF_OP(insn->code);
-       int rc;
 
        dst_reg = &regs[insn->dst_reg];
        src_reg = NULL;
@@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                if (src_reg->type != SCALAR_VALUE) {
                        if (dst_reg->type != SCALAR_VALUE) {
                                /* Combining two pointers by any ALU op yields
-                                * an arbitrary scalar.
+                                * an arbitrary scalar. Disallow all math except
+                                * pointer subtraction
                                 */
-                               if (!env->allow_ptr_leaks) {
-                                       verbose(env, "R%d pointer %s pointer prohibited\n",
-                                               insn->dst_reg,
-                                               bpf_alu_string[opcode >> 4]);
-                                       return -EACCES;
+                               if (opcode == BPF_SUB) {
+                                       mark_reg_unknown(env, regs, insn->dst_reg);
+                                       return 0;
                                }
-                               mark_reg_unknown(env, regs, insn->dst_reg);
-                               return 0;
+                               verbose(env, "R%d pointer %s pointer prohibited\n",
+                                       insn->dst_reg,
+                                       bpf_alu_string[opcode >> 4]);
+                               return -EACCES;
                        } else {
                                /* scalar += pointer
                                 * This is legal, but we have to reverse our
                                 * src/dest handling in computing the range
                                 */
-                               rc = adjust_ptr_min_max_vals(env, insn,
-                                                            src_reg, dst_reg);
-                               if (rc == -EACCES && env->allow_ptr_leaks) {
-                                       /* scalar += unknown scalar */
-                                       __mark_reg_unknown(&off_reg);
-                                       return adjust_scalar_min_max_vals(
-                                                       env, insn,
-                                                       dst_reg, off_reg);
-                               }
-                               return rc;
+                               return adjust_ptr_min_max_vals(env, insn,
+                                                              src_reg, dst_reg);
                        }
                } else if (ptr_reg) {
                        /* pointer += scalar */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    dst_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += scalar */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, *src_reg);
-                       }
-                       return rc;
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      dst_reg, src_reg);
                }
        } else {
                /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                off_reg.type = SCALAR_VALUE;
                __mark_reg_known(&off_reg, insn->imm);
                src_reg = &off_reg;
-               if (ptr_reg) { /* pointer += K */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    ptr_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += K */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, off_reg);
-                       }
-                       return rc;
-               }
+               if (ptr_reg) /* pointer += K */
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      ptr_reg, src_reg);
        }
 
        /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                                        return -EACCES;
                                }
                                mark_reg_unknown(env, regs, insn->dst_reg);
-                               /* high 32 bits are known zero. */
-                               regs[insn->dst_reg].var_off = tnum_cast(
-                                               regs[insn->dst_reg].var_off, 4);
-                               __update_reg_bounds(&regs[insn->dst_reg]);
+                               coerce_reg_to_size(&regs[insn->dst_reg], 4);
                        }
                } else {
                        /* case: R = imm
                         * remember the value we stored into this reg
                         */
                        regs[insn->dst_reg].type = SCALAR_VALUE;
-                       __mark_reg_known(regs + insn->dst_reg, insn->imm);
+                       if (BPF_CLASS(insn->code) == BPF_ALU64) {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                insn->imm);
+                       } else {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                (u32)insn->imm);
+                       }
                }
 
        } else if (opcode > BPF_END) {
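One effect of the split, as a hedged illustration: a 32-bit move of a negative constant is now tracked as its zero-extended 32-bit value, matching what the interpreter and JITs actually compute:

	BPF_MOV32_IMM(BPF_REG_0, -1),
	/* R0 is now recorded as the known scalar 0xffffffff,
	 * not the sign-extended 0xffffffffffffffff */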
@@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
                        return range_within(rold, rcur) &&
                               tnum_in(rold->var_off, rcur->var_off);
                } else {
-                       /* if we knew anything about the old value, we're not
-                        * equal, because we can't know anything about the
-                        * scalar value of the pointer in the new value.
+                       /* We're trying to use a pointer in place of a scalar.
+                        * Even if the scalar was unbounded, this could lead to
+                        * pointer leaks because scalars are allowed to leak
+                        * while pointers are not. We could make this safe in
+                        * special cases if root is calling us, but it's
+                        * probably not worth the hassle.
                         */
-                       return rold->umin_value == 0 &&
-                              rold->umax_value == U64_MAX &&
-                              rold->smin_value == S64_MIN &&
-                              rold->smax_value == S64_MAX &&
-                              tnum_is_unknown(rold->var_off);
+                       return false;
                }
        case PTR_TO_MAP_VALUE:
                /* If the new min/max/var_off satisfy the old ones and
index 432eadf6b58c18d9de6a3d09f3fef36089b4b5a2..2295fc69717f6c3d877ef3cac15b55336d7746c6 100644 (file)
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        goto out;
        }
        /* a new mm has just been created */
-       arch_dup_mmap(oldmm, mm);
-       retval = 0;
+       retval = arch_dup_mmap(oldmm, mm);
 out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
index 17f05ef8f575f996f87e614a914bbe21d537f949..e4d3819a91cc7d7bda5416bfd5aaba99a5576cce 100644 (file)
 
 static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
+       static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
+
+       if (!__ratelimit(&ratelimit))
+               return;
+
        printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
                irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
        printk("->handle_irq():  %p, ", desc->handle_irq);
index 7f608ac3965379fc112d85051fd80325b2ec84ce..acfaaef8672ad2b1d2419bd7522f5556ebb1ae50 100644 (file)
@@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = {
        BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
        BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
        BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+       BIT_MASK_DESCR(IRQD_CAN_RESERVE),
 
        BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
 
index c26c5bb6b491f75f76f1190cdc21989f79d17e09..508c03dfef254b9dc8c3a7a1e3e7307d3a7201b3 100644 (file)
@@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
 EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
 
 /*
- * Separate lockdep class for interrupt chip which can nest irq_desc
- * lock.
+ * Separate lockdep classes for interrupt chip which can nest irq_desc
+ * lock and request mutex.
  */
 static struct lock_class_key irq_nested_lock_class;
+static struct lock_class_key irq_nested_request_class;
 
 /*
  * irq_map_generic_chip - Map a generic chip for an irq domain
@@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
        set_bit(idx, &gc->installed);
 
        if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK)
-               irq_set_lockdep_class(virq, &irq_nested_lock_class);
+               irq_set_lockdep_class(virq, &irq_nested_lock_class,
+                                     &irq_nested_request_class);
 
        if (chip->irq_calc_mask)
                chip->irq_calc_mask(data);
@@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
                        continue;
 
                if (flags & IRQ_GC_INIT_NESTED_LOCK)
-                       irq_set_lockdep_class(i, &irq_nested_lock_class);
+                       irq_set_lockdep_class(i, &irq_nested_lock_class,
+                                             &irq_nested_request_class);
 
                if (!(flags & IRQ_GC_NO_MASK)) {
                        struct irq_data *d = irq_get_irq_data(i);
index 07d08ca701ec4627b558d0435c54cf6de7e147d2..ab19371eab9b8e1e9a7789b882ed4a308883b555 100644 (file)
@@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
-static inline int irq_domain_activate_irq(struct irq_data *data, bool early)
+static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve)
 {
        irqd_set_activated(data);
        return 0;
index 4f4f60015e8ab4196ef1df5107f04beda37d4c6c..62068ad46930dd12088081df5233dffdfee0d7f4 100644 (file)
@@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
        }
 }
 
-static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
+static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
 {
        int ret = 0;
 
@@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
 
                if (irqd->parent_data)
                        ret = __irq_domain_activate_irq(irqd->parent_data,
-                                                       early);
+                                                       reserve);
                if (!ret && domain->ops->activate) {
-                       ret = domain->ops->activate(domain, irqd, early);
+                       ret = domain->ops->activate(domain, irqd, reserve);
                        /* Rollback in case of error */
                        if (ret && irqd->parent_data)
                                __irq_domain_deactivate_irq(irqd->parent_data);
@@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
 /**
  * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
  *                          interrupt
- * @irq_data:  outermost irq_data associated with interrupt
+ * @irq_data:  Outermost irq_data associated with interrupt
+ * @reserve:   If set, only reserve an interrupt vector instead of assigning one
  *
  * This is the second step to call domain_ops->activate to program interrupt
  * controllers, so the interrupt could actually get delivered.
  */
-int irq_domain_activate_irq(struct irq_data *irq_data, bool early)
+int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
 {
        int ret = 0;
 
        if (!irqd_is_activated(irq_data))
-               ret = __irq_domain_activate_irq(irq_data, early);
+               ret = __irq_domain_activate_irq(irq_data, reserve);
        if (!ret)
                irqd_set_activated(irq_data);
        return ret;
index edb987b2c58dc1553342b5a87c91335b42888e8b..2f3c4f5382cc6bad8daf528146cf613d9a1ac3e6 100644 (file)
@@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
        return ret;
 }
 
+/*
+ * Carefully check whether the device can use reservation mode. If
+ * reservation mode is enabled then the early activation will assign a
+ * dummy vector to the device. If the PCI/MSI device does not support
+ * masking of the entry then this can result in spurious interrupts when
+ * the device driver is not absolutely careful. But even then a malfunction
+ * of the hardware could result in a spurious interrupt on the dummy vector
+ * and render the device unusable. If the entry can be masked then the core
+ * logic will prevent the spurious interrupt and reservation mode can be
+ * used. For now reservation mode is restricted to PCI/MSI.
+ */
+static bool msi_check_reservation_mode(struct irq_domain *domain,
+                                      struct msi_domain_info *info,
+                                      struct device *dev)
+{
+       struct msi_desc *desc;
+
+       if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
+               return false;
+
+       if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
+               return false;
+
+       if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
+               return false;
+
+       /*
+        * Checking the first MSI descriptor is sufficient. MSIX supports
+        * masking and MSI does so when the maskbit is set.
+        */
+       desc = first_msi_entry(dev);
+       return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+}
+
 /**
  * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
  * @domain:    The domain to allocate from
@@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 {
        struct msi_domain_info *info = domain->host_data;
        struct msi_domain_ops *ops = info->ops;
-       msi_alloc_info_t arg;
+       struct irq_data *irq_data;
        struct msi_desc *desc;
+       msi_alloc_info_t arg;
        int i, ret, virq;
+       bool can_reserve;
 
        ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
        if (ret)
@@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
        if (ops->msi_finish)
                ops->msi_finish(&arg, 0);
 
+       can_reserve = msi_check_reservation_mode(domain, info, dev);
+
        for_each_msi_entry(desc, dev) {
                virq = desc->irq;
                if (desc->nvec_used == 1)
@@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
                 * the MSI entries before the PCI layer enables MSI in the
                 * card. Otherwise the card latches a random msi message.
                 */
-               if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
-                       struct irq_data *irq_data;
+               if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
+                       continue;
 
+               irq_data = irq_domain_get_irq_data(domain, desc->irq);
+               if (!can_reserve)
+                       irqd_clr_can_reserve(irq_data);
+               ret = irq_domain_activate_irq(irq_data, can_reserve);
+               if (ret)
+                       goto cleanup;
+       }
+
+       /*
+        * If these interrupts use reservation mode, clear the activated bit
+        * so request_irq() will assign the final vector.
+        */
+       if (can_reserve) {
+               for_each_msi_entry(desc, dev) {
                        irq_data = irq_domain_get_irq_data(domain, desc->irq);
-                       ret = irq_domain_activate_irq(irq_data, true);
-                       if (ret)
-                               goto cleanup;
-                       if (info->flags & MSI_FLAG_MUST_REACTIVATE)
-                               irqd_clr_activated(irq_data);
+                       irqd_clr_activated(irq_data);
                }
        }
        return 0;
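
Note: the reservation-mode check above reduces to three gates: the domain must be PCI/MSI, the domain must require reactivation (MSI_FLAG_MUST_REACTIVATE), and the first descriptor must be maskable (MSI-X, or plain MSI with the maskbit set). Below is a minimal user-space sketch of that decision under those assumptions; the struct names and constants are illustrative stand-ins, not the kernel's msi_desc/msi_domain_info types.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel structures; the fields mirror the
 * checks made by msi_check_reservation_mode() in the hunk above. */
struct toy_desc {
        bool is_msix;
        bool maskbit;
};

struct toy_domain {
        int bus_token;
};

struct toy_info {
        unsigned int flags;
};

#define BUS_PCI_MSI             1
#define FLAG_MUST_REACTIVATE    (1u << 0)

static bool can_use_reservation(const struct toy_domain *d,
                                const struct toy_info *info,
                                const struct toy_desc *first,
                                bool msi_ignore_mask)
{
        if (d->bus_token != BUS_PCI_MSI)                /* PCI/MSI only */
                return false;
        if (!(info->flags & FLAG_MUST_REACTIVATE))      /* must reactivate */
                return false;
        if (msi_ignore_mask)                            /* masking disabled */
                return false;
        /* MSI-X entries are always maskable; plain MSI only with maskbit */
        return first->is_msix || first->maskbit;
}

int main(void)
{
        struct toy_domain d = { BUS_PCI_MSI };
        struct toy_info i = { FLAG_MUST_REACTIVATE };
        struct toy_desc msi_nomask = { false, false };
        struct toy_desc msix = { true, false };

        printf("plain MSI, no maskbit: %d\n",
               can_use_reservation(&d, &i, &msi_nomask, false));
        printf("MSI-X entry          : %d\n",
               can_use_reservation(&d, &i, &msix, false));
        return 0;
}
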
index 2f52ec0f1539fce62a099d9fa559df334a19869c..d6717a3331a1b21bd1be02f034596ab293e8ceac 100644 (file)
@@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 #ifdef CONFIG_NO_HZ_COMMON
 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 {
-       unsigned long idle_calls = tick_nohz_get_idle_calls();
+       unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
        bool ret = idle_calls == sg_cpu->saved_idle_calls;
 
        sg_cpu->saved_idle_calls = idle_calls;
index 99578f06c8d4fe57cb15cdd44fd58865cae8d0f3..77555faf6fbc578e5006dc28d5d03675d8afd6a2 100644 (file)
@@ -985,6 +985,19 @@ ktime_t tick_nohz_get_sleep_length(void)
        return ts->sleep_length;
 }
 
+/**
+ * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
+ * for a particular CPU.
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
+{
+       struct tick_sched *ts = tick_get_tick_sched(cpu);
+
+       return ts->idle_calls;
+}
+
 /**
  * tick_nohz_get_idle_calls - return the current idle calls counter value
  *
index c87766c1c20446de2d191edda885669d447adecc..9ab18995ff1ebeea0e862f48b43f64924c87e127 100644 (file)
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 /* Missed count stored at end */
 #define RB_MISSED_STORED       (1 << 30)
 
+#define RB_MISSED_FLAGS                (RB_MISSED_EVENTS|RB_MISSED_STORED)
+
 struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
        local_t          commit;        /* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  */
 size_t ring_buffer_page_len(void *page)
 {
-       return local_read(&((struct buffer_data_page *)page)->commit)
+       struct buffer_data_page *bpage = page;
+
+       return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
                + BUF_PAGE_HDR_SIZE;
 }
 
@@ -4400,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 {
        struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        struct buffer_data_page *bpage = data;
+       struct page *page = virt_to_page(bpage);
        unsigned long flags;
 
+       /* If the page is still in use someplace else, we can't reuse it */
+       if (page_ref_count(page) > 1)
+               goto out;
+
        local_irq_save(flags);
        arch_spin_lock(&cpu_buffer->lock);
 
@@ -4413,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
        arch_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
 
+ out:
        free_page((unsigned long)bpage);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
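
Note: the ring_buffer_page_len() change matters because the missed-events flags are stored in the top bits of the per-page commit counter, so an unmasked read reports a nonsensical length. A small stand-alone sketch of the masking arithmetic follows; the constants are illustrative, not taken from the kernel headers.

#include <stdio.h>

/* Flag bits packed into the high end of the commit counter, mirroring
 * RB_MISSED_EVENTS / RB_MISSED_STORED in the hunk above. */
#define MISSED_EVENTS   (1u << 31)
#define MISSED_STORED   (1u << 30)
#define MISSED_FLAGS    (MISSED_EVENTS | MISSED_STORED)

static unsigned int page_data_len(unsigned int commit)
{
        /* strip the flag bits, keep only the committed byte count */
        return commit & ~MISSED_FLAGS;
}

int main(void)
{
        unsigned int commit = 4000u | MISSED_EVENTS;    /* 4000 bytes + flag */

        printf("raw commit word: %u\n", commit);
        printf("real page bytes: %u\n", page_data_len(commit));
        return 0;
}
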
index 59518b8126d04b4f1f62a526571490dda8398e3b..2a8d8a294345a258baca50b8a6b272c1ac0fc658 100644 (file)
@@ -6769,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                .spd_release    = buffer_spd_release,
        };
        struct buffer_ref *ref;
-       int entries, size, i;
+       int entries, i;
        ssize_t ret = 0;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -6823,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                        break;
                }
 
-               /*
-                * zero out any left over data, this is going to
-                * user land.
-                */
-               size = ring_buffer_page_len(ref->page);
-               if (size < PAGE_SIZE)
-                       memset(ref->page + size, 0, PAGE_SIZE - size);
-
                page = virt_to_page(ref->page);
 
                spd.pages[i] = page;
@@ -7588,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
        buf->data = alloc_percpu(struct trace_array_cpu);
        if (!buf->data) {
                ring_buffer_free(buf->buffer);
+               buf->buffer = NULL;
                return -ENOMEM;
        }
 
@@ -7611,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
                                    allocate_snapshot ? size : 1);
        if (WARN_ON(ret)) {
                ring_buffer_free(tr->trace_buffer.buffer);
+               tr->trace_buffer.buffer = NULL;
                free_percpu(tr->trace_buffer.data);
+               tr->trace_buffer.data = NULL;
                return -ENOMEM;
        }
        tr->allocated_snapshot = allocate_snapshot;
index c3e84edc47c965d40199b652ba78876cdaa9c70c..2615074d3de5c63e63da31995adc4a1f07f7e9fd 100644 (file)
@@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
 static void zap_modalias_env(struct kobj_uevent_env *env)
 {
        static const char modalias_prefix[] = "MODALIAS=";
-       int i;
+       size_t len;
+       int i, j;
 
        for (i = 0; i < env->envp_idx;) {
                if (strncmp(env->envp[i], modalias_prefix,
@@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env)
                        continue;
                }
 
-               if (i != env->envp_idx - 1)
-                       memmove(&env->envp[i], &env->envp[i + 1],
-                               sizeof(env->envp[i]) * env->envp_idx - 1);
+               len = strlen(env->envp[i]) + 1;
+
+               if (i != env->envp_idx - 1) {
+                       memmove(env->envp[i], env->envp[i + 1],
+                               env->buflen - len);
+
+                       for (j = i; j < env->envp_idx - 1; j++)
+                               env->envp[j] = env->envp[j + 1] - len;
+               }
 
                env->envp_idx--;
+               env->buflen -= len;
        }
 }
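
Note: the zap_modalias_env() fix has two parts: the tail of the packed string buffer is moved down over the removed entry, and every later envp[] pointer is rebased by the removed length, with buflen shrunk to match. A self-contained user-space model of that bookkeeping follows; the buffer size and helper names are invented for illustration, and the tail size is computed explicitly from the pointer offsets.

#include <stdio.h>
#include <string.h>

#define MAX_ENV 8

/* Env strings live back to back in one buffer and envp[] points into it,
 * loosely modelling struct kobj_uevent_env. */
struct toy_env {
        char buf[256];
        char *envp[MAX_ENV];
        int envp_idx;
        size_t buflen;
};

static void env_add(struct toy_env *e, const char *s)
{
        size_t len = strlen(s) + 1;

        memcpy(e->buf + e->buflen, s, len);
        e->envp[e->envp_idx++] = e->buf + e->buflen;
        e->buflen += len;
}

static void env_remove(struct toy_env *e, int i)
{
        size_t len = strlen(e->envp[i]) + 1;
        int j;

        if (i != e->envp_idx - 1) {
                /* bytes still in use after the removed string */
                size_t tail = e->buflen -
                              (size_t)(e->envp[i] - e->buf) - len;

                /* move the rest of the buffer down ... */
                memmove(e->envp[i], e->envp[i + 1], tail);
                /* ... and rebase the pointers that followed it */
                for (j = i; j < e->envp_idx - 1; j++)
                        e->envp[j] = e->envp[j + 1] - len;
        }
        e->envp_idx--;
        e->buflen -= len;
}

int main(void)
{
        struct toy_env e = { .envp_idx = 0, .buflen = 0 };
        int i;

        env_add(&e, "ACTION=add");
        env_add(&e, "MODALIAS=pci:v8086");
        env_add(&e, "SEQNUM=42");

        env_remove(&e, 1);              /* drop the MODALIAS entry */

        for (i = 0; i < e.envp_idx; i++)
                printf("%s\n", e.envp[i]);
        return 0;
}
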
 
index aa8812ae6776ee31712fe88c58da4048ff9c31e4..9e97480892709957e127e9941710ce45f11ff724 100644 (file)
@@ -435,6 +435,41 @@ loop:
        return 0;
 }
 
+static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+{
+       struct bpf_insn *insn;
+
+       insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       /* Due to func address being non-const, we need to
+        * assemble this here.
+        */
+       insn[0] = BPF_MOV64_REG(R6, R1);
+       insn[1] = BPF_LD_ABS(BPF_B, 0);
+       insn[2] = BPF_LD_ABS(BPF_H, 0);
+       insn[3] = BPF_LD_ABS(BPF_W, 0);
+       insn[4] = BPF_MOV64_REG(R7, R6);
+       insn[5] = BPF_MOV64_IMM(R6, 0);
+       insn[6] = BPF_MOV64_REG(R1, R7);
+       insn[7] = BPF_MOV64_IMM(R2, 1);
+       insn[8] = BPF_MOV64_IMM(R3, 2);
+       insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                              bpf_skb_vlan_push_proto.func - __bpf_call_base);
+       insn[10] = BPF_MOV64_REG(R6, R7);
+       insn[11] = BPF_LD_ABS(BPF_B, 0);
+       insn[12] = BPF_LD_ABS(BPF_H, 0);
+       insn[13] = BPF_LD_ABS(BPF_W, 0);
+       insn[14] = BPF_MOV64_IMM(R0, 42);
+       insn[15] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = 16;
+
+       return 0;
+}
+
 static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
        unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
                {},
                { {0x1, 0x42 } },
        },
+       {
+               "LD_ABS with helper changing skb data",
+               { },
+               INTERNAL,
+               { 0x34 },
+               { { ETH_HLEN, 42 } },
+               .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
+       },
 };
 
 static struct net_device dev;
index 84b2dc76f140e922e2ed0d7c4d545b4d4ddf496d..b5f940ce0143ba061a183db0df3ef0dc17f57c72 100644 (file)
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
-       if (bdi_debug_register(bdi, dev_name(dev))) {
-               device_destroy(bdi_class, dev->devt);
-               return -ENOMEM;
-       }
        cgwb_bdi_register(bdi);
        bdi->dev = dev;
 
+       bdi_debug_register(bdi, dev_name(dev));
        set_bit(WB_registered, &bdi->wb.state);
 
        spin_lock_bh(&bdi_lock);
index d0ef0a8e8831920cb86fc767eb0d756bf89feb46..015f465c514b28564c9e91eec40dc041b765fe25 100644 (file)
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
        struct net_bridge *br = netdev_priv(dev);
        int err;
 
+       err = register_netdevice(dev);
+       if (err)
+               return err;
+
        if (tb[IFLA_ADDRESS]) {
                spin_lock_bh(&br->lock);
                br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
                spin_unlock_bh(&br->lock);
        }
 
-       err = register_netdevice(dev);
-       if (err)
-               return err;
-
        err = br_changelink(dev, tb, data, extack);
        if (err)
-               unregister_netdevice(dev);
+               br_dev_delete(dev, NULL);
+
        return err;
 }
 
index f47e96b623088ae354947787ef17217cd32ae64e..01ee854454a8089cdd49e2c8964a99f6a2d74730 100644 (file)
@@ -3904,7 +3904,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                                     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
                                     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
                        goto do_drop;
-               if (troom > 0 && __skb_linearize(skb))
+               if (skb_linearize(skb))
                        goto do_drop;
        }
 
index b797832565d34ccefb374ee87f9cbc460779a484..60a71be75aea063b418a48ade2a1e1c7804ab35c 100644 (file)
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
        spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
-               get_net(peer);
+               peer = maybe_get_net(peer);
        spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();
 
index a592ca025fc46bf4ff26c900d273b1ae12afa334..08f57408131523adec3cc36044952e14aa5a6b9a 100644 (file)
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        int i, new_frags;
        u32 d_off;
 
-       if (!num_frags)
-               return 0;
-
        if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
                return -EINVAL;
 
+       if (!num_frags)
+               goto release;
+
        new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        for (i = 0; i < new_frags; i++) {
                page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
        skb_shinfo(skb)->nr_frags = new_frags;
 
+release:
        skb_zcopy_clear(skb, false);
        return 0;
 }
@@ -3654,8 +3655,6 @@ normal:
 
                skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
                                              SKBTX_SHARED_FRAG;
-               if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
-                       goto err;
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
 
                        if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
                                goto err;
+                       if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+                               goto err;
 
                        *nskb_frag = *frag;
                        __skb_frag_ref(nskb_frag);
index f52d27a422c37298b2ad0c1dbba0e5307f1a46b6..08259d078b1ca821c581aeb34251c79a9aba8c8d 100644 (file)
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
 
 static void ip_fib_net_exit(struct net *net)
 {
-       unsigned int i;
+       int i;
 
        rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
-       for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+       /* Destroy the tables in reverse order to guarantee that the
+        * local table, ID 255, is destroyed before the main table, ID
+        * 254. This is necessary as the local table may contain
+        * references to data contained in the main table.
+        */
+       for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
                struct hlist_head *head = &net->ipv4.fib_table_hash[i];
                struct hlist_node *tmp;
                struct fib_table *tb;
index f04d944f8abe0bfbb840837bb35d28fe6d8d25d0..c586597da20dbb0e46eb0f693fd65bccfc8f3633 100644 (file)
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 
        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                int type = nla_type(nla);
-               u32 val;
+               u32 fi_val, val;
 
                if (!type)
                        continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
                        val = nla_get_u32(nla);
                }
 
-               if (fi->fib_metrics->metrics[type - 1] != val)
+               fi_val = fi->fib_metrics->metrics[type - 1];
+               if (type == RTAX_FEATURES)
+                       fi_val &= ~DST_FEATURE_ECN_CA;
+
+               if (fi_val != val)
                        return false;
        }
 
index 9c1735632c8c43cc0607d54403571362895d91e2..45ffd3d045d240cad8e4d0ed8dd0dd7da997bf9e 100644 (file)
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
 static void ipgre_tap_setup(struct net_device *dev)
 {
        ether_setup(dev);
+       dev->max_mtu = 0;
        dev->netdev_ops = &gre_tap_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
index e50b7fea57ee35c117002463f473ccd41face358..bcfc00e88756dabb1f491d3d41137ccbc7ab1cbc 100644 (file)
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
        return xfrm4_extract_header(skb);
 }
 
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       return dst_input(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
                                         struct sk_buff *skb)
 {
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
                                         iph->tos, skb->dev))
                        goto drop;
        }
-       return dst_input(skb);
+
+       if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+               goto drop;
+
+       return 0;
 drop:
        kfree_skb(skb);
        return NET_RX_DROP;
index c26f71234b9c01a82ec9d40423ee28957468ed46..c9441ca4539936486291147a47a84ef1b2ecf095 100644 (file)
@@ -210,7 +210,6 @@ lookup_protocol:
        np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
        np->mc_loop     = 1;
        np->pmtudisc    = IPV6_PMTUDISC_WANT;
-       np->autoflowlabel = ip6_default_np_autolabel(net);
        np->repflow     = net->ipv6.sysctl.flowlabel_reflect;
        sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
 
index 4cfd8e0696fe77f6d7af7ca3579a2418aef972f6..772695960890893f9ab7862cf64728b783f5bb96 100644 (file)
@@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
        eth_random_addr(dev->perm_addr);
 }
 
+#define GRE6_FEATURES (NETIF_F_SG |            \
+                      NETIF_F_FRAGLIST |       \
+                      NETIF_F_HIGHDMA |        \
+                      NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+       struct ip6_tnl *nt = netdev_priv(dev);
+
+       dev->features           |= GRE6_FEATURES;
+       dev->hw_features        |= GRE6_FEATURES;
+
+       if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+               /* TCP offload with GRE SEQ is not supported, nor
+                * can we support 2 levels of outer headers requiring
+                * an update.
+                */
+               if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+                   nt->encap.type == TUNNEL_ENCAP_NONE) {
+                       dev->features    |= NETIF_F_GSO_SOFTWARE;
+                       dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+               }
+
+               /* Can use a lockless transmit, unless we generate
+                * output sequences
+                */
+               dev->features |= NETIF_F_LLTX;
+       }
+}
+
 static int ip6gre_tunnel_init_common(struct net_device *dev)
 {
        struct ip6_tnl *tunnel;
@@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
        if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
                dev->mtu -= 8;
 
+       ip6gre_tnl_init_features(dev);
+
        return 0;
 }
 
@@ -1298,16 +1330,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
        .ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
-#define GRE6_FEATURES (NETIF_F_SG |            \
-                      NETIF_F_FRAGLIST |       \
-                      NETIF_F_HIGHDMA |                \
-                      NETIF_F_HW_CSUM)
-
 static void ip6gre_tap_setup(struct net_device *dev)
 {
 
        ether_setup(dev);
 
+       dev->max_mtu = 0;
        dev->netdev_ops = &ip6gre_tap_netdev_ops;
        dev->needs_free_netdev = true;
        dev->priv_destructor = ip6gre_dev_free;
@@ -1382,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
        nt->net = dev_net(dev);
        ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
 
-       dev->features           |= GRE6_FEATURES;
-       dev->hw_features        |= GRE6_FEATURES;
-
-       if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
-               /* TCP offload with GRE SEQ is not supported, nor
-                * can we support 2 levels of outer headers requiring
-                * an update.
-                */
-               if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
-                   (nt->encap.type == TUNNEL_ENCAP_NONE)) {
-                       dev->features    |= NETIF_F_GSO_SOFTWARE;
-                       dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-               }
-
-               /* Can use a lockless transmit, unless we generate
-                * output sequences
-                */
-               dev->features |= NETIF_F_LLTX;
-       }
-
        err = register_netdevice(dev);
        if (err)
                goto out;
index 5110a418cc4d0c1040506394460cb482698d8c15..f7dd51c4231415fd1321fd431194d896ea2d1689 100644 (file)
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+       if (!np->autoflowlabel_set)
+               return ip6_default_np_autolabel(net);
+       else
+               return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                hlimit = ip6_dst_hoplimit(dst);
 
        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                                    np->autoflowlabel, fl6));
+                               ip6_autoflowlabel(net, np), fl6));
 
        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                       np->autoflowlabel, fl6));
+                                       ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
index db84f523656ddf876e1971c416ee03a6a1794d9d..931c38f6ff4a42fb17cf129cf6035706a24176dc 100644 (file)
@@ -1123,8 +1123,13 @@ route_lookup:
                max_headroom += 8;
                mtu -= 8;
        }
-       if (mtu < IPV6_MIN_MTU)
-               mtu = IPV6_MIN_MTU;
+       if (skb->protocol == htons(ETH_P_IPV6)) {
+               if (mtu < IPV6_MIN_MTU)
+                       mtu = IPV6_MIN_MTU;
+       } else if (mtu < 576) {
+               mtu = 576;
+       }
+
        if (skb_dst(skb) && !t->parms.collect_md)
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
index b9404feabd7857fe0873fbc4f346d281f3600807..2d4680e0376f41deee6c999eadaf9409353e0b4a 100644 (file)
@@ -886,6 +886,7 @@ pref_skip_coa:
                break;
        case IPV6_AUTOFLOWLABEL:
                np->autoflowlabel = valbool;
+               np->autoflowlabel_set = 1;
                retv = 0;
                break;
        case IPV6_RECVFRAGSIZE:
index 7a8d1500d374b4089e623ed2b20d68110cff498e..0458b761f3c56ce765841e0a3a7e5e78f90b95eb 100644 (file)
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        }
 
        rt->dst.flags |= DST_HOST;
+       rt->dst.input = ip6_input;
        rt->dst.output  = ip6_output;
        rt->rt6i_gateway  = fl6->daddr;
        rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (!ipv6_addr_any(&fl6.saddr))
                        flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-               if (!fibmatch)
-                       dst = ip6_route_input_lookup(net, dev, &fl6, flags);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 
                rcu_read_unlock();
        } else {
                fl6.flowi6_oif = oif;
 
-               if (!fibmatch)
-                       dst = ip6_route_output(net, NULL, &fl6);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_output(net, NULL, &fl6);
        }
 
 
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                goto errout;
        }
 
+       if (fibmatch && rt->dst.from) {
+               struct rt6_info *ort = container_of(rt->dst.from,
+                                                   struct rt6_info, dst);
+
+               dst_hold(&ort->dst);
+               ip6_rt_put(rt);
+               rt = ort;
+       }
+
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb) {
                ip6_rt_put(rt);
index fe04e23af9862557fde2a9c214faf3a10b7e5eda..841f4a07438e83502eadd6ec6c16a16d1de6aa55 100644 (file)
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       if (xfrm_trans_queue(skb, ip6_rcv_finish))
+               __kfree_skb(skb);
+       return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
        struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
        NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
                dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-               ip6_rcv_finish);
+               xfrm6_transport_finish2);
        return -1;
 }
 
index dbe2379329c5517fb164b6024d40fabebe7855c8..f039064ce922f3aac8419dcda65ad875f89e966b 100644 (file)
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                        return -EINVAL;
 
                skb_reset_network_header(skb);
+               key->eth.type = skb->protocol;
        } else {
                eth = eth_hdr(skb);
                ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                if (unlikely(parse_vlan(skb, key)))
                        return -ENOMEM;
 
-               skb->protocol = parse_ethertype(skb);
-               if (unlikely(skb->protocol == htons(0)))
+               key->eth.type = parse_ethertype(skb);
+               if (unlikely(key->eth.type == htons(0)))
                        return -ENOMEM;
 
+               /* Multiple tagged packets need to retain TPID to satisfy
+                * skb_vlan_pop(), which will later shift the ethertype into
+                * skb->protocol.
+                */
+               if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+                       skb->protocol = key->eth.cvlan.tpid;
+               else
+                       skb->protocol = key->eth.type;
+
                skb_reset_network_header(skb);
                __skb_push(skb, skb->data - skb_mac_header(skb));
        }
        skb_reset_mac_len(skb);
-       key->eth.type = skb->protocol;
 
        /* Network layer. */
        if (key->eth.type == htons(ETH_P_IP)) {
index b52cdc8ae428819a5853509a06852ec2ebc43a5a..f72466c63f0c5657a2f44e11ae432dd1457991cf 100644 (file)
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
                        continue;
 
                if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+                       if (cmsg->cmsg_len <
+                           CMSG_LEN(sizeof(struct rds_rdma_args)))
+                               return -EINVAL;
                        args = CMSG_DATA(cmsg);
                        *rdma_bytes += args->remote_vec.bytes;
                }
index b91ea03e3afa717225c00a3d2a03e9d722229fbc..b9d63d2246e667329c30606165dd485b9dc777aa 100644 (file)
@@ -379,6 +379,8 @@ void tcf_block_put(struct tcf_block *block)
 {
        struct tcf_block_ext_info ei = {0, };
 
+       if (!block)
+               return;
        tcf_block_put_ext(block, block->q, &ei);
 }
 
index 6fe798c2df1a5303cd61cd3ad53cd2f9385d16de..8d78e7f4ecc33082517aaab5767a30c119f49dc0 100644 (file)
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
        struct list_head link;
        struct tcf_result res;
        bool exts_integrated;
-       bool offloaded;
        u32 gen_flags;
        struct tcf_exts exts;
        u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-                              enum tc_clsbpf_command cmd)
+                              struct cls_bpf_prog *oldprog)
 {
-       bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
        struct tcf_block *block = tp->chain->block;
-       bool skip_sw = tc_skip_sw(prog->gen_flags);
        struct tc_cls_bpf_offload cls_bpf = {};
+       struct cls_bpf_prog *obj;
+       bool skip_sw;
        int err;
 
+       skip_sw = prog && tc_skip_sw(prog->gen_flags);
+       obj = prog ?: oldprog;
+
        tc_cls_common_offload_init(&cls_bpf.common, tp);
-       cls_bpf.command = cmd;
-       cls_bpf.exts = &prog->exts;
-       cls_bpf.prog = prog->filter;
-       cls_bpf.name = prog->bpf_name;
-       cls_bpf.exts_integrated = prog->exts_integrated;
-       cls_bpf.gen_flags = prog->gen_flags;
+       cls_bpf.command = TC_CLSBPF_OFFLOAD;
+       cls_bpf.exts = &obj->exts;
+       cls_bpf.prog = prog ? prog->filter : NULL;
+       cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+       cls_bpf.name = obj->bpf_name;
+       cls_bpf.exts_integrated = obj->exts_integrated;
+       cls_bpf.gen_flags = obj->gen_flags;
 
        err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-       if (addorrep) {
+       if (prog) {
                if (err < 0) {
-                       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+                       cls_bpf_offload_cmd(tp, oldprog, prog);
                        return err;
                } else if (err > 0) {
                        prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
                }
        }
 
-       if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+       if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
                return -EINVAL;
 
        return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                           struct cls_bpf_prog *oldprog)
 {
-       struct cls_bpf_prog *obj = prog;
-       enum tc_clsbpf_command cmd;
-       bool skip_sw;
-       int ret;
-
-       skip_sw = tc_skip_sw(prog->gen_flags) ||
-               (oldprog && tc_skip_sw(oldprog->gen_flags));
-
-       if (oldprog && oldprog->offloaded) {
-               if (!tc_skip_hw(prog->gen_flags)) {
-                       cmd = TC_CLSBPF_REPLACE;
-               } else if (!tc_skip_sw(prog->gen_flags)) {
-                       obj = oldprog;
-                       cmd = TC_CLSBPF_DESTROY;
-               } else {
-                       return -EINVAL;
-               }
-       } else {
-               if (tc_skip_hw(prog->gen_flags))
-                       return skip_sw ? -EINVAL : 0;
-               cmd = TC_CLSBPF_ADD;
-       }
-
-       ret = cls_bpf_offload_cmd(tp, obj, cmd);
-       if (ret)
-               return ret;
+       if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+               return -EINVAL;
 
-       obj->offloaded = true;
-       if (oldprog)
-               oldprog->offloaded = false;
+       if (prog && tc_skip_hw(prog->gen_flags))
+               prog = NULL;
+       if (oldprog && tc_skip_hw(oldprog->gen_flags))
+               oldprog = NULL;
+       if (!prog && !oldprog)
+               return 0;
 
-       return 0;
+       return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
        int err;
 
-       if (!prog->offloaded)
-               return;
-
-       err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-       if (err) {
+       err = cls_bpf_offload_cmd(tp, NULL, prog);
+       if (err)
                pr_err("Stopping hardware offload failed: %d\n", err);
-               return;
-       }
-
-       prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
                                         struct cls_bpf_prog *prog)
 {
-       if (!prog->offloaded)
-               return;
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_bpf_offload cls_bpf = {};
+
+       tc_cls_common_offload_init(&cls_bpf.common, tp);
+       cls_bpf.command = TC_CLSBPF_STATS;
+       cls_bpf.exts = &prog->exts;
+       cls_bpf.prog = prog->filter;
+       cls_bpf.name = prog->bpf_name;
+       cls_bpf.exts_integrated = prog->exts_integrated;
+       cls_bpf.gen_flags = prog->gen_flags;
 
-       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+       tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
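
Note: after this refactor a single offload call carries its meaning in which of prog/oldprog is non-NULL: add, replace, or destroy. A stand-alone sketch of that calling convention follows; the types and the offload() helper are stand-ins, not the kernel's cls_bpf API.

#include <stdio.h>

/* Stand-in for an offloaded filter; only a name is needed here. */
struct prog {
        const char *name;
};

/*
 * One entry point: new == NULL means destroy old, old == NULL means add
 * new, both set means replace. This mirrors how cls_bpf_offload_cmd()
 * above picks "obj = prog ?: oldprog" and hands both filters to the
 * driver in a single TC_CLSBPF_OFFLOAD command.
 */
static void offload(const struct prog *newp, const struct prog *oldp)
{
        if (newp && oldp)
                printf("replace %s -> %s\n", oldp->name, newp->name);
        else if (newp)
                printf("add %s\n", newp->name);
        else if (oldp)
                printf("destroy %s\n", oldp->name);
        /* both NULL: nothing to do, the caller filters this case out */
}

int main(void)
{
        struct prog a = { "progA" };
        struct prog b = { "progB" };

        offload(&a, NULL);      /* add */
        offload(&b, &a);        /* replace */
        offload(NULL, &b);      /* destroy */
        return 0;
}
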
index cd1b200acae7415d5e26c8aa3dbfed602f5796b2..661c7144b53af048b3a65484777910e2d60f25aa 100644 (file)
@@ -1040,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 
        if (!tp_head) {
                RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+               /* Wait for in-flight RCU callbacks before the old miniq is freed. */
+               rcu_barrier_bh();
                return;
        }
 
@@ -1055,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
        rcu_assign_pointer(*miniqp->p_miniq, miniq);
 
        if (miniq_old)
-               /* This is counterpart of the rcu barrier above. We need to
+               /* This is the counterpart of the rcu barriers above. We need to
                 * block potential new user of miniq_old until all readers
                 * are not seeing it.
                 */
index 3f619fdcbf0a0b4a6f35ece8021c011f874a2d79..291c97b07058218635fcfcd06214aa79d74ec80d 100644 (file)
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
        case SCTP_CID_AUTH:
                return "AUTH";
 
+       case SCTP_CID_RECONF:
+               return "RECONF";
+
        default:
                break;
        }
index 3253f724a995256084dcb1f6610de3384b475e79..b4fb6e4886d264f302fc67b8d822cd8bdb5f3f41 100644 (file)
@@ -4498,7 +4498,7 @@ static int sctp_init_sock(struct sock *sk)
        SCTP_DBG_OBJCNT_INC(sock);
 
        local_bh_disable();
-       percpu_counter_inc(&sctp_sockets_allocated);
+       sk_sockets_allocated_inc(sk);
        sock_prot_inuse_add(net, sk->sk_prot, 1);
 
        /* Nothing can fail after this block, otherwise
@@ -4542,7 +4542,7 @@ static void sctp_destroy_sock(struct sock *sk)
        }
        sctp_endpoint_free(sp->ep);
        local_bh_disable();
-       percpu_counter_dec(&sctp_sockets_allocated);
+       sk_sockets_allocated_dec(sk);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
 }
index a71be33f3afeb0aaaef174ee082c4c547aab1e2d..e36ec5dd64c6ff969fc30aae893d1d5ca8c221bf 100644 (file)
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
                      gfp_t gfp)
 {
-       struct sctp_association *asoc;
-       __u16 needed, freed;
-
-       asoc = ulpq->asoc;
+       struct sctp_association *asoc = ulpq->asoc;
+       __u32 freed = 0;
+       __u16 needed;
 
-       if (chunk) {
-               needed = ntohs(chunk->chunk_hdr->length);
-               needed -= sizeof(struct sctp_data_chunk);
-       } else
-               needed = SCTP_DEFAULT_MAXWINDOW;
-
-       freed = 0;
+       needed = ntohs(chunk->chunk_hdr->length) -
+                sizeof(struct sctp_data_chunk);
 
        if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
                freed = sctp_ulpq_renege_order(ulpq, needed);
-               if (freed < needed) {
+               if (freed < needed)
                        freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
-               }
        }
        /* If able to free enough room, accept this chunk. */
-       if (chunk && (freed >= needed)) {
-               int retval;
-               retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+       if (freed >= needed) {
+               int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
                /*
                 * Enter partial delivery if chunk has not been
                 * delivered; otherwise, drain the reassembly queue.
index c5fda15ba3193f811151043ac3675a2ebfb15c38..1fdab5c4eda8c2a218448abb9bc461cf6474615c 100644 (file)
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
         * allows a thread in BH context to safely check if the process
         * lock is held. In this case, if the lock is held, queue work.
         */
-       if (sock_owned_by_user(strp->sk)) {
+       if (sock_owned_by_user_nocheck(strp->sk)) {
                queue_work(strp_wq, &strp->work);
                return;
        }
index 47ec121574ce4ef95850f688d85b50eff766a710..c8001471da6c3c53be6c63dde1311302b093f415 100644 (file)
@@ -324,6 +324,7 @@ restart:
        if (res) {
                pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
                        name, -res);
+               kfree(b);
                return -EINVAL;
        }
 
@@ -347,8 +348,10 @@ restart:
        if (skb)
                tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
 
-       if (tipc_mon_create(net, bearer_id))
+       if (tipc_mon_create(net, bearer_id)) {
+               bearer_disable(net, b);
                return -ENOMEM;
+       }
 
        pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
                name,
index 95fec2c057d6ebdb223e19ef83bf9c383cb2156e..8e12ab55346b0cf45b9244c470b36c5f81db30d2 100644 (file)
@@ -351,8 +351,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
        if (m->window >= ADV_IDLE)
                return;
 
-       if (!list_empty(&m->congested))
-               return;
+       list_del_init(&m->congested);
 
        /* Sort member into congested members' list */
        list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +368,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
        u16 prev = grp->bc_snd_nxt - 1;
        struct tipc_member *m;
        struct rb_node *n;
+       u16 ackers = 0;
 
        for (n = rb_first(&grp->members); n; n = rb_next(n)) {
                m = container_of(n, struct tipc_member, tree_node);
                if (tipc_group_is_enabled(m)) {
                        tipc_group_update_member(m, len);
                        m->bc_acked = prev;
+                       ackers++;
                }
        }
 
        /* Mark number of acknowledges to expect, if any */
        if (ack)
-               grp->bc_ackers = grp->member_cnt;
+               grp->bc_ackers = ackers;
        grp->bc_snd_nxt++;
 }
 
@@ -648,6 +649,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
        } else if (mtyp == GRP_REMIT_MSG) {
                msg_set_grp_remitted(hdr, m->window);
        }
+       msg_set_dest_droppable(hdr, true);
        __skb_queue_tail(xmitq, skb);
 }
 
@@ -689,15 +691,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                        msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                        __skb_queue_tail(inputq, m->event_msg);
                }
-               if (m->window < ADV_IDLE)
-                       tipc_group_update_member(m, 0);
-               else
-                       list_del_init(&m->congested);
+               list_del_init(&m->congested);
+               tipc_group_update_member(m, 0);
                return;
        case GRP_LEAVE_MSG:
                if (!m)
                        return;
                m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+               list_del_init(&m->list);
+               list_del_init(&m->congested);
+               *usr_wakeup = true;
 
                /* Wait until WITHDRAW event is received */
                if (m->state != MBR_LEAVING) {
@@ -709,8 +712,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                ehdr = buf_msg(m->event_msg);
                msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                __skb_queue_tail(inputq, m->event_msg);
-               *usr_wakeup = true;
-               list_del_init(&m->congested);
                return;
        case GRP_ADV_MSG:
                if (!m)
@@ -849,19 +850,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
                *usr_wakeup = true;
                m->usr_pending = false;
                node_up = tipc_node_is_up(net, node);
-
-               /* Hold back event if more messages might be expected */
-               if (m->state != MBR_LEAVING && node_up) {
-                       m->event_msg = skb;
-                       tipc_group_decr_active(grp, m);
-                       m->state = MBR_LEAVING;
-               } else {
-                       if (node_up)
+               m->event_msg = NULL;
+
+               if (node_up) {
+                       /* Hold back event if a LEAVE msg should be expected */
+                       if (m->state != MBR_LEAVING) {
+                               m->event_msg = skb;
+                               tipc_group_decr_active(grp, m);
+                               m->state = MBR_LEAVING;
+                       } else {
                                msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-                       else
+                               __skb_queue_tail(inputq, skb);
+                       }
+               } else {
+                       if (m->state != MBR_LEAVING) {
+                               tipc_group_decr_active(grp, m);
+                               m->state = MBR_LEAVING;
                                msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+                       } else {
+                               msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+                       }
                        __skb_queue_tail(inputq, skb);
                }
+               list_del_init(&m->list);
                list_del_init(&m->congested);
        }
        *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
index 8e884ed06d4b13d9751c27a51959b216b7f48b18..32dc33a94bc714f762a066389a4907e558244cd7 100644 (file)
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
 {
        struct tipc_net *tn = tipc_net(net);
        struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-       struct tipc_peer *self = get_self(net, bearer_id);
+       struct tipc_peer *self;
        struct tipc_peer *peer, *tmp;
 
+       if (!mon)
+               return;
+
+       self = get_self(net, bearer_id);
        write_lock_bh(&mon->lock);
        tn->monitors[bearer_id] = NULL;
        list_for_each_entry_safe(peer, tmp, &self->list, list) {
index 41127d0b925ea4d515e7c7bbe6739dee99a442f2..3b408448037769d3dba2da38fc00cdaf360e4950 100644 (file)
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 
        switch (sk->sk_state) {
        case TIPC_ESTABLISHED:
+       case TIPC_CONNECTING:
                if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
                        revents |= POLLOUT;
                /* fall thru' */
        case TIPC_LISTEN:
-       case TIPC_CONNECTING:
                if (!skb_queue_empty(&sk->sk_receive_queue))
                        revents |= POLLIN | POLLRDNORM;
                break;
index d7d6cb00c47bbab3c963f4b83839aadcc94606a1..1d84f91bbfb0c8c9087e309821eb687325733358 100644 (file)
@@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
 cfg80211-y += extra-certs.o
 endif
 
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
        @$(kecho) "  GEN     $@"
-       @(set -e; \
-         allf=""; \
-         for f in $^ ; do \
-             # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
-             thisf=$$(od -An -v -tx1 < $$f | \
-                          sed -e 's/ /\n/g' | \
-                          sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
-                          sed -e 's/^/0x/;s/$$/,/'); \
-             # file should not be empty - maybe command substitution failed? \
-             test ! -z "$$thisf";\
-             allf=$$allf$$thisf;\
-         done; \
-         ( \
-             echo '#include "reg.h"'; \
-             echo 'const u8 shipped_regdb_certs[] = {'; \
-             echo "$$allf"; \
-             echo '};'; \
-             echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
-         ) >> $@)
+       @(echo '#include "reg.h"'; \
+         echo 'const u8 shipped_regdb_certs[] = {'; \
+         cat $^ ; \
+         echo '};'; \
+         echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+        ) > $@
 
 $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
                      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
              echo "$$allf"; \
              echo '};'; \
              echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
-         ) >> $@)
+         ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644 (file)
index 0000000..14ea666
--- /dev/null
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644 (file)
index c6f8f9d..0000000
Binary files a/net/wireless/certs/sforshee.x509 and /dev/null differ
index b1ac23ca20c86be0af71e9a1ba92cc99d8d5a967..213d0c498c97d78b17c81d1fd8b850c8768f7057 100644 (file)
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        case NL80211_IFTYPE_AP:
                if (wdev->ssid_len &&
                    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
        case NL80211_IFTYPE_STATION:
        case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
                if (!ssid_ie)
                        break;
                if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
                }
        default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        genlmsg_end(msg, hdr);
        return 0;
 
+ nla_put_failure_locked:
+       wdev_unlock(wdev);
  nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
index 347ab31574d509ac9edd448bcbb7501a05f5ac2d..3f6f6f8c9fa5224e75c1b62f299f217da172d370 100644 (file)
@@ -8,15 +8,29 @@
  *
  */
 
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/percpu.h>
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ip_tunnels.h>
 #include <net/ip6_tunnel.h>
 
+struct xfrm_trans_tasklet {
+       struct tasklet_struct tasklet;
+       struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+       int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
 static struct gro_cells gro_cells;
 static struct net_device xfrm_napi_dev;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
 {
        int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
        xfrm_address_t *daddr;
        struct xfrm_mode *inner_mode;
        u32 mark = skb->mark;
-       unsigned int family;
+       unsigned int family = AF_UNSPEC;
        int decaps = 0;
        int async = 0;
        bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
        if (encap_type < 0) {
                x = xfrm_input_state(skb);
+
+               if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+                       if (x->km.state == XFRM_STATE_ACQ)
+                               XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+                       else
+                               XFRM_INC_STATS(net,
+                                              LINUX_MIB_XFRMINSTATEINVALID);
+                       goto drop;
+               }
+
                family = x->outer_mode->afinfo->family;
 
                /* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+       struct xfrm_trans_tasklet *trans = (void *)data;
+       struct sk_buff_head queue;
+       struct sk_buff *skb;
+
+       __skb_queue_head_init(&queue);
+       skb_queue_splice_init(&trans->queue, &queue);
+
+       while ((skb = __skb_dequeue(&queue)))
+               XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+                    int (*finish)(struct net *, struct sock *,
+                                  struct sk_buff *))
+{
+       struct xfrm_trans_tasklet *trans;
+
+       trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+       if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+               return -ENOBUFS;
+
+       XFRM_TRANS_SKB_CB(skb)->finish = finish;
+       skb_queue_tail(&trans->queue, skb);
+       tasklet_schedule(&trans->tasklet);
+       return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
 void __init xfrm_input_init(void)
 {
        int err;
+       int i;
 
        init_dummy_netdev(&xfrm_napi_dev);
        err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
                                           sizeof(struct sec_path),
                                           0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                           NULL);
+
+       for_each_possible_cpu(i) {
+               struct xfrm_trans_tasklet *trans;
+
+               trans = &per_cpu(xfrm_trans_tasklet, i);
+               __skb_queue_head_init(&trans->queue);
+               tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+                            (unsigned long)trans);
+       }
 }
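
The xfrm_input.c changes above add a per-CPU deferral queue: xfrm_trans_queue() stores a finish callback in the skb control block, refuses new packets once netdev_max_backlog is reached, and schedules a tasklet whose handler, xfrm_trans_reinject(), splices the queue and runs each callback. A single-threaded userspace analogue of that bounded queue-and-drain pattern might look like the sketch below; struct pkt, MAX_BACKLOG and the function names are stand-ins, not kernel API.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_BACKLOG 3   /* stands in for netdev_max_backlog */

struct pkt {
        int id;
        int (*finish)(struct pkt *p);   /* stands in for XFRM_TRANS_SKB_CB(skb)->finish */
        struct pkt *next;
};

/* One deferral queue; the kernel keeps one of these (plus a tasklet) per CPU. */
static struct pkt *queue_head, *queue_tail;
static int queue_len;

static int trans_queue(struct pkt *p, int (*finish)(struct pkt *))
{
        if (queue_len >= MAX_BACKLOG)
                return -ENOBUFS;        /* drop rather than queue without bound */
        p->finish = finish;
        p->next = NULL;
        if (queue_tail)
                queue_tail->next = p;
        else
                queue_head = p;
        queue_tail = p;
        queue_len++;
        return 0;       /* the kernel would tasklet_schedule() here */
}

/* Runs later (in the kernel: from the tasklet) and drains the spliced queue. */
static void trans_reinject(void)
{
        struct pkt *p = queue_head;

        queue_head = queue_tail = NULL;
        queue_len = 0;
        while (p) {
                struct pkt *next = p->next;

                p->finish(p);
                p = next;
        }
}

static int finish_pkt(struct pkt *p)
{
        printf("finishing packet %d\n", p->id);
        free(p);
        return 0;
}

int main(void)
{
        int i;

        for (i = 0; i < 5; i++) {
                struct pkt *p = malloc(sizeof(*p));

                p->id = i;
                if (trans_queue(p, finish_pkt) < 0) {
                        printf("packet %d dropped: backlog full\n", i);
                        free(p);
                }
        }
        trans_reinject();
        return 0;
}

The kernel version keeps one such queue per possible CPU, initialised in the for_each_possible_cpu() loop added to xfrm_input_init() above.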
index 9542975eb2f90dcb2bae894edeb9b418d04f252e..70aa5cb0c659d54eacb85f92dc95b2db17bfe1d0 100644 (file)
@@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
  again:
        pol = rcu_dereference(sk->sk_policy[dir]);
        if (pol != NULL) {
-               bool match = xfrm_selector_match(&pol->selector, fl, family);
+               bool match;
                int err = 0;
 
+               if (pol->family != family) {
+                       pol = NULL;
+                       goto out;
+               }
+
+               match = xfrm_selector_match(&pol->selector, fl, family);
                if (match) {
                        if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
                                pol = NULL;
@@ -1833,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
            xfrm_xdst_can_reuse(xdst, xfrm, err)) {
                dst_hold(&xdst->u.dst);
+               xfrm_pols_put(pols, num_pols);
                while (err > 0)
                        xfrm_state_put(xfrm[--err]);
                return xdst;
index 065d89606888ec1bf053577d3949746bcea6f099..500b3391f474b96fe273060ff8eae16f1e23f3c2 100644 (file)
@@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 
        if (orig->aead) {
                x->aead = xfrm_algo_aead_clone(orig->aead);
+               x->geniv = orig->geniv;
                if (!x->aead)
                        goto error;
        }
index 983b0233767bec16ba55b763ef82ce781ca5046a..bdb48e5dba0480aa4c3c6855be42cfb93f3bf335 100644 (file)
@@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 
 static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 {
+       u16 prev_family;
        int i;
 
        if (nr > XFRM_MAX_DEPTH)
                return -EINVAL;
 
+       prev_family = family;
+
        for (i = 0; i < nr; i++) {
                /* We never validated the ut->family value, so many
                 * applications simply leave it at zero.  The check was
@@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
                if (!ut[i].family)
                        ut[i].family = family;
 
+               if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+                   (ut[i].family != prev_family))
+                       return -EINVAL;
+
+               prev_family = ut[i].family;
+
                switch (ut[i].family) {
                case AF_INET:
                        break;
@@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
                default:
                        return -EINVAL;
                }
+
+               switch (ut[i].id.proto) {
+               case IPPROTO_AH:
+               case IPPROTO_ESP:
+               case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+               case IPPROTO_ROUTING:
+               case IPPROTO_DSTOPTS:
+#endif
+               case IPSEC_PROTO_ANY:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
        }
 
        return 0;
@@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_PROTO]           = { .type = NLA_U8 },
        [XFRMA_ADDRESS_FILTER]  = { .len = sizeof(struct xfrm_address_filter) },
        [XFRMA_OFFLOAD_DEV]     = { .len = sizeof(struct xfrm_user_offload) },
-       [XFRMA_OUTPUT_MARK]     = { .len = NLA_U32 },
+       [XFRMA_OUTPUT_MARK]     = { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
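
validate_tmpl() above gains two checks: transport-mode templates may not switch address family mid-chain, and the template protocol must be one of the known IPsec protocols. A minimal standalone sketch of just the family-consistency rule, with invented FAM_*/MODE_* constants so it builds outside the kernel:

#include <stdio.h>

/* Stand-in constants; the kernel uses AF_INET/AF_INET6 and XFRM_MODE_*. */
enum { FAM_UNSPEC = 0, FAM_INET = 2, FAM_INET6 = 10 };
enum { MODE_TRANSPORT = 0, MODE_TUNNEL = 1 };

struct tmpl { int mode; int family; };

/* Transport-mode templates must keep the same family as the previous entry. */
static int validate(const struct tmpl *ut, int nr, int family)
{
        int prev_family = family;
        int i;

        for (i = 0; i < nr; i++) {
                int fam = ut[i].family ? ut[i].family : family;

                if (ut[i].mode == MODE_TRANSPORT && fam != prev_family)
                        return -1;

                prev_family = fam;
        }
        return 0;
}

int main(void)
{
        struct tmpl ok[]  = { { MODE_TRANSPORT, FAM_INET },
                              { MODE_TUNNEL,    FAM_INET6 } };
        struct tmpl bad[] = { { MODE_TUNNEL,    FAM_INET6 },
                              { MODE_TRANSPORT, FAM_INET } };

        printf("ok chain:  %d\n", validate(ok, 2, FAM_INET));
        printf("bad chain: %d\n", validate(bad, 2, FAM_INET));
        return 0;
}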
index e8e449444e658be4a9190c6ea2de14cca8fc4890..a623d13bf2884afdc6fdcdbe656811b7d31b2951 100644 (file)
@@ -54,6 +54,16 @@ config SECURITY_NETWORK
          implement socket and networking access controls.
          If you are unsure how to answer this question, answer N.
 
+config PAGE_TABLE_ISOLATION
+       bool "Remove the kernel mapping in user mode"
+       depends on X86_64 && !UML
+       help
+         This feature reduces the number of hardware side channels by
+         ensuring that the majority of kernel addresses are not mapped
+         into userspace.
+
+         See Documentation/x86/pagetable-isolation.txt for more details.
+
 config SECURITY_INFINIBAND
        bool "Infiniband Security Hooks"
        depends on SECURITY && INFINIBAND
index b3b353d7252724e10f23a9288cd24aab3ef34007..f055ca10bbc1d33c9c1cee1fd913b7c930984ac1 100644 (file)
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
        return 0;
 }
 
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+                                    struct snd_rawmidi_info *info)
 {
        struct snd_rawmidi *rmidi;
        struct snd_rawmidi_str *pstr;
        struct snd_rawmidi_substream *substream;
 
-       mutex_lock(&register_mutex);
        rmidi = snd_rawmidi_search(card, info->device);
-       mutex_unlock(&register_mutex);
        if (!rmidi)
                return -ENXIO;
        if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
        }
        return -ENXIO;
 }
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+       int ret;
+
+       mutex_lock(&register_mutex);
+       ret = __snd_rawmidi_info_select(card, info);
+       mutex_unlock(&register_mutex);
+       return ret;
+}
 EXPORT_SYMBOL(snd_rawmidi_info_select);
 
 static int snd_rawmidi_info_select_user(struct snd_card *card,
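
The rawmidi change splits the exported function into an unlocked __snd_rawmidi_info_select() helper plus a thin wrapper that holds register_mutex across the whole lookup, rather than only around snd_rawmidi_search(). A generic sketch of that lock-wrapping pattern, with a pthread mutex and a toy registry in place of the ALSA internals:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static int registry[4] = { 10, 20, 30, 40 };    /* toy "registered devices" */

/* Does the real work; the caller must hold registry_lock for the whole call. */
static int __info_select(int idx, int *out)
{
        if (idx < 0 || idx >= 4)
                return -ENXIO;
        *out = registry[idx];   /* safe: the entry cannot go away under us */
        return 0;
}

/* Public entry point: take the lock once, around the entire operation. */
static int info_select(int idx, int *out)
{
        int ret;

        pthread_mutex_lock(&registry_lock);
        ret = __info_select(idx, out);
        pthread_mutex_unlock(&registry_lock);
        return ret;
}

int main(void)
{
        int val = 0;
        int ret = info_select(2, &val);

        printf("lookup 2: ret=%d val=%d\n", ret, val);
        ret = info_select(9, &val);
        printf("lookup 9: ret=%d\n", ret);
        return 0;
}

Holding the mutex only around the search, as before, leaves a window where the looked-up device can go away before it is used; the wrapper closes that window without cluttering the helper with locking.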
index 038a180d3f8117a7455ca6914ed883173aed8863..cbe818eda3363c0f0595c3496e980f64343776d6 100644 (file)
@@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
  */
 int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
 {
-       if (WARN_ON(!hdac_acomp))
+       if (!hdac_acomp)
                return -ENODEV;
 
        hdac_acomp->audio_ops = aops;
index a81aacf684b26341ec9257366d7c83a47962a16d..37e1cf8218ff0f864de4635d0188ed5b9b91d73c 100644 (file)
@@ -271,6 +271,8 @@ enum {
        CXT_FIXUP_HP_SPECTRE,
        CXT_FIXUP_HP_GATE_MIC,
        CXT_FIXUP_MUTE_LED_GPIO,
+       CXT_FIXUP_HEADSET_MIC,
+       CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
 
 /* for hda_fixup_thinkpad_acpi() */
@@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec,
        }
 }
 
+static void cxt_fixup_headset_mic(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct conexant_spec *spec = codec->spec;
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+               break;
+       }
+}
+
 /* OPLC XO 1.5 fixup */
 
 /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_mute_led_gpio,
        },
+       [CXT_FIXUP_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cxt_fixup_headset_mic,
+       },
+       [CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1a, 0x02a1113c },
+                       { }
+               },
+               .chained = true,
+               .chain_id = CXT_FIXUP_HEADSET_MIC,
+       },
 };
 
 static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
        SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
        SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
+       SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
        SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
index c19c81d230bd7423b4153d2266a45e09333f8714..b4f1b6e88305496f91d028ceb82fe9b8a6a60ccb 100644 (file)
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
                                ((codec)->core.vendor_id == 0x80862800))
+#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
                                || is_skylake(codec) || is_broxton(codec) \
-                               || is_kabylake(codec)) || is_geminilake(codec)
-
+                               || is_kabylake(codec)) || is_geminilake(codec) \
+                               || is_cannonlake(codec)
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",     patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",   patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
index 4b21f71d685c78fd00345b2e229541b493c614f0..8fd2d9c62c96ce53a78dced9d0cd3529961978a9 100644 (file)
@@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0292:
                alc_update_coef_idx(codec, 0x4, 1<<15, 0);
                break;
-       case 0x10ec0215:
        case 0x10ec0225:
+       case 0x10ec0295:
+       case 0x10ec0299:
+               alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
+               /* fallthrough */
+       case 0x10ec0215:
        case 0x10ec0233:
        case 0x10ec0236:
        case 0x10ec0255:
@@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0286:
        case 0x10ec0288:
        case 0x10ec0285:
-       case 0x10ec0295:
        case 0x10ec0298:
        case 0x10ec0289:
-       case 0x10ec0299:
                alc_update_coef_idx(codec, 0x10, 1<<9, 0);
                break;
        case 0x10ec0275:
@@ -5185,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
        }
 }
 
+/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
+static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static hda_nid_t preferred_pairs[] = {
+               0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
+               0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       spec->gen.preferred_dacs = preferred_pairs;
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5302,6 +5320,8 @@ enum {
        ALC233_FIXUP_LENOVO_MULTI_CODECS,
        ALC294_FIXUP_LENOVO_MIC_LOCATION,
        ALC700_FIXUP_INTEL_REFERENCE,
+       ALC274_FIXUP_DELL_BIND_DACS,
+       ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = {
                        {}
                }
        },
+       [ALC274_FIXUP_DELL_BIND_DACS] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc274_fixup_bind_dacs,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
+       [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x0401102f },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC274_FIXUP_DELL_BIND_DACS
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6295,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+       SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6552,6 +6588,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x1b, 0x01011020},
                {0x21, 0x02211010}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x14, 0x90170110},
+               {0x1b, 0x01011020},
+               {0x21, 0x0221101f}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x12, 0x90a60160},
                {0x14, 0x90170120},
@@ -6578,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x14, 0x90170110},
                {0x1b, 0x90a70130},
                {0x21, 0x03211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
                {0x12, 0xb7a60130},
                {0x13, 0xb8a61140},
                {0x16, 0x90170110},
index 9f521a55d610186ae78d433909edd26c8dc7035f..b5e41df6bb3a895f10a90756fab0e54954a685d7 100644 (file)
@@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev)
        struct resource *res;
        const u32 *pdata = pdev->dev.platform_data;
 
+       if (!pdata) {
+               dev_err(&pdev->dev, "Missing platform data\n");
+               return -ENODEV;
+       }
+
        audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
                                        GFP_KERNEL);
        if (audio_drv_data == NULL)
@@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(audio_drv_data->acp_mmio))
+               return PTR_ERR(audio_drv_data->acp_mmio);
 
        /* The following members gets populated in device 'open'
         * function. Till then interrupts are disabled in 'acp_init'
index 4a56f3dfba5132a01bbb49eb985540d16a944ecf..dcee145dd17922ac2f81b2fb92a4085d3754e78d 100644 (file)
@@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731
 config SND_ATMEL_SOC_CLASSD
        tristate "Atmel ASoC driver for boards using CLASSD"
        depends on ARCH_AT91 || COMPILE_TEST
-       select SND_ATMEL_SOC_DMA
+       select SND_SOC_GENERIC_DMAENGINE_PCM
        select REGMAP_MMIO
        help
          Say Y if you want to add support for Atmel ASoC driver for boards using
index b2d42ec1dcd9f7f75e98c04dd35c06351bf3ca7b..56564ce90cb6b0cb156f08ae39cc46a828f32594 100644 (file)
@@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec)
        }
 
        if (da7218->dev_id == DA7218_DEV_ID) {
-               hpldet_np = of_find_node_by_name(np, "da7218_hpldet");
+               hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
                if (!hpldet_np)
                        return pdata;
 
index 5f3c42c4f74ad7d12d5c697c56e028c8329c48cb..066ea2f4ce7b02a8f2cc58c2920b2ef7b6f2b160 100644 (file)
 #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
                        SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-                                   SNDRV_PCM_FMTBIT_S24_LE)
+                                   SNDRV_PCM_FMTBIT_S32_LE)
 
 static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
               SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
index a10a724eb448f4d06bffb034fbc86c68ccecb94d..13354d6304a848759433122e124d83119321bd71 100644 (file)
                                   SNDRV_PCM_RATE_32000 | \
                                   SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-                                    SNDRV_PCM_FMTBIT_S24_LE)
+                                    SNDRV_PCM_FMTBIT_S32_LE)
 
 struct msm8916_wcd_digital_priv {
        struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
                                    RX_I2S_CTL_RX_I2S_MODE_MASK,
                                    RX_I2S_CTL_RX_I2S_MODE_16);
                break;
-       case SNDRV_PCM_FORMAT_S24_LE:
+       case SNDRV_PCM_FORMAT_S32_LE:
                snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
                                    TX_I2S_CTL_TX_I2S_MODE_MASK,
                                    TX_I2S_CTL_TX_I2S_MODE_32);
index 714ce17da717c0edbc8817aa482dc1f2ec6fbcbe..e853a6dfd33b0ee7b8bf700dffea8b1e477f2d09 100644 (file)
@@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w,
 
        switch (event) {
        case SND_SOC_DAPM_POST_PMU:
+               msleep(125);
                regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL,
                        NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC);
                break;
index 2df91db765acd6300406aa330cb0d85b3edb7d96..64bf26cec20d535314551ae542f8c28ed25352dc 100644 (file)
@@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform)
                        dev_err(&rt5514_spi->dev,
                                "%s Failed to reguest IRQ: %d\n", __func__,
                                ret);
+               else
+                       device_init_wakeup(rt5514_dsp->dev, true);
        }
 
        return 0;
@@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi)
                return ret;
        }
 
-       device_init_wakeup(&spi->dev, true);
-
        return 0;
 }
 
@@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev)
        if (device_may_wakeup(dev))
                disable_irq_wake(irq);
 
-       if (rt5514_dsp->substream) {
-               rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf));
-               if (buf[0] & RT5514_IRQ_STATUS_BIT)
-                       rt5514_schedule_copy(rt5514_dsp);
+       if (rt5514_dsp) {
+               if (rt5514_dsp->substream) {
+                       rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf,
+                               sizeof(buf));
+                       if (buf[0] & RT5514_IRQ_STATUS_BIT)
+                               rt5514_schedule_copy(rt5514_dsp);
+               }
        }
 
        return 0;
index 2a5b5d74e69714eeb966c6fc793021fcb7cd3a13..2dd6e9f990a4c4c7a2377c5f159987a046220b7a 100644 (file)
@@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = {
        SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0),
        SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0),
 
-       SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+       SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0,
                rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
 
        SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1,
index f020d2d1eef4a98baa8c91442485ee64257c708e..edc152c8a1fe7596e9bbc5760574d973a9185d16 100644 (file)
@@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
        regmap_read(regmap, RT5645_VENDOR_ID, &val);
        rt5645->v_id = val & 0xff;
 
+       regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080);
+
        ret = regmap_register_patch(rt5645->regmap, init_list,
                                    ARRAY_SIZE(init_list));
        if (ret != 0)
index b036c9dc0c8cf1883dd550a6755ae61c7a81e87d..d329bf719d80f01e5dc67865e4258f0cca50d6d5 100644 (file)
@@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
                        RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
                snd_soc_update_bits(codec, RT5663_IRQ_1,
                        RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN);
+               snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+                       RT5663_EM_JD_MASK, RT5663_EM_JD_RST);
+               snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+                       RT5663_EM_JD_MASK, RT5663_EM_JD_NOR);
 
                while (true) {
                        regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val);
index c5a9b69579ad216d9b039359910e8c10acfa1832..03adc8004ba98d73da3f132d23285fdae77bf72d 100644 (file)
 #define RT5663_POL_EXT_JD_SHIFT                        10
 #define RT5663_POL_EXT_JD_EN                   (0x1 << 10)
 #define RT5663_POL_EXT_JD_DIS                  (0x0 << 10)
+#define RT5663_EM_JD_MASK                      (0x1 << 7)
+#define RT5663_EM_JD_SHIFT                     7
+#define RT5663_EM_JD_NOR                       (0x1 << 7)
+#define RT5663_EM_JD_RST                       (0x0 << 7)
 
 /* DACREF LDO Control (0x0112)*/
 #define RT5663_PWR_LDO_DACREFL_MASK            (0x1 << 9)
index 730fb2058869978b3f1b5281b123844a44458c42..1ff3edb7bbb6b28eb45c99b7b14baeadb46651ec 100644 (file)
@@ -116,7 +116,7 @@ struct aic31xx_pdata {
 /* INT2 interrupt control */
 #define AIC31XX_INT2CTRL       AIC31XX_REG(0, 49)
 /* GPIO1 control */
-#define AIC31XX_GPIO1          AIC31XX_REG(0, 50)
+#define AIC31XX_GPIO1          AIC31XX_REG(0, 51)
 
 #define AIC31XX_DACPRB         AIC31XX_REG(0, 60)
 /* ADC Instruction Set Register */
index c482b2e7a7d2a55d7ead20e56a680da6c0791378..cfe72b9d4356069327e80d592ff1e660bca39258 100644 (file)
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
        struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
        struct device_node *twl4030_codec_node = NULL;
 
-       twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+       twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
                                                  "codec");
 
        if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
                                     GFP_KERNEL);
                if (!pdata) {
                        dev_err(codec->dev, "Can not allocate memory\n");
+                       of_node_put(twl4030_codec_node);
                        return NULL;
                }
                twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+               of_node_put(twl4030_codec_node);
        }
 
        return pdata;
index 65c059b5ffd784066fd6d66ae74dae044b721eeb..66e32f5d2917f2f0b958c124b5f4a0eeca296c10 100644 (file)
@@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                 le64_to_cpu(footer->timestamp));
 
        while (pos < firmware->size &&
-              pos - firmware->size > sizeof(*region)) {
+              sizeof(*region) < firmware->size - pos) {
                region = (void *)&(firmware->data[pos]);
                region_name = "Unknown";
                reg = 0;
@@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                         regions, le32_to_cpu(region->len), offset,
                         region_name);
 
-               if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
-                   firmware->size) {
+               if (le32_to_cpu(region->len) >
+                   firmware->size - pos - sizeof(*region)) {
                        adsp_err(dsp,
                                 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
                                 file, regions, region_name,
@@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 
        blocks = 0;
        while (pos < firmware->size &&
-              pos - firmware->size > sizeof(*blk)) {
+              sizeof(*blk) < firmware->size - pos) {
                blk = (void *)(&firmware->data[pos]);
 
                type = le16_to_cpu(blk->type);
@@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
                }
 
                if (reg) {
-                       if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
-                           firmware->size) {
+                       if (le32_to_cpu(blk->len) >
+                           firmware->size - pos - sizeof(*blk)) {
                                adsp_err(dsp,
                                         "%s.%d: %s region len %d bytes exceeds file length %zu\n",
                                         file, blocks, region_name,
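
The wm_adsp hunks rework the firmware bounds checks so the untrusted block length is compared against the bytes remaining in the file instead of being added to the current position first; with a large enough length field, the old-style sum can wrap on 32-bit arithmetic and slip past the check. An assumed 32-bit sketch of the two forms (HDR_SIZE, fits_old() and fits_new() are illustrative names):

#include <stdint.h>
#include <stdio.h>

#define HDR_SIZE 8u     /* stands in for sizeof(*region) */

/* Old style: add first, then compare -- the sum can wrap in 32 bits. */
static int fits_old(uint32_t pos, uint32_t len, uint32_t file_size)
{
        return (pos + len + HDR_SIZE) <= file_size;
}

/* New style: compare the untrusted length against what is actually left. */
static int fits_new(uint32_t pos, uint32_t len, uint32_t file_size)
{
        if (file_size - pos < HDR_SIZE) /* assumes the caller keeps pos < file_size */
                return 0;
        return len <= file_size - pos - HDR_SIZE;
}

int main(void)
{
        uint32_t pos = 16, file_size = 64;
        uint32_t evil_len = 0xFFFFFFF0u;        /* crafted region length */

        printf("old check accepts oversized region: %d\n",
               fits_old(pos, evil_len, file_size));    /* 1: the sum wrapped */
        printf("new check accepts oversized region: %d\n",
               fits_new(pos, evil_len, file_size));    /* 0: rejected */
        return 0;
}

The loop conditions get the same treatment: sizeof(*region) < firmware->size - pos avoids the pos - firmware->size subtraction, which underflows whenever pos is still inside the file.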
index 0f163abe4ba37d93bec022afd0842b20423f2361..52c27a358933b100d85b2392abe2d2fc4501dd3a 100644 (file)
 #define ASRFSTi_OUTPUT_FIFO_SHIFT      12
 #define ASRFSTi_OUTPUT_FIFO_MASK       (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT)
 #define ASRFSTi_IAEi_SHIFT             11
-#define ASRFSTi_IAEi_MASK              (1 << ASRFSTi_OAFi_SHIFT)
-#define ASRFSTi_IAEi                   (1 << ASRFSTi_OAFi_SHIFT)
+#define ASRFSTi_IAEi_MASK              (1 << ASRFSTi_IAEi_SHIFT)
+#define ASRFSTi_IAEi                   (1 << ASRFSTi_IAEi_SHIFT)
 #define ASRFSTi_INPUT_FIFO_WIDTH       7
 #define ASRFSTi_INPUT_FIFO_SHIFT       0
 #define ASRFSTi_INPUT_FIFO_MASK                ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1)
index f2f51e06e22cc8b6ff3f9e4f21046be256bd1265..424bafaf51efe63e108fcd83ce3c6bda43f9e92c 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
@@ -265,6 +266,8 @@ struct fsl_ssi_private {
 
        u32 fifo_watermark;
        u32 dma_maxburst;
+
+       struct mutex ac97_reg_lock;
 };
 
 /*
@@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
        if (reg > 0x7f)
                return;
 
+       mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
        ret = clk_prepare_enable(fsl_ac97_data->clk);
        if (ret) {
                pr_err("ac97 write clk_prepare_enable failed: %d\n",
                        ret);
-               return;
+               goto ret_unlock;
        }
 
        lreg = reg <<  12;
@@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
        udelay(100);
 
        clk_disable_unprepare(fsl_ac97_data->clk);
+
+ret_unlock:
+       mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 }
 
 static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
@@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 {
        struct regmap *regs = fsl_ac97_data->regs;
 
-       unsigned short val = -1;
+       unsigned short val = 0;
        u32 reg_val;
        unsigned int lreg;
        int ret;
 
+       mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
        ret = clk_prepare_enable(fsl_ac97_data->clk);
        if (ret) {
                pr_err("ac97 read clk_prepare_enable failed: %d\n",
                        ret);
-               return -1;
+               goto ret_unlock;
        }
 
        lreg = (reg & 0x7f) <<  12;
@@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 
        clk_disable_unprepare(fsl_ac97_data->clk);
 
+ret_unlock:
+       mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
        return val;
 }
 
@@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                                sizeof(fsl_ssi_ac97_dai));
 
                fsl_ac97_data = ssi_private;
-
-               ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
-               if (ret) {
-                       dev_err(&pdev->dev, "could not set AC'97 ops\n");
-                       return ret;
-               }
        } else {
                /* Initialize this copy of the CPU DAI driver structure */
                memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                        return ret;
        }
 
+       if (fsl_ssi_is_ac97(ssi_private)) {
+               mutex_init(&ssi_private->ac97_reg_lock);
+               ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+               if (ret) {
+                       dev_err(&pdev->dev, "could not set AC'97 ops\n");
+                       goto error_ac97_ops;
+               }
+       }
+
        ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
                                              &ssi_private->cpu_dai_drv, 1);
        if (ret) {
@@ -1657,6 +1672,13 @@ error_sound_card:
        fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
 
 error_asoc_register:
+       if (fsl_ssi_is_ac97(ssi_private))
+               snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
+       if (fsl_ssi_is_ac97(ssi_private))
+               mutex_destroy(&ssi_private->ac97_reg_lock);
+
        if (ssi_private->soc->imx)
                fsl_ssi_imx_clean(pdev, ssi_private);
 
@@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev)
        if (ssi_private->soc->imx)
                fsl_ssi_imx_clean(pdev, ssi_private);
 
-       if (fsl_ssi_is_ac97(ssi_private))
+       if (fsl_ssi_is_ac97(ssi_private)) {
                snd_soc_set_ac97_ops(NULL);
+               mutex_destroy(&ssi_private->ac97_reg_lock);
+       }
 
        return 0;
 }
index 6f9a8bcf20f3ebb4407a2452eed8870a11b40c02..6dcad0a8a0d045969873d983fa8b89bbe1f91e96 100644 (file)
@@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
        { "ssp0 Tx", NULL, "spk_out" },
 
        { "AIF Playback", NULL, "ssp1 Tx" },
-       { "ssp1 Tx", NULL, "hs_out" },
+       { "ssp1 Tx", NULL, "codec1_out" },
 
        { "hs_in", NULL, "ssp1 Rx" },
        { "ssp1 Rx", NULL, "AIF Capture" },
index 6072164f2d43db7c7f8f50000892f6e61e80d79c..271ae3c2c5354c5788aa9383772ebbe63b4e8d9a 100644 (file)
@@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
        { "ssp0 Tx", NULL, "spk_out" },
 
        { "AIF Playback", NULL, "ssp1 Tx" },
-       { "ssp1 Tx", NULL, "hs_out" },
+       { "ssp1 Tx", NULL, "codec1_out" },
 
        { "hs_in", NULL, "ssp1 Rx" },
        { "ssp1 Rx", NULL, "AIF Capture" },
index d14c50a602894c4ad8aa76f2259839552aff2f6a..3eaac41090ca7f8b07ed99511d58cbc1f5498c56 100644 (file)
@@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt,
 
        if ((epnt->virtual_bus_id == instance_id) &&
                        (epnt->linktype == link_type) &&
-                       (epnt->direction == dirn) &&
-                       (epnt->device_type == dev_type))
-               return true;
-       else
-               return false;
+                       (epnt->direction == dirn)) {
+               /* do not check dev_type for DMIC link type */
+               if (epnt->linktype == NHLT_LINK_DMIC)
+                       return true;
+
+               if (epnt->device_type == dev_type)
+                       return true;
+       }
+
+       return false;
 }
 
 struct nhlt_specific_cfg
index a072bcf209d2aa4c9c72503466e027e6867cd9bb..81923da18ac2259ad159b8c0282242cc7c94c53c 100644 (file)
@@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
                break;
 
        default:
-               dev_warn(bus->dev, "Control load not supported %d:%d:%d\n",
+               dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n",
                        hdr->ops.get, hdr->ops.put, hdr->ops.info);
                break;
        }
index ee5055d47d13d0ba6f63417947c63f731bcac4a8..a89fe9b6463ba6b56d88c9afa6ae3fdd1397e6ae 100644 (file)
@@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev)
        spdif->mclk = devm_clk_get(&pdev->dev, "mclk");
        if (IS_ERR(spdif->mclk)) {
                dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n");
-               return PTR_ERR(spdif->mclk);
+               ret = PTR_ERR(spdif->mclk);
+               goto err_disable_hclk;
        }
 
        ret = clk_prepare_enable(spdif->mclk);
        if (ret) {
                dev_err(spdif->dev, "clock enable failed %d\n", ret);
-               return ret;
+               goto err_disable_clocks;
        }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        regs = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(regs))
-               return PTR_ERR(regs);
+       if (IS_ERR(regs)) {
+               ret = PTR_ERR(regs);
+               goto err_disable_clocks;
+       }
 
        spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs,
                                                  &rk_spdif_regmap_config);
        if (IS_ERR(spdif->regmap)) {
                dev_err(&pdev->dev,
                        "Failed to initialise managed register map\n");
-               return PTR_ERR(spdif->regmap);
+               ret = PTR_ERR(spdif->regmap);
+               goto err_disable_clocks;
        }
 
        spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR;
@@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev)
 
 err_pm_runtime:
        pm_runtime_disable(&pdev->dev);
+err_disable_clocks:
+       clk_disable_unprepare(spdif->mclk);
+err_disable_hclk:
+       clk_disable_unprepare(spdif->hclk);
 
        return ret;
 }
index 8ddb08714faabb5bc2b58716155b0ecec501db0d..4672688cac325ce79970c620c5f9e6d2d81b3525 100644 (file)
@@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod,
                                   NULL, &val, NULL);
 
        val  = val      << shift;
-       mask = 0xffff   << shift;
+       mask = 0x0f1f   << shift;
 
        rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val);
 
@@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod,
 
        in   = in       << shift;
        out  = out      << shift;
-       mask = 0xffff   << shift;
+       mask = 0x0f1f   << shift;
 
        switch (id / 2) {
        case 0:
@@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
                        ckr = 0x80000000;
        }
 
-       rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr);
+       rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
        rsnd_mod_write(adg_mod, BRRA,  adg->rbga);
        rsnd_mod_write(adg_mod, BRRB,  adg->rbgb);
 
index c70eb20978163d6952a6764381d083779dbd758b..f12a88a21dfa24d32f04dad32635dee2482a9337 100644 (file)
@@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
 
        return snd_pcm_lib_preallocate_pages_for_all(
                rtd->pcm,
-               SNDRV_DMA_TYPE_CONTINUOUS,
-               snd_dma_continuous_data(GFP_KERNEL),
+               SNDRV_DMA_TYPE_DEV,
+               rtd->card->snd_card->dev,
                PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
 }
 
index fd557abfe390a1ea6c6800e9e9836d039bdbcee3..4d750bdf8e2449981537a4b6d41bea33af321789 100644 (file)
 struct rsnd_dmaen {
        struct dma_chan         *chan;
        dma_cookie_t            cookie;
-       dma_addr_t              dma_buf;
        unsigned int            dma_len;
-       unsigned int            dma_period;
-       unsigned int            dma_cnt;
 };
 
 struct rsnd_dmapp {
@@ -71,38 +68,10 @@ static struct rsnd_mod mem = {
 /*
  *             Audio DMAC
  */
-#define rsnd_dmaen_sync(dmaen, io, i)  __rsnd_dmaen_sync(dmaen, io, i, 1)
-#define rsnd_dmaen_unsync(dmaen, io, i)        __rsnd_dmaen_sync(dmaen, io, i, 0)
-static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io,
-                             int i, int sync)
-{
-       struct device *dev = dmaen->chan->device->dev;
-       enum dma_data_direction dir;
-       int is_play = rsnd_io_is_play(io);
-       dma_addr_t buf;
-       int len, max;
-       size_t period;
-
-       len     = dmaen->dma_len;
-       period  = dmaen->dma_period;
-       max     = len / period;
-       i       = i % max;
-       buf     = dmaen->dma_buf + (period * i);
-
-       dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-       if (sync)
-               dma_sync_single_for_device(dev, buf, period, dir);
-       else
-               dma_sync_single_for_cpu(dev, buf, period, dir);
-}
-
 static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
                                  struct rsnd_dai_stream *io)
 {
        struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
-       struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
-       struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
        bool elapsed = false;
        unsigned long flags;
 
@@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
         */
        spin_lock_irqsave(&priv->lock, flags);
 
-       if (rsnd_io_is_working(io)) {
-               rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt);
-
-               /*
-                * Next period is already started.
-                * Let's sync Next Next period
-                * see
-                *      rsnd_dmaen_start()
-                */
-               rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2);
-
+       if (rsnd_io_is_working(io))
                elapsed = true;
 
-               dmaen->dma_cnt++;
-       }
-
        spin_unlock_irqrestore(&priv->lock, flags);
 
        if (elapsed)
@@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
        struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
        struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
-       if (dmaen->chan) {
-               int is_play = rsnd_io_is_play(io);
-
+       if (dmaen->chan)
                dmaengine_terminate_all(dmaen->chan);
-               dma_unmap_single(dmaen->chan->device->dev,
-                                dmaen->dma_buf, dmaen->dma_len,
-                                is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       }
 
        return 0;
 }
@@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        struct device *dev = rsnd_priv_to_dev(priv);
        struct dma_async_tx_descriptor *desc;
        struct dma_slave_config cfg = {};
-       dma_addr_t buf;
-       size_t len;
-       size_t period;
        int is_play = rsnd_io_is_play(io);
-       int i;
        int ret;
 
        cfg.direction   = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
@@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        if (ret < 0)
                return ret;
 
-       len     = snd_pcm_lib_buffer_bytes(substream);
-       period  = snd_pcm_lib_period_bytes(substream);
-       buf     = dma_map_single(dmaen->chan->device->dev,
-                                substream->runtime->dma_area,
-                                len,
-                                is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       if (dma_mapping_error(dmaen->chan->device->dev, buf)) {
-               dev_err(dev, "dma map failed\n");
-               return -EIO;
-       }
-
        desc = dmaengine_prep_dma_cyclic(dmaen->chan,
-                                        buf, len, period,
+                                        substream->runtime->dma_addr,
+                                        snd_pcm_lib_buffer_bytes(substream),
+                                        snd_pcm_lib_period_bytes(substream),
                                         is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        desc->callback          = rsnd_dmaen_complete;
        desc->callback_param    = rsnd_mod_get(dma);
 
-       dmaen->dma_buf          = buf;
-       dmaen->dma_len          = len;
-       dmaen->dma_period       = period;
-       dmaen->dma_cnt          = 0;
-
-       /*
-        * synchronize this and next period
-        * see
-        *      __rsnd_dmaen_complete()
-        */
-       for (i = 0; i < 2; i++)
-               rsnd_dmaen_sync(dmaen, io, i);
+       dmaen->dma_len          = snd_pcm_lib_buffer_bytes(substream);
 
        dmaen->cookie = dmaengine_submit(desc);
        if (dmaen->cookie < 0) {
index fece1e5f582f35ab5e558a0b66b3b558816048d6..cbf3bf312d23bda9427747c1af2dde30d9fcf43d 100644 (file)
@@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod,
                                    int byte)
 {
        struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+       bool ret = false;
+       int byte_pos;
 
-       ssi->byte_pos += byte;
+       byte_pos = ssi->byte_pos + byte;
 
-       if (ssi->byte_pos >= ssi->next_period_byte) {
+       if (byte_pos >= ssi->next_period_byte) {
                struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
                ssi->period_pos++;
                ssi->next_period_byte += ssi->byte_per_period;
 
                if (ssi->period_pos >= runtime->periods) {
-                       ssi->byte_pos = 0;
+                       byte_pos = 0;
                        ssi->period_pos = 0;
                        ssi->next_period_byte = ssi->byte_per_period;
                }
 
-               return true;
+               ret = true;
        }
 
-       return false;
+       WRITE_ONCE(ssi->byte_pos, byte_pos);
+
+       return ret;
 }
 
 /*
@@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod,
        struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
        struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
-       *pointer = bytes_to_frames(runtime, ssi->byte_pos);
+       *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos));
 
        return 0;
 }
index 4d948757d300d04be3bfca3c63f78c68085a8506..6ff8a36c2c82224da8ae88c94b317092ea87f88f 100644 (file)
@@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 {
        int hdmi = rsnd_ssi_hdmi_port(io);
        int ret;
+       u32 mode = 0;
 
        ret = rsnd_ssiu_init(mod, io, priv);
        if (ret < 0)
@@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
                 * see
                 *      rsnd_ssi_config_init()
                 */
-               rsnd_mod_write(mod, SSI_MODE, 0x1);
+               mode = 0x1;
        }
 
+       rsnd_mod_write(mod, SSI_MODE, mode);
+
        if (rsnd_ssi_use_busif(io)) {
                rsnd_mod_write(mod, SSI_BUSIF_ADINR,
                               rsnd_get_adinr_bit(mod, io) |
index 7c9e361b2200be081aca8f5a99d1b71a5846d30b..2b4ceda36291c01c6cca69d3a1cacd6c23014f40 100644 (file)
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
        kctl->private_value = (unsigned long)namelist;
        kctl->private_free = usb_mixer_selector_elem_free;
 
-       nameid = uac_selector_unit_iSelector(desc);
+       /* check the static mapping table at first */
        len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
-       if (len)
-               ;
-       else if (nameid)
-               len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
-                                        sizeof(kctl->id.name));
-       else
-               len = get_term_name(state, &state->oterm,
-                                   kctl->id.name, sizeof(kctl->id.name), 0);
-
        if (!len) {
-               strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+               /* no mapping ? */
+               /* if iSelector is given, use it */
+               nameid = uac_selector_unit_iSelector(desc);
+               if (nameid)
+                       len = snd_usb_copy_string_desc(state, nameid,
+                                                      kctl->id.name,
+                                                      sizeof(kctl->id.name));
+               /* ... or pick up the terminal name at next */
+               if (!len)
+                       len = get_term_name(state, &state->oterm,
+                                   kctl->id.name, sizeof(kctl->id.name), 0);
+               /* ... or use the fixed string "USB" as the last resort */
+               if (!len)
+                       strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
 
+               /* and add the proper suffix */
                if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
                        append_ctl_name(kctl, " Clock Source");
                else if ((state->oterm.type & 0xff00) == 0x0100)
index 77eecaa4db1f32c9b7af87273c599181bf307443..a66ef5777887a78d7416e64c049c73b26477c7f7 100644 (file)
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
 /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
  * between PCM/DOP and native DSD mode
  */
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
 {
        switch (id) {
        case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+       case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
                return true;
        }
        return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
                        break;
                }
                mdelay(20);
-       } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+       } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
                /* Vendor mode switch cmd is required. */
                switch (fmt->altsetting) {
                case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        }
 
        /* TEAC devices with USB DAC functionality */
-       if (is_teac_50X_dac(chip->usb_id)) {
+       if (is_teac_dsd_dac(chip->usb_id)) {
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
        }
index cefe7c7cd4f6f29fabd90e33495d7a6d8ac6dbdd..0a8e37a519f258e72317f39a87ac9cc60ad47f6f 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
-#include <asm/ptrace.h>
+#include "ptrace.h"
 
 typedef user_pt_regs bpf_user_pt_regs_t;
 
diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644 (file)
index 0000000..d17dd9e
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+       PERF_REG_S390_R0,
+       PERF_REG_S390_R1,
+       PERF_REG_S390_R2,
+       PERF_REG_S390_R3,
+       PERF_REG_S390_R4,
+       PERF_REG_S390_R5,
+       PERF_REG_S390_R6,
+       PERF_REG_S390_R7,
+       PERF_REG_S390_R8,
+       PERF_REG_S390_R9,
+       PERF_REG_S390_R10,
+       PERF_REG_S390_R11,
+       PERF_REG_S390_R12,
+       PERF_REG_S390_R13,
+       PERF_REG_S390_R14,
+       PERF_REG_S390_R15,
+       PERF_REG_S390_FP0,
+       PERF_REG_S390_FP1,
+       PERF_REG_S390_FP2,
+       PERF_REG_S390_FP3,
+       PERF_REG_S390_FP4,
+       PERF_REG_S390_FP5,
+       PERF_REG_S390_FP6,
+       PERF_REG_S390_FP7,
+       PERF_REG_S390_FP8,
+       PERF_REG_S390_FP9,
+       PERF_REG_S390_FP10,
+       PERF_REG_S390_FP11,
+       PERF_REG_S390_FP12,
+       PERF_REG_S390_FP13,
+       PERF_REG_S390_FP14,
+       PERF_REG_S390_FP15,
+       PERF_REG_S390_MASK,
+       PERF_REG_S390_PC,
+
+       PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
index e2450c8e88e6f13cfc0934b772880e80b6f18c37..a8c3a33dd185e213f68f97b8719c7ca20198783e 100644 (file)
@@ -523,21 +523,23 @@ static int do_show(int argc, char **argv)
                                break;
                        p_err("can't get next map: %s%s", strerror(errno),
                              errno == EINVAL ? " -- kernel too old?" : "");
-                       return -1;
+                       break;
                }
 
                fd = bpf_map_get_fd_by_id(id);
                if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
                        p_err("can't get map by id (%u): %s",
                              id, strerror(errno));
-                       return -1;
+                       break;
                }
 
                err = bpf_obj_get_info_by_fd(fd, &info, &len);
                if (err) {
                        p_err("can't get map info: %s", strerror(errno));
                        close(fd);
-                       return -1;
+                       break;
                }
 
                if (json_output)
index ad619b96c27664300df9a78bc28958c4d8662a4b..dded77345bfb05fef1fd50a3f3e7f54d43ae5cb7 100644 (file)
@@ -382,6 +382,8 @@ static int do_show(int argc, char **argv)
 
                fd = bpf_prog_get_fd_by_id(id);
                if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
                        p_err("can't get prog by id (%u): %s",
                              id, strerror(errno));
                        err = -1;
index 217cf6f95c366037ccd2ff3cedb1d61de22c616a..a5684d0968b4fd087905e659c0ce80bd170434c2 100755 (executable)
@@ -478,7 +478,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter or fields_filter == "help":
+        if not fields_filter:
             return True
         return re.match(fields_filter, field) is not None
 
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self.get_available_fields()
+                       if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
     def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
             curses.nocbreak()
             curses.endwin()
 
-    def get_all_gnames(self):
+    @staticmethod
+    def get_all_gnames():
         """Returns a list of (pid, gname) tuples of all running guests"""
         res = []
         try:
@@ -963,7 +964,7 @@ class Tui(object):
             # perform a sanity check before calling the more expensive
             # function to possibly extract the guest name
             if ' -name ' in line[1]:
-                res.append((line[0], self.get_gname_from_pid(line[0])))
+                res.append((line[0], Tui.get_gname_from_pid(line[0])))
         child.stdout.close()
 
         return res
@@ -984,7 +985,8 @@ class Tui(object):
         except Exception:
             self.screen.addstr(row + 1, 2, 'Not available')
 
-    def get_pid_from_gname(self, gname):
+    @staticmethod
+    def get_pid_from_gname(gname):
         """Fuzzy function to convert guest name to QEMU process pid.
 
         Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
 
         """
         pids = []
-        for line in self.get_all_gnames():
+        for line in Tui.get_all_gnames():
             if gname == line[1]:
                 pids.append(int(line[0]))
 
@@ -1090,15 +1092,16 @@ class Tui(object):
             # sort by totals
             return (0, -stats[x][0])
         total = 0.
-        for val in stats.values():
-            total += val[0]
+        for key in stats.keys():
+            if key.find('(') is -1:
+                total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
             sortkey = sortCurAvg
         else:
             sortkey = sortTotal
+        tavg = 0
         for key in sorted(stats.keys(), key=sortkey):
-
-            if row >= self.screen.getmaxyx()[0]:
+            if row >= self.screen.getmaxyx()[0] - 1:
                 break
             values = stats[key]
             if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
+                if cur is not '' and key.find('(') is -1:
+                    tavg += cur
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
+        else:
+            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
+                               ('Total', total, tavg if tavg else ''),
+                               curses.A_BOLD)
         self.screen.refresh()
 
     def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
                 if char == 'x':
                     self.update_drilldown()
                     # prevents display of current values on next refresh
-                    self.stats.get()
+                    self.stats.get(self._display_guests)
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
         try:
             pids = Tui.get_pid_from_gname(val)
         except:
-            raise optparse.OptionValueError('Error while searching for guest '
-                                            '"{}", use "-p" to specify a pid '
-                                            'instead'.format(val))
+            sys.exit('Error while searching for guest "{}". Use "-p" to '
+                     'specify a pid instead.'.format(val))
         if len(pids) == 0:
-            raise optparse.OptionValueError('No guest by the name "{}" '
-                                            'found'.format(val))
+            sys.exit('Error: No guest by the name "{}" found'.format(val))
         if len(pids) > 1:
-            raise optparse.OptionValueError('Multiple processes found (pids: '
-                                            '{}) - use "-p" to specify a pid '
-                                            'instead'.format(" ".join(pids)))
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'.format(" ".join(pids)))
         parser.values.pid = pids[0]
 
     optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
                          help='restrict statistics to guest by name',
                          callback=cb_guest_to_pid,
                          )
-    (options, _) = optparser.parse_args(sys.argv)
+    options, unkn = optparser.parse_args(sys.argv)
+    if len(unkn) != 1:
+        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    try:
+        # verify that we were passed a valid regex up front
+        re.compile(options.fields)
+    except re.error:
+        sys.exit('Error: "' + options.fields + '" is not a valid regular '
+                 'expression')
+
     return options
 
 
@@ -1564,16 +1579,13 @@ def main():
 
     stats = Stats(options)
 
-    if options.fields == "help":
-        event_list = "\n"
-        s = stats.get()
-        for key in s.keys():
-            if key.find('(') != -1:
-                key = key[0:key.find('(')]
-            if event_list.find('\n' + key + '\n') == -1:
-                event_list += key + '\n'
-        sys.stdout.write(event_list)
-        return ""
+    if options.fields == 'help':
+        stats.fields_filter = None
+        event_list = []
+        for key in stats.get().keys():
+            event_list.append(key.split('(', 1)[0])
+        sys.stdout.write('  ' + '\n  '.join(sorted(set(event_list))) + '\n')
+        sys.exit(0)
 
     if options.log:
         log(stats)
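
The kvm_stat changes above lean on the convention that child trace events are keyed as 'parent(child)': the new Total row skips any key containing '(' so children are not counted twice, and '-f help' reduces every key to its parent name before printing. A minimal, simplified Python sketch of both steps, using a made-up stats dict in place of the real Stats object:

    # Illustrative stats dict shaped like what the Tui receives: key -> [total, current].
    stats = {
        'kvm_exit': [1200, 40],
        'kvm_exit(EPT_VIOLATION)': [700, 25],   # child event, excluded from totals
        'kvm_entry': [1180, 39],
    }

    # Total row: only parent events (no '(' in the key) contribute.
    total = sum(v[0] for k, v in stats.items() if '(' not in k)
    tavg = sum(v[1] for k, v in stats.items() if '(' not in k)
    print('%-40s %10d        %8s' % ('Total', total, tavg if tavg else ''))

    # '-f help' listing: strip the '(child)' suffix and de-duplicate.
    events = sorted(set(k.split('(', 1)[0] for k in stats))
    print('  ' + '\n  '.join(events))
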
index e5cf836be8a1848bb82f39cfa3c7c75dcc67b4fa..b5b3810c9e945d7f3a39568840fbc5b73f84983b 100644 (file)
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
 *s*::   set update interval
 
 *x*::  toggle reporting of stats for child trace events
+ ::     *Note*: The stats for the parents summarize the respective child trace
+                events
 
 Press any other key to refresh statistics immediately.
 
@@ -86,7 +88,7 @@ OPTIONS
 
 -f<fields>::
 --fields=<fields>::
-       fields to display (regex)
+       fields to display (regex), "-f help" for a list of available events
 
 -h::
 --help::
index 8acfc47af70efde4c1a3bb3ad6aff809f0ad0308..540a209b78ab3cd6ae3b972c57b338dc0aa9b58d 100644 (file)
@@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
                        *type = INSN_STACK;
                        op->src.type = OP_SRC_ADD;
                        op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-                       op->dest.type = OP_SRC_REG;
+                       op->dest.type = OP_DEST_REG;
                        op->dest.reg = CFI_SP;
                }
                break;
index 4c6b5c9ef073b31c3a01dd0ebb85efbccb4bfee0..91e8e19ff5e06193adc55c455b4d97ad947da7ee 100644 (file)
@@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv)
        const char *objname;
 
        argc--; argv++;
+       if (argc <= 0)
+               usage_with_options(orc_usage, check_options);
+
        if (!strncmp(argv[0], "gen", 3)) {
                argc = parse_options(argc, argv, check_options, orc_usage, 0);
                if (argc != 1)
@@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv)
                objname = argv[0];
 
                return check(objname, no_fp, no_unreachable, true);
-
        }
 
        if (!strcmp(argv[0], "dump")) {
index e5ca31429c9bac29a9c66c4f8ddf97a94651690b..e61fe703197baa9b21814674c0250f6420e38c93 100644 (file)
@@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file)
 
        /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
        sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+       if (!sec)
+               return -1;
 
        ip_relasec = elf_create_rela_section(file->elf, sec);
        if (!ip_relasec)
index ed65e82f034efe0e76cef718fdb8a9bd07511edb..0294bfb6c5f87c990500ee57e931034091d0da48 100644 (file)
@@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  ifeq ($(CC_NO_CLANG), 1)
-    PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
-  endif
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
@@ -576,14 +574,15 @@ ifndef NO_GTK2
   endif
 endif
 
-
 ifdef NO_LIBPERL
   CFLAGS += -DNO_LIBPERL
 else
   PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
   PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
   PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-  PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+  PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+  PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
   FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
   ifneq ($(feature-libperl), 1)
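
Both Makefile.config hunks above now strip -specs= entries from the flags reported by python-config and ExtUtils::Embed, since some distributions inject such options and not every compiler accepts them. A rough Python illustration of what $(filter-out -specs=%,...) removes; the sample flag list is invented:

    # Illustrative only: mimic GNU make's $(filter-out -specs=%,$(FLAGS)) in Python.
    embed_ccopts = ['-I/usr/include/python2.7',
                    '-specs=/usr/lib/rpm/redhat/redhat-hardened-cc1',
                    '-O2']
    filtered = [flag for flag in embed_ccopts if not flag.startswith('-specs=')]
    print(' '.join(filtered))   # the -specs= entry is dropped
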
index d2df54a6bc5a24a588ca3153833623ca0ecaa1c3..bcfbaed78cc257f15d9820238bc1ef58256f1c6a 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <stdlib.h>
 #include <linux/types.h>
-#include <../../../../arch/s390/include/uapi/asm/perf_regs.h>
+#include <asm/perf_regs.h>
 
 void perf_regs_load(u64 *regs);
 
index 6db9d809fe9722a9e4eb0afb5140443490aa43e3..3e64f10b6d6678810715e517e7354679b86efd8e 100755 (executable)
@@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h
 arch/arm/include/uapi/asm/perf_regs.h
 arch/arm64/include/uapi/asm/perf_regs.h
 arch/powerpc/include/uapi/asm/perf_regs.h
+arch/s390/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/kvm.h
 arch/x86/include/uapi/asm/kvm_perf.h
index cf36de7ea25587907d50db9c44e3dae5c3746c1a..0c6d1002b524eaf62ef62cc32763b041b2f33ba1 100644 (file)
@@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym,
 }
 
 int
-jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
-                      jvmti_line_info_t *li, int nr_lines)
+jvmti_write_debug_info(void *agent, uint64_t code,
+    int nr_lines, jvmti_line_info_t *li,
+    const char * const * file_names)
 {
        struct jr_code_debug_info rec;
-       size_t sret, len, size, flen;
+       size_t sret, len, size, flen = 0;
        uint64_t addr;
-       const char *fn = file;
        FILE *fp = agent;
        int i;
 
@@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
                return -1;
        }
 
-       flen = strlen(file) + 1;
+       for (i = 0; i < nr_lines; ++i) {
+           flen += strlen(file_names[i]) + 1;
+       }
 
        rec.p.id        = JIT_CODE_DEBUG_INFO;
        size            = sizeof(rec);
@@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
         * file[]   : source file name
         */
        size += nr_lines * sizeof(struct debug_entry);
-       size += flen * nr_lines;
+       size += flen;
        rec.p.total_size = size;
 
        /*
@@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
                if (sret != 1)
                        goto error;
 
-               sret = fwrite_unlocked(fn, flen, 1, fp);
+               sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp);
                if (sret != 1)
                        goto error;
        }
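
After the jvmti_write_debug_info() rework above, each line-table entry carries its own source file name, so the record grows by the sum of the individual name lengths (with NUL terminators) rather than nr_lines copies of one name. A small Python sketch of that size arithmetic; the struct sizes are placeholders, not the real sizeof values:

    # Sketch of the size computation in the new jvmti_write_debug_info();
    # the two constants below are illustrative, not actual C sizeof values.
    SIZEOF_REC = 40            # stands in for sizeof(struct jr_code_debug_info)
    SIZEOF_DEBUG_ENTRY = 16    # stands in for sizeof(struct debug_entry)

    def record_size(file_names):
        flen = sum(len(name) + 1 for name in file_names)   # each name is NUL-terminated
        return SIZEOF_REC + len(file_names) * SIZEOF_DEBUG_ENTRY + flen

    print(record_size(['java/util/HashMap.java', 'java/util/HashMap.java']))
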
index fe32d8344a823f56de37f3ecea53af4c2f78329f..6ed82f6c06ddd0ed85ee79a5015707753d4f38b9 100644 (file)
@@ -14,6 +14,7 @@ typedef struct {
        unsigned long   pc;
        int             line_number;
        int             discrim; /* discriminator -- 0 for now */
+       jmethodID       methodID;
 } jvmti_line_info_t;
 
 void *jvmti_open(void);
@@ -22,11 +23,9 @@ int   jvmti_write_code(void *agent, char const *symbol_name,
                       uint64_t vma, void const *code,
                       const unsigned int code_size);
 
-int   jvmti_write_debug_info(void *agent,
-                            uint64_t code,
-                            const char *file,
+int   jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
                             jvmti_line_info_t *li,
-                            int nr_lines);
+                            const char * const * file_names);
 
 #if defined(__cplusplus)
 }
index c62c9fc9a525995c83ef8fc8bf2f790d599abadc..6add3e9826141346a34a93b9e0a970a22720e361 100644 (file)
@@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
                        tab[lines].pc = (unsigned long)pc;
                        tab[lines].line_number = loc_tab[i].line_number;
                        tab[lines].discrim = 0; /* not yet used */
+                       tab[lines].methodID = m;
                        lines++;
                } else {
                        break;
@@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
        return JVMTI_ERROR_NONE;
 }
 
+static void
+copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)
+{
+       /*
+       * Assume path name is class hierarchy, this is a common practice with Java programs
+       */
+       if (*class_sign == 'L') {
+               int j, i = 0;
+               char *p = strrchr(class_sign, '/');
+               if (p) {
+                       /* drop the 'L' prefix and copy up to the final '/' */
+                       for (i = 0; i < (p - class_sign); i++)
+                               result[i] = class_sign[i+1];
+               }
+               /*
+               * append file name, we use loops and not string ops to avoid modifying
+               * class_sign which is used later for the symbol name
+               */
+               for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++)
+                       result[i] = file_name[j];
+
+               result[i] = '\0';
+       } else {
+               /* fallback case */
+		strncpy(result, file_name, max_length - 1);
+		result[max_length - 1] = '\0';
+       }
+}
+
+static jvmtiError
+get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
+{
+       jvmtiError ret;
+       jclass decl_class;
+       char *file_name = NULL;
+       char *class_sign = NULL;
+       char fn[PATH_MAX];
+       size_t len;
+
+       ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
+       if (ret != JVMTI_ERROR_NONE) {
+               print_error(jvmti, "GetMethodDeclaringClass", ret);
+               return ret;
+       }
+
+       ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+       if (ret != JVMTI_ERROR_NONE) {
+               print_error(jvmti, "GetSourceFileName", ret);
+               return ret;
+       }
+
+       ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               print_error(jvmti, "GetClassSignature", ret);
+               goto free_file_name_error;
+       }
+
+       copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+       len = strlen(fn);
+       *buffer = malloc((len + 1) * sizeof(char));
+       if (!*buffer) {
+               print_error(jvmti, "GetClassSignature", ret);
+               ret = JVMTI_ERROR_OUT_OF_MEMORY;
+               goto free_class_sign_error;
+       }
+       strcpy(*buffer, fn);
+       ret = JVMTI_ERROR_NONE;
+
+free_class_sign_error:
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+free_file_name_error:
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+
+       return ret;
+}
+
+static jvmtiError
+fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
+                     const jvmti_line_info_t * line_tab,
+                     char ** file_names)
+{
+       int index;
+       jvmtiError ret;
+
+       for (index = 0; index < nr_lines; ++index) {
+               ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index]));
+               if (ret != JVMTI_ERROR_NONE)
+                       return ret;
+       }
+
+       return JVMTI_ERROR_NONE;
+}
+
 static void JNICALL
 compiled_method_load_cb(jvmtiEnv *jvmti,
                        jmethodID method,
@@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
                        const void *compile_info)
 {
        jvmti_line_info_t *line_tab = NULL;
+       char ** line_file_names = NULL;
        jclass decl_class;
        char *class_sign = NULL;
        char *func_name = NULL;
        char *func_sign = NULL;
-       char *file_name= NULL;
+       char *file_name = NULL;
        char fn[PATH_MAX];
        uint64_t addr = (uint64_t)(uintptr_t)code_addr;
        jvmtiError ret;
        int nr_lines = 0; /* in line_tab[] */
        size_t len;
+       int output_debug_info = 0;
 
        ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
                                                &decl_class);
@@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
                if (ret != JVMTI_ERROR_NONE) {
                        warnx("jvmti: cannot get line table for method");
                        nr_lines = 0;
+               } else if (nr_lines > 0) {
+                       line_file_names = malloc(sizeof(char*) * nr_lines);
+                       if (!line_file_names) {
+                               warnx("jvmti: cannot allocate space for line table method names");
+                       } else {
+                               memset(line_file_names, 0, sizeof(char*) * nr_lines);
+                               ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
+                               if (ret != JVMTI_ERROR_NONE) {
+                                       warnx("jvmti: fill_source_filenames failed");
+                               } else {
+                                       output_debug_info = 1;
+                               }
+                       }
                }
        }
 
@@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
                goto error;
        }
 
-       /*
-        * Assume path name is class hierarchy, this is a common practice with Java programs
-        */
-       if (*class_sign == 'L') {
-               int j, i = 0;
-               char *p = strrchr(class_sign, '/');
-               if (p) {
-                       /* drop the 'L' prefix and copy up to the final '/' */
-                       for (i = 0; i < (p - class_sign); i++)
-                               fn[i] = class_sign[i+1];
-               }
-               /*
-                * append file name, we use loops and not string ops to avoid modifying
-                * class_sign which is used later for the symbol name
-                */
-               for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
-                       fn[i] = file_name[j];
-               fn[i] = '\0';
-       } else {
-               /* fallback case */
-               strcpy(fn, file_name);
-       }
+       copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+
        /*
         * write source line info record if we have it
         */
-       if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
-               warnx("jvmti: write_debug_info() failed");
+       if (output_debug_info)
+               if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names))
+                       warnx("jvmti: write_debug_info() failed");
 
        len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
        {
@@ -223,6 +313,13 @@ error:
        (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
        (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
        free(line_tab);
+       while (line_file_names && (nr_lines > 0)) {
+           if (line_file_names[nr_lines - 1]) {
+               free(line_file_names[nr_lines - 1]);
+           }
+           nr_lines -= 1;
+       }
+       free(line_file_names);
 }
 
 static void JNICALL
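
copy_class_filename() above rebuilds the source path from the JVM class signature when it starts with 'L', keeping the package directories and appending the name returned by GetSourceFileName(). A rough Python equivalent of that mapping, for illustration only:

    def copy_class_filename(class_sign, file_name):
        """Rough sketch of the C helper: 'Ljava/util/HashMap;' plus
        'HashMap.java' becomes 'java/util/HashMap.java'; anything else
        falls back to the plain file name."""
        if class_sign.startswith('L') and '/' in class_sign:
            # drop the leading 'L' and keep everything up to and including the final '/'
            package = class_sign[1:class_sign.rindex('/') + 1]
            return package + (file_name or '')
        return file_name

    print(copy_class_filename('Ljava/util/HashMap;', 'HashMap.java'))
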
index 792af7c3b74f98029ae32953a26e342157be5632..9316e648a880db61cad82c399c30218695d7d215 100644 (file)
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf
+LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup
@@ -39,7 +39,7 @@ $(BPFOBJ): force
 CLANG ?= clang
 LLC   ?= llc
 
-PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
 
 # Let newer LLVM versions transparently probe the kernel for availability
 # of full BPF instruction set.
index 69427531408dd22ef1887d473ab4bf6548e173b9..6761be18a91fccc2d4f8ad52b0f83fec293189ae 100644 (file)
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
                          info_len != sizeof(struct bpf_map_info) ||
                          strcmp((char *)map_infos[i].name, expected_map_name),
                          "get-map-info(fd)",
-                         "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+                         "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
                          err, errno,
                          map_infos[i].type, BPF_MAP_TYPE_ARRAY,
                          info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
                          *(int *)prog_infos[i].map_ids != map_infos[i].id ||
                          strcmp((char *)prog_infos[i].name, expected_prog_name),
                          "get-prog-info(fd)",
-                         "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+                         "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
                          err, errno, i,
                          prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
                          info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&prog_info, &prog_infos[i], info_len) ||
                      *(int *)prog_info.map_ids != saved_map_id,
                      "get-prog-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
                      err, errno, info_len, sizeof(struct bpf_prog_info),
                      memcmp(&prog_info, &prog_infos[i], info_len),
                      *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&map_info, &map_infos[i], info_len) ||
                      array_value != array_magic_value,
                      "check get-map-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
                      err, errno, info_len, sizeof(struct bpf_map_info),
                      memcmp(&map_info, &map_infos[i], info_len),
                      array_value, array_magic_value);
index 3c64f30cf63cc2b6adb532a3b1f3201533193f7f..b51017404c62d0dc8198afdf035016f6e5e2fd0b 100644 (file)
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
                        BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr_unpriv = "R1 subtraction from stack pointer",
-               .result_unpriv = REJECT,
-               .errstr = "R1 invalid mem access",
+               .errstr = "R1 subtraction from stack pointer",
                .result = REJECT,
        },
        {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
                },
                .errstr = "misaligned stack access",
                .result = REJECT,
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .result = ACCEPT,
-               .result_unpriv = REJECT,
-               .errstr_unpriv = "R1 pointer += pointer",
+               .result = REJECT,
+               .errstr = "R1 pointer += pointer",
        },
        {
                "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
-                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, len)),
                        BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
                        BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "invalid access to packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3, 11 },
-               .errstr_unpriv = "R0 pointer += pointer",
-               .errstr = "R0 invalid mem access 'inv'",
-               .result_unpriv = REJECT,
+               .errstr = "R0 pointer += pointer",
                .result = REJECT,
                .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
        },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 bitwise operator &= on pointer",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 bitwise operator &= on pointer",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 32-bit pointer arithmetic prohibited",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 pointer arithmetic with /= operator",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 pointer arithmetic with /= operator",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map_in_map = { 3 },
-               .errstr = "R1 type=inv expected=map_ptr",
-               .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+               .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
                .result = REJECT,
        },
        {
@@ -6116,6 +6102,30 @@ static struct bpf_test tests[] = {
                },
                .result = ACCEPT,
        },
+       {
+               "ld_abs: tests on r6 and skb data reload helper",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+                       BPF_MOV64_IMM(BPF_REG_6, 0),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_skb_vlan_push),
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 42),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
        {
                "ld_ind: check calling conv, r1",
                .insns = {
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
                        BPF_JMP_IMM(BPF_JA, 0, 0, -7),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr_unpriv = "R0 pointer comparison prohibited",
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
                .result_unpriv = REJECT,
        },
@@ -6741,6 +6750,462 @@ static struct bpf_test tests[] = {
                .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                .result = REJECT,
        },
+       {
+               "bounds check based on zero-extended MOV",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0x0000'0000'ffff'ffff */
+                       BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check based on sign-extended MOV. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 4294967295",
+               .result = REJECT
+       },
+       {
+               "bounds check based on sign-extended MOV. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xfff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 min value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test1",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value_size=8 off=1073741825",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test2",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value 1073741823",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check after truncation of non-boundary-crossing range",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       /* r2 = 0x10'0000'0000 */
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+                       /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        * difference to previous test: truncation via MOV32
+                        * instead of ALU32.
+                        */
+                       BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after wrapping 32-bit addition",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       /* r1 = 0x7fff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0xffff'fffe */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check after shift with oversized count operand",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_2, 32),
+                       BPF_MOV64_IMM(BPF_REG_1, 1),
+                       /* r1 = (u32)1 << (u32)32 = ? */
+                       BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x0000, 0xffff] */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 max value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check after right shift of maybe-negative number",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       /* r1 = [-0x01, 0xfe] */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+                       /* r1 = 0 or 0xff'ffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* r1 = 0 or 0xffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 2147483646",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset 1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test3",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset -1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test4",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_1, 1000000),
+                       BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 1000000000000",
+               .result = REJECT
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(1),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
        {
                "variable-offset ctx access",
                .insns = {
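
The wrapping and truncation tests added above all come down to how a 64-bit value behaves once an ALU32 instruction masks it to 32 bits. A plain-arithmetic Python sketch of two of the ACCEPT cases, purely to illustrate what the verifier has to prove; it is not verifier code:

    MASK32 = 0xffffffff

    # "bounds check after wrapping 32-bit addition": 0x7fffffff + 0x7fffffff is a
    # 64-bit add, then the ALU32 add of 2 wraps the low 32 bits back to 0.
    r1 = 0x7fffffff + 0x7fffffff        # 64-bit addition: 0xfffffffe, no wrap yet
    r1 = (r1 + 2) & MASK32              # 32-bit addition truncates: result is 0
    assert r1 == 0

    # "bounds check after truncation of non-boundary-crossing range": a value in
    # [0x00, 0xff] survives the add/ALU32-subtract round trip, so the later
    # right shift by 8 provably yields offset 0.
    for v in (0x00, 0xff):
        r1 = v + (1 << 36)              # 64-bit adds keep the range contiguous
        r1 += 0x7fffffff
        r1 = (r1 - 0x7fffffff) & MASK32 # ALU32 subtraction keeps only [0x00, 0xff]
        assert (r1 >> 8) == 0
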
@@ -6782,6 +7247,71 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_LWT_IN,
        },
+       {
+               "indirect variable-offset stack access",
+               .insns = {
+                       /* Fill the top 8 bytes of the stack */
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       /* Get an unknown value */
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+                       /* Make it small and 4-byte aligned */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+                       /* add it to fp.  We now have either fp-4 or fp-8, but
+                        * we don't know which
+                        */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+                       /* dereference it indirectly */
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 5 },
+               .errstr = "variable stack read R2",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_LWT_IN,
+       },
+       {
+               "direct stack access with 32-bit wraparound. test1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 2147483647",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 1073741823",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test3",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer offset 1073741822",
+               .result = REJECT
+       },
        {
                "liveness pruning and write screening",
                .insns = {
@@ -7103,6 +7633,19 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
+       {
+               "pkt_end - pkt_start is allowed",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
        {
                "XDP pkt read, pkt_end mangling, bad access 1",
                .insns = {
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
index e57b4ac40e72e0502dff75ea1d80c543280428eb..7177bea1fdfa62a1aa4e424d4dab665d8a9b7aaf 100644 (file)
@@ -1,3 +1,4 @@
 CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
index 66e5ce5b91f008d5dffc7848d0e99d730ad589b5..1aef72df20a112a30c17f2e567ca8b972a550104 100644 (file)
@@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt,
         * NB: Different Linux versions do different things with the
         * accessed bit in set_thread_area().
         */
-       if (ar != expected_ar &&
-           (ldt || ar != (expected_ar | AR_ACCESSED))) {
+       if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
                printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
                       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
                nerrs++;
@@ -627,13 +626,10 @@ static void do_multicpu_tests(void)
 static int finish_exec_test(void)
 {
        /*
-        * In a sensible world, this would be check_invalid_segment(0, 1);
-        * For better or for worse, though, the LDT is inherited across exec.
-        * We can probably change this safely, but for now we test it.
+        * Older kernel versions did inherit the LDT on exec(), which is
+        * wrong because exec() starts from a clean state.
         */
-       check_valid_segment(0, 1,
-                           AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
-                           42, true);
+       check_invalid_segment(0, 1);
 
        return nerrs ? 1 : 0;
 }
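
The updated comment states the expected semantics: exec() starts from a clean state, so no LDT entry should survive it. A stand-alone sketch of that expectation (not part of the patch) is below; the 0x11 modify_ldt() write mode matches what the x86 selftests use, while the SELFTEST_MARKER environment variable is an assumption used only to detect the re-exec'ed instance.

/*
 * Stand-alone sketch (not part of this patch): install an LDT entry,
 * re-exec ourselves, and check whether the entry survived.  On kernels
 * with the behaviour this selftest now expects, the LDT is empty after
 * exec().
 */
#include <asm/ldt.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	unsigned char buf[8];	/* room for LDT entry 0 */

	(void)argc;

	if (getenv("SELFTEST_MARKER")) {
		/* modify_ldt(0, ...) reads the LDT; returns bytes copied. */
		long n = syscall(SYS_modify_ldt, 0, buf, sizeof(buf));

		printf("LDT after exec: %s\n",
		       n > 0 ? "inherited (old behaviour)" : "cleared");
		return 0;
	}

	struct user_desc desc = {
		.entry_number   = 0,
		.limit          = 0xfffff,
		.seg_32bit      = 1,
		.contents       = 2,	/* code segment */
		.limit_in_pages = 1,
	};

	/* 0x11 = write an LDT entry using the modern descriptor format. */
	if (syscall(SYS_modify_ldt, 0x11, &desc, sizeof(desc)) != 0)
		perror("modify_ldt");

	setenv("SELFTEST_MARKER", "1", 1);
	execv("/proc/self/exe", argv);
	perror("execv");
	return 1;
}
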
index 2b3d6d2350158b73a2597dc1a78ac007b6a3563c..3d7b42e7729941b1ca8f35fb7f6d10d4151a1a2f 100644 (file)
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
        char command[SYSFS_BUS_ID_SIZE + 4];
        char match_busid_attr_path[SYSFS_PATH_MAX];
        int rc;
+       int cmd_size;
 
        snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
                 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
                 attr_name);
 
        if (add)
-               snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+               cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+                                   busid);
        else
-               snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+               cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+                                   busid);
 
        rc = write_sysfs_attribute(match_busid_attr_path, command,
-                                  sizeof(command));
+                                  cmd_size);
        if (rc < 0) {
                dbg("failed to write match_busid: %s", strerror(errno));
                return -1;
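
The fix above relies on snprintf() returning the number of characters it actually produced (excluding the terminating NUL), so only those bytes, rather than the whole sizeof(command) buffer with its uninitialized tail, are handed to the sysfs write. A small illustration follows; fake_sysfs_write() is a stand-in for write_sysfs_attribute() and its output format is an assumption.

/*
 * Stand-alone sketch (not part of this patch): pass snprintf()'s return
 * value as the length, instead of the full buffer size.
 */
#include <stdio.h>

#define SYSFS_BUS_ID_SIZE 32

static int fake_sysfs_write(const char *attr, const char *buf, size_t len)
{
	printf("write(%s): %zu bytes: \"%.*s\"\n", attr, len, (int)len, buf);
	return 0;
}

int main(void)
{
	char command[SYSFS_BUS_ID_SIZE + 4];
	const char *busid = "1-2.3";
	int cmd_size;

	cmd_size = snprintf(command, sizeof(command), "add %s", busid);

	/* Before the fix: sizeof(command) == 36 bytes went to the kernel,
	 * most of them uninitialized.  After: only the formatted bytes.
	 */
	fake_sysfs_write("match_busid", command, cmd_size);
	return 0;
}
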
index f9555b1e7f158f5203c1aaba47002424d3279203..cc29a814832837f5fb237dfdf74845a284e04367 100644 (file)
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
        struct arch_timer_context *vtimer;
+       u32 cnt_ctl;
 
-       if (!vcpu) {
-               pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-               return IRQ_NONE;
-       }
-       vtimer = vcpu_vtimer(vcpu);
+       /*
+        * We may see a timer interrupt after vcpu_put() has been called which
+        * sets the CPU's vcpu pointer to NULL, because even though the timer
+        * has been disabled in vtimer_save_state(), the hardware interrupt
+        * signal may not have been retired from the interrupt controller yet.
+        */
+       if (!vcpu)
+               return IRQ_HANDLED;
 
+       vtimer = vcpu_vtimer(vcpu);
        if (!vtimer->irq.level) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               if (kvm_timer_irq_can_fire(vtimer))
+               cnt_ctl = read_sysreg_el0(cntv_ctl);
+               cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+                          ARCH_TIMER_CTRL_IT_MASK;
+               if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
                        kvm_timer_update_irq(vcpu, true, vtimer);
        }
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
+       isb();
 
        vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
        return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
        struct arch_timer_kvm_info *info;
        int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
                return err;
        }
 
-       err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-       if (err) {
-               kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-               goto out_free_irq;
+       if (has_gic) {
+               err = irq_set_vcpu_affinity(host_vtimer_irq,
+                                           kvm_get_running_vcpus());
+               if (err) {
+                       kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+                       goto out_free_irq;
+               }
        }
 
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
        preempt_disable();
        timer->enabled = 1;
-       if (!irqchip_in_kernel(vcpu->kvm))
-               kvm_timer_vcpu_load_user(vcpu);
-       else
-               kvm_timer_vcpu_load_vgic(vcpu);
+       kvm_timer_vcpu_load(vcpu);
        preempt_enable();
 
        return 0;
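
The handler now decides whether to inject the interrupt from a snapshot of CNTV_CTL rather than from kvm_timer_irq_can_fire(): the timer must be enabled, have its status bit set, and not be masked. A condensed restatement of that condition is sketched below; the bit values follow the architected timer control register layout, and the function name is illustrative.

/*
 * Stand-alone sketch (not part of this patch): the masked comparison
 * used by the new kvm_arch_timer_handler(), spelled out as a predicate.
 */
#include <stdbool.h>
#include <stdint.h>

#define ARCH_TIMER_CTRL_ENABLE		(1 << 0)
#define ARCH_TIMER_CTRL_IT_MASK		(1 << 1)
#define ARCH_TIMER_CTRL_IT_STAT		(1 << 2)

static bool vtimer_should_fire(uint32_t cnt_ctl)
{
	cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
		   ARCH_TIMER_CTRL_IT_MASK;

	/* Enabled and asserting its status bit, with the mask bit clear. */
	return cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT);
}
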
index 6b60c98a6e2294c773eb20ea4794445a667415ea..2e43f9d42bd5db2a07438bb98f5e029c6246adb4 100644 (file)
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
        /*
         * Init HYP architected timer support
         */
-       err = kvm_timer_hyp_init();
+       err = kvm_timer_hyp_init(vgic_present);
        if (err)
                goto out;
 
index b6e715fd3c90af8c74408b72652f9974a3fb894d..dac7ceb1a677746cadb086a2cf8a07d8e560373c 100644 (file)
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
                }
 
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-                              data);
+                              &data);
                data = vcpu_data_host_to_guest(vcpu, data, len);
                vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
        }
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
                                               len);
 
-               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
                kvm_mmio_write_buf(data_buf, len, data);
 
                ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                       data_buf);
        } else {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-                              fault_ipa, 0);
+                              fault_ipa, NULL);
 
                ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                      data_buf);
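
trace_kvm_mmio() now takes a pointer to the payload, which lets the unsatisfied-read path pass NULL instead of a made-up value of 0. A minimal sketch of a helper with those semantics follows; the helper name and output format are assumptions, not the kernel tracepoint.

/*
 * Stand-alone sketch (not part of this patch): a tracing helper that
 * takes the data by pointer so "no data yet" can be expressed as NULL.
 */
#include <inttypes.h>
#include <stdio.h>

static void trace_mmio(const char *what, int len, uint64_t addr,
		       const unsigned long *data)
{
	if (data)
		printf("%s: %d bytes at 0x%" PRIx64 " = 0x%lx\n",
		       what, len, addr, *data);
	else
		printf("%s: %d bytes at 0x%" PRIx64 " (data not available)\n",
		       what, len, addr);
}

int main(void)
{
	unsigned long val = 0xabcd;

	trace_mmio("mmio read", 4, 0x3f201000, &val);	/* satisfied read */
	trace_mmio("mmio read", 4, 0x3f201000, NULL);	/* not yet satisfied */
	return 0;
}
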
index b36945d49986dd5c0f097f16837d72d81f655308..b4b69c2d10120237e12bc6524243071bf645f1d9 100644 (file)
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-       unsigned long addr;
-
        mutex_lock(&kvm_hyp_pgd_mutex);
 
        if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
        if (hyp_pgd) {
                unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-               for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-               for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+                               (uintptr_t)high_memory - PAGE_OFFSET);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+                               VMALLOC_END - VMALLOC_START);
 
                free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
                hyp_pgd = NULL;