Merge tag 'regulator-v4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 13 Apr 2015 22:13:25 +0000 (15:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 13 Apr 2015 22:13:25 +0000 (15:13 -0700)
Pull regulator updates from Mark Brown:
 "Another release, another set of regulator updates.  Not much of it is
  showing up in the code yet but there's been a lot of discussion going
  on about how to enhance the regulator API to work better with modern
  SoCs which have a microcontroller sitting between Linux and the
  hardware.

  I'm hopeful that'll start to come through into mainline for v4.2 but
  it's not quite there for v4.1 - what we do have (along with the usual
  small updates) is:

   - work from Bjorn Andersson on refactoring the configuration of
     regulator loading interfaces to be useful for use with
     microcontrollers, the existing interfaces were never actually
     useful for anything as-is since nobody was willing to put enough
     data into public code.

   - a summary tree display in debugfs from Heiko Stübner.

   - support for act6000 regulators"

* tag 'regulator-v4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator: (34 commits)
  regulator: max8660: Handle empty regulator data
  regulator: output current-limit for all regulators in summary
  regulator: add a summary tree in debugfs
  regulator: qcom: Tidy up probe()
  regulator: qcom: Rework to single platform device
  regulator: qcom: Refactor of-parsing code
  regulator: qcom: Don't enable DRMS in driver
  regulator: max8660: fix assignment of pdata to data that becomes dead
  regulator: Defer lookup of supply to regulator_get
  mfd: max77693: Remove unused structures
  regulator: max77693: Let core parse DT and drop board files support
  regulator: Ensure unique regulator debugfs directory names
  regulator: stw481x: Remove unused fields from struct stw481x
  regulator: palmas: Add has_regen3 check for TPS659038
  regulator: constify of_device_id array
  regulator: fixes for regulator_set_optimum_mode name change
  regulator: Drop temporary regulator_set_optimum_mode wrapper
  usb: phy: phy-msm-usb: Rename regulator_set_optimum_mode
  usb: phy: ab8500-usb: Rename regulator_set_optimum_mode
  ufs: Rename of regulator_set_optimum_mode
  ...
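
As a rough illustration (not part of this merge) of the consumer-facing
rename carried in the series above, a driver that used to call
regulator_set_optimum_mode() now requests its expected load with
regulator_set_load(); the "vdd" supply name and the 15000 uA figure here
are made up for the example:

	#include <linux/device.h>
	#include <linux/err.h>
	#include <linux/regulator/consumer.h>

	static int example_enable_vdd(struct device *dev)
	{
		struct regulator *vdd;
		int ret;

		vdd = devm_regulator_get(dev, "vdd");	/* illustrative supply name */
		if (IS_ERR(vdd))
			return PTR_ERR(vdd);

		/* Formerly regulator_set_optimum_mode(); the argument is uA. */
		ret = regulator_set_load(vdd, 15000);
		if (ret < 0)
			return ret;

		return regulator_enable(vdd);
	}

With Heiko Stübner's patches applied, the consumer state should then show
up in the new debugfs summary tree (regulator_summary under the debugfs
regulator directory).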

671 files changed:
CREDITS
Documentation/acpi/apei/einj.txt
Documentation/devicetree/bindings/gpio/gpio-fan.txt
Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
Documentation/devicetree/bindings/mmc/mmc-card.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mmc/sdhci-st.txt
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
Documentation/devicetree/bindings/spi/spi-img-spfi.txt
Documentation/devicetree/bindings/spi/spi-rockchip.txt
Documentation/devicetree/bindings/thermal/rcar-thermal.txt
Documentation/hwmon/it87
Documentation/hwmon/jc42
Documentation/hwmon/nct7904 [new file with mode: 0644]
Documentation/kernel-parameters.txt
Documentation/rtc.txt
Documentation/spi/spi-summary
Documentation/spi/spidev_test.c
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/devices/s390_flic.txt
Documentation/x86/boot.txt
MAINTAINERS
Makefile
arch/alpha/kernel/rtc.c
arch/arm/common/bL_switcher.c
arch/arm/include/asm/jump_label.h
arch/arm/include/asm/kvm_arm.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmio.h
arch/arm/include/asm/mach/time.h
arch/arm/include/uapi/asm/kvm.h
arch/arm/kernel/asm-offsets.c
arch/arm/kernel/time.c
arch/arm/kvm/Kconfig
arch/arm/kvm/Makefile
arch/arm/kvm/arm.c
arch/arm/kvm/guest.c
arch/arm/kvm/interrupts_head.S
arch/arm/kvm/mmio.c
arch/arm/kvm/mmu.c
arch/arm/kvm/trace.h
arch/arm/mach-omap2/cpuidle44xx.c
arch/arm/mach-omap2/hsmmc.c
arch/arm/mach-tegra/cpuidle-tegra114.c
arch/arm/mach-tegra/cpuidle-tegra20.c
arch/arm/mach-tegra/cpuidle-tegra30.c
arch/arm/plat-omap/counter_32k.c
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/jump_label.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmio.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/vdso.c
arch/arm64/kvm/Kconfig
arch/arm64/kvm/Makefile
arch/avr32/include/asm/elf.h
arch/m68k/configs/amiga_defconfig
arch/m68k/configs/apollo_defconfig
arch/m68k/configs/atari_defconfig
arch/m68k/configs/bvme6000_defconfig
arch/m68k/configs/hp300_defconfig
arch/m68k/configs/mac_defconfig
arch/m68k/configs/multi_defconfig
arch/m68k/configs/mvme147_defconfig
arch/m68k/configs/mvme16x_defconfig
arch/m68k/configs/q40_defconfig
arch/m68k/configs/sun3_defconfig
arch/m68k/configs/sun3x_defconfig
arch/m68k/include/asm/mcfqspi.h
arch/m68k/kernel/pcibios.c
arch/m68k/lib/ashldi3.c
arch/m68k/lib/ashrdi3.c
arch/m68k/lib/divsi3.S
arch/m68k/lib/lshrdi3.c
arch/m68k/lib/modsi3.S
arch/m68k/lib/muldi3.c
arch/m68k/lib/mulsi3.S
arch/m68k/lib/udivsi3.S
arch/m68k/lib/umodsi3.S
arch/m68k/mac/oss.c
arch/mips/include/asm/asmmacro-32.h
arch/mips/include/asm/asmmacro.h
arch/mips/include/asm/fpu.h
arch/mips/include/asm/jump_label.h
arch/mips/include/asm/kdebug.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/processor.h
arch/mips/include/uapi/asm/kvm.h
arch/mips/kernel/asm-offsets.c
arch/mips/kernel/genex.S
arch/mips/kernel/ptrace.c
arch/mips/kernel/r4k_fpu.S
arch/mips/kernel/traps.c
arch/mips/kvm/Makefile
arch/mips/kvm/emulate.c
arch/mips/kvm/fpu.S [new file with mode: 0644]
arch/mips/kvm/locore.S
arch/mips/kvm/mips.c
arch/mips/kvm/msa.S [new file with mode: 0644]
arch/mips/kvm/stats.c
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/mips/lasat/sysctl.c
arch/nios2/include/asm/thread_info.h
arch/nios2/include/uapi/asm/ptrace.h
arch/nios2/kernel/entry.S
arch/nios2/kernel/signal.c
arch/nios2/mm/cacheflush.c
arch/powerpc/kvm/mpic.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/pseries/hvCall.S
arch/powerpc/platforms/pseries/lpar.c
arch/s390/include/asm/jump_label.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/include/uapi/asm/sie.h
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/time.c
arch/s390/kvm/diag.c
arch/s390/kvm/gaccess.c
arch/s390/kvm/gaccess.h
arch/s390/kvm/guestdbg.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/sigp.c
arch/sparc/include/asm/jump_label.h
arch/sparc/kernel/pci.c
arch/sparc/kernel/time_32.c
arch/tile/kernel/time.c
arch/x86/Kconfig
arch/x86/boot/compressed/aslr.c
arch/x86/boot/compressed/head_32.S
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/misc.h
arch/x86/boot/string.c
arch/x86/boot/video-mode.c
arch/x86/boot/video.c
arch/x86/boot/video.h
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/crypto/twofish-x86_64-asm_64.S
arch/x86/ia32/Makefile
arch/x86/ia32/ia32_signal.c
arch/x86/ia32/ia32entry.S
arch/x86/ia32/nosyscall.c [deleted file]
arch/x86/ia32/sys_ia32.c
arch/x86/ia32/syscall_ia32.c [deleted file]
arch/x86/include/asm/alternative-asm.h
arch/x86/include/asm/alternative.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/calling.h
arch/x86/include/asm/compat.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/dwarf2.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/fpu-internal.h
arch/x86/include/asm/hw_irq.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/iommu_table.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/mce.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/microcode_intel.h
arch/x86/include/asm/mwait.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/pvclock.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/sigcontext.h
arch/x86/include/asm/sighandling.h
arch/x86/include/asm/smap.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/special_insns.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/uaccess_64.h
arch/x86/include/uapi/asm/bootparam.h
arch/x86/include/uapi/asm/ptrace-abi.h
arch/x86/include/uapi/asm/ptrace.h
arch/x86/include/uapi/asm/sigcontext.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/Makefile
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce-internal.h
arch/x86/kernel/cpu/mcheck/mce-severity.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/mce_intel.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/core_early.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/microcode/intel_early.c
arch/x86/kernel/cpu/microcode/intel_lib.c
arch/x86/kernel/cpu/mkcapflags.sh
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/crash.c
arch/x86/kernel/devicetree.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/e820.c
arch/x86/kernel/early_printk.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/head64.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/i387.c
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/irqinit.c
arch/x86/kernel/kgdb.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/module.c
arch/x86/kernel/perf_regs.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/pvclock.c
arch/x86/kernel/relocate_kernel_32.S
arch/x86/kernel/relocate_kernel_64.S
arch/x86/kernel/setup.c
arch/x86/kernel/signal.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/sys_x86_64.c
arch/x86/kernel/syscall_32.c
arch/x86/kernel/time.c
arch/x86/kernel/traps.c
arch/x86/kernel/uprobes.c
arch/x86/kernel/vm86_32.c
arch/x86/kernel/vsyscall_gtod.c
arch/x86/kernel/xsave.c
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/i8254.c
arch/x86/kvm/i8254.h
arch/x86/kvm/i8259.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/irq.h
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lguest/boot.c
arch/x86/lib/atomic64_cx8_32.S
arch/x86/lib/checksum_32.S
arch/x86/lib/clear_page_64.S
arch/x86/lib/copy_page_64.S
arch/x86/lib/copy_user_64.S
arch/x86/lib/csum-copy_64.S
arch/x86/lib/insn.c
arch/x86/lib/memcpy_64.S
arch/x86/lib/memmove_64.S
arch/x86/lib/memset_64.S
arch/x86/lib/msr-reg.S
arch/x86/lib/rwsem.S
arch/x86/lib/thunk_32.S
arch/x86/lib/thunk_64.S
arch/x86/lib/usercopy_64.c
arch/x86/lib/x86-opcode-map.txt
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/mm/init_64.c
arch/x86/mm/numa.c
arch/x86/mm/pageattr.c
arch/x86/mm/pat.c
arch/x86/mm/pgtable.c
arch/x86/oprofile/backtrace.c
arch/x86/platform/efi/efi-bgrt.c
arch/x86/platform/efi/efi.c
arch/x86/platform/efi/efi_32.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/intel-quark/imr_selftest.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/power/cpu.c
arch/x86/syscalls/syscall_32.tbl
arch/x86/syscalls/syscall_64.tbl
arch/x86/um/asm/barrier.h
arch/x86/um/sys_call_table_64.c
arch/x86/vdso/Makefile
arch/x86/vdso/vclock_gettime.c
arch/x86/vdso/vdso32/syscall.S
arch/x86/xen/enlighten.c
arch/x86/xen/smp.c
arch/x86/xen/suspend.c
arch/x86/xen/xen-asm_64.S
block/blk-mq.c
drivers/acpi/acpi_pad.c
drivers/acpi/processor_idle.c
drivers/base/regmap/Makefile
drivers/base/regmap/regcache.c
drivers/base/regmap/regmap.c
drivers/base/regmap/trace.h [new file with mode: 0644]
drivers/char/ipmi/ipmi_powernv.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/ipmi/ipmi_ssif.c
drivers/clocksource/arm_arch_timer.c
drivers/clocksource/dw_apb_timer_of.c
drivers/clocksource/em_sti.c
drivers/clocksource/sh_cmt.c
drivers/clocksource/sh_tmu.c
drivers/clocksource/sun4i_timer.c
drivers/clocksource/tegra20_timer.c
drivers/clocksource/time-efm32.c
drivers/clocksource/timer-atmel-pit.c
drivers/clocksource/timer-sun5i.c
drivers/cpufreq/cpufreq.c
drivers/cpuidle/cpuidle.c
drivers/cpuidle/driver.c
drivers/cpuidle/sysfs.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/cppi41.c
drivers/dma/dmaengine.c
drivers/dma/intel_mid_dma.c [deleted file]
drivers/dma/intel_mid_dma_regs.h [deleted file]
drivers/edac/amd64_edac.c
drivers/edac/amd64_edac.h
drivers/edac/amd64_edac_dbg.c
drivers/edac/amd64_edac_inj.c
drivers/edac/edac_core.h
drivers/edac/edac_mc.c
drivers/edac/edac_mc_sysfs.c
drivers/edac/edac_module.c
drivers/edac/edac_module.h
drivers/edac/highbank_mc_edac.c
drivers/edac/i7core_edac.c
drivers/edac/i82443bxgx_edac.c
drivers/edac/i82860_edac.c
drivers/edac/i82875p_edac.c
drivers/edac/i82975x_edac.c
drivers/edac/mpc85xx_edac.c
drivers/edac/octeon_edac-lmc.c
drivers/edac/ppc4xx_edac.c
drivers/edac/synopsys_edac.c
drivers/firmware/dmi_scan.c
drivers/firmware/efi/libstub/arm-stub.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/fdt.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/hwmon/Kconfig
drivers/hwmon/Makefile
drivers/hwmon/coretemp.c
drivers/hwmon/gpio-fan.c
drivers/hwmon/ibmpex.c
drivers/hwmon/ibmpowernv.c
drivers/hwmon/it87.c
drivers/hwmon/jc42.c
drivers/hwmon/nct6775.c
drivers/hwmon/nct7904.c [new file with mode: 0644]
drivers/hwmon/pwm-fan.c
drivers/hwmon/vexpress.c
drivers/idle/intel_idle.c
drivers/md/md.c
drivers/md/raid0.c
drivers/media/dvb-frontends/rtl2832.c
drivers/media/pci/cx23885/cx23885-417.c
drivers/media/platform/s5p-jpeg/jpeg-core.c
drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c
drivers/media/platform/s5p-mfc/s5p_mfc.c
drivers/media/platform/s5p-mfc/s5p_mfc_common.h
drivers/media/platform/s5p-mfc/s5p_mfc_opr.h
drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c
drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
drivers/media/platform/s5p-tv/Kconfig
drivers/media/platform/sh_veu.c
drivers/media/platform/soc_camera/atmel-isi.c
drivers/media/platform/soc_camera/soc_camera.c
drivers/media/usb/dvb-usb-v2/rtl28xxu.c
drivers/media/usb/gspca/Kconfig
drivers/media/v4l2-core/videobuf2-core.c
drivers/media/v4l2-core/videobuf2-dma-contig.c
drivers/misc/enclosure.c
drivers/misc/sgi-xp/xpc_main.c
drivers/mmc/core/core.c
drivers/mmc/core/mmc.c
drivers/mmc/core/pwrseq.c
drivers/mmc/core/pwrseq.h
drivers/mmc/core/pwrseq_emmc.c
drivers/mmc/core/pwrseq_simple.c
drivers/mmc/core/sdio.c
drivers/mmc/host/Kconfig
drivers/mmc/host/Makefile
drivers/mmc/host/atmel-mci-regs.h
drivers/mmc/host/dw_mmc-exynos.c
drivers/mmc/host/dw_mmc-exynos.h
drivers/mmc/host/dw_mmc-rockchip.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/dw_mmc.h
drivers/mmc/host/mmc_spi.c
drivers/mmc/host/mmci.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/sdhci-acpi.c
drivers/mmc/host/sdhci-bcm-kona.c
drivers/mmc/host/sdhci-bcm2835.c
drivers/mmc/host/sdhci-cns3xxx.c
drivers/mmc/host/sdhci-dove.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mmc/host/sdhci-iproc.c [new file with mode: 0644]
drivers/mmc/host/sdhci-msm.c
drivers/mmc/host/sdhci-of-arasan.c
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mmc/host/sdhci-of-hlwd.c
drivers/mmc/host/sdhci-pci.c
drivers/mmc/host/sdhci-pltfm.c
drivers/mmc/host/sdhci-sirf.c
drivers/mmc/host/sdhci-spear.c
drivers/mmc/host/sdhci-st.c
drivers/mmc/host/sdhci-tegra.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mmc/host/sh_mmcif.c
drivers/mmc/host/sunxi-mmc.c
drivers/mmc/host/tmio_mmc_pio.c
drivers/mmc/host/wmt-sdmmc.c
drivers/pci/host/pcie-designware.c
drivers/pci/host/pcie-spear13xx.c
drivers/pci/hotplug/cpci_hotplug_pci.c
drivers/pci/pci-acpi.c
drivers/pci/pcie/aer/aerdrv_errprint.c
drivers/rtc/class.c
drivers/rtc/interface.c
drivers/rtc/rtc-ab3100.c
drivers/rtc/rtc-mc13xxx.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-test.c
drivers/rtc/systohc.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/scsi_lib.c
drivers/spi/Kconfig
drivers/spi/spi-atmel.c
drivers/spi/spi-bcm2835.c
drivers/spi/spi-bcm53xx.c
drivers/spi/spi-bfin5xx.c
drivers/spi/spi-bitbang-txrx.h
drivers/spi/spi-dw-mid.c
drivers/spi/spi-dw.c
drivers/spi/spi-dw.h
drivers/spi/spi-fsl-dspi.c
drivers/spi/spi-img-spfi.c
drivers/spi/spi-imx.c
drivers/spi/spi-mpc512x-psc.c
drivers/spi/spi-octeon.c
drivers/spi/spi-omap-100k.c
drivers/spi/spi-omap-uwire.c
drivers/spi/spi-pl022.c
drivers/spi/spi-pxa2xx.c
drivers/spi/spi-qup.c
drivers/spi/spi-rockchip.c
drivers/spi/spi-rspi.c
drivers/spi/spi-s3c64xx.c
drivers/spi/spi-sc18is602.c
drivers/spi/spi-st-ssc4.c
drivers/spi/spi.c
drivers/spi/spidev.c
drivers/target/iscsi/iscsi_target.c
drivers/target/target_core_device.c
drivers/thermal/st/st_thermal.c
drivers/thermal/st/st_thermal_memmap.c
drivers/thermal/st/st_thermal_syscfg.c
drivers/thermal/thermal_core.c
fs/aio.c
fs/ocfs2/file.c
include/kvm/arm_arch_timer.h
include/kvm/arm_vgic.h
include/kvm/iodev.h [new file with mode: 0644]
include/linux/blk_types.h
include/linux/clockchips.h
include/linux/clocksource.h
include/linux/compiler.h
include/linux/cpuidle.h
include/linux/dmapool.h
include/linux/efi.h
include/linux/fs.h
include/linux/init.h
include/linux/intel_mid_dma.h [deleted file]
include/linux/irq_work.h
include/linux/jump_label.h
include/linux/kvm_host.h
include/linux/mmc/core.h
include/linux/mmc/dw_mmc.h
include/linux/mmc/host.h
include/linux/mmc/sdhci-spear.h [deleted file]
include/linux/mmc/sdhci.h [deleted file]
include/linux/mmzone.h
include/linux/platform_data/hsmmc-omap.h
include/linux/rtc.h
include/linux/sched.h
include/linux/seqlock.h
include/linux/spi/spi.h
include/linux/stddef.h
include/linux/tick.h
include/linux/timekeeper_internal.h
include/linux/timekeeping.h
include/linux/vfio.h
include/media/atmel-isi.h
include/trace/events/regmap.h [deleted file]
include/uapi/linux/kvm.h
kernel/cpu.c
kernel/futex.c
kernel/locking/mcs_spinlock.h
kernel/locking/mutex.c
kernel/locking/osq_lock.c
kernel/locking/rtmutex.c
kernel/locking/rwsem-spinlock.c
kernel/locking/rwsem-xadd.c
kernel/locking/rwsem.c
kernel/locking/rwsem.h [new file with mode: 0644]
kernel/module.c
kernel/power/snapshot.c
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/idle.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/time/Kconfig
kernel/time/Makefile
kernel/time/clockevents.c
kernel/time/clocksource.c
kernel/time/hrtimer.c
kernel/time/jiffies.c
kernel/time/ntp.c
kernel/time/sched_clock.c
kernel/time/tick-broadcast.c
kernel/time/tick-common.c
kernel/time/tick-internal.h
kernel/time/tick-oneshot.c
kernel/time/tick-sched.c
kernel/time/tick-sched.h [new file with mode: 0644]
kernel/time/timekeeping.c
kernel/time/timekeeping.h
kernel/time/timer.c
kernel/time/timer_list.c
lib/Kconfig.debug
lib/lockref.c
mm/mremap.c
net/ceph/messenger.c
sound/firewire/bebob/bebob_maudio.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/pcm512x.c
sound/usb/mixer_quirks.c
sound/usb/quirks.c
tools/perf/bench/mem-memcpy-x86-64-asm-def.h
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/bench/mem-memcpy.c
tools/perf/bench/mem-memset-x86-64-asm-def.h
tools/perf/bench/mem-memset-x86-64-asm.S
tools/perf/util/include/asm/alternative-asm.h
tools/testing/selftests/Makefile
tools/testing/selftests/breakpoints/Makefile
tools/testing/selftests/cpu-hotplug/Makefile
tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh [new file with mode: 0755]
tools/testing/selftests/cpu-hotplug/on-off-test.sh [deleted file]
tools/testing/selftests/efivarfs/Makefile
tools/testing/selftests/efivarfs/efivarfs.sh [changed mode: 0644->0755]
tools/testing/selftests/exec/Makefile
tools/testing/selftests/firmware/Makefile
tools/testing/selftests/firmware/fw_filesystem.sh [changed mode: 0644->0755]
tools/testing/selftests/firmware/fw_userhelper.sh [changed mode: 0644->0755]
tools/testing/selftests/ftrace/Makefile
tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
tools/testing/selftests/ftrace/test.d/event/event-enable.tc
tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
tools/testing/selftests/gen_kselftest_tar.sh [new file with mode: 0755]
tools/testing/selftests/ipc/Makefile
tools/testing/selftests/kcmp/Makefile
tools/testing/selftests/kselftest_install.sh [new file with mode: 0755]
tools/testing/selftests/lib.mk [new file with mode: 0644]
tools/testing/selftests/memfd/Makefile
tools/testing/selftests/memory-hotplug/Makefile
tools/testing/selftests/memory-hotplug/mem-on-off-test.sh [new file with mode: 0755]
tools/testing/selftests/memory-hotplug/on-off-test.sh [deleted file]
tools/testing/selftests/mount/.gitignore [new file with mode: 0644]
tools/testing/selftests/mount/Makefile
tools/testing/selftests/mqueue/Makefile
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/run_afpackettests [changed mode: 0644->0755]
tools/testing/selftests/net/run_netsocktests [changed mode: 0644->0755]
tools/testing/selftests/powerpc/Makefile
tools/testing/selftests/powerpc/copyloops/Makefile
tools/testing/selftests/powerpc/mm/Makefile
tools/testing/selftests/powerpc/pmu/Makefile
tools/testing/selftests/powerpc/pmu/ebb/Makefile
tools/testing/selftests/powerpc/primitives/Makefile
tools/testing/selftests/powerpc/stringloops/Makefile
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/ptrace/Makefile
tools/testing/selftests/size/Makefile
tools/testing/selftests/sysctl/Makefile
tools/testing/selftests/sysctl/run_numerictests [changed mode: 0644->0755]
tools/testing/selftests/sysctl/run_stringtests [changed mode: 0644->0755]
tools/testing/selftests/timers/Makefile
tools/testing/selftests/timers/alarmtimer-suspend.c [new file with mode: 0644]
tools/testing/selftests/timers/change_skew.c [new file with mode: 0644]
tools/testing/selftests/timers/clocksource-switch.c [new file with mode: 0644]
tools/testing/selftests/timers/inconsistency-check.c [new file with mode: 0644]
tools/testing/selftests/timers/leap-a-day.c [new file with mode: 0644]
tools/testing/selftests/timers/leapcrash.c [new file with mode: 0644]
tools/testing/selftests/timers/mqueue-lat.c [new file with mode: 0644]
tools/testing/selftests/timers/nanosleep.c [new file with mode: 0644]
tools/testing/selftests/timers/nsleep-lat.c [new file with mode: 0644]
tools/testing/selftests/timers/posix_timers.c
tools/testing/selftests/timers/raw_skew.c [new file with mode: 0644]
tools/testing/selftests/timers/rtctest.c [new file with mode: 0644]
tools/testing/selftests/timers/set-2038.c [new file with mode: 0644]
tools/testing/selftests/timers/set-tai.c [new file with mode: 0644]
tools/testing/selftests/timers/set-timer-lat.c [new file with mode: 0644]
tools/testing/selftests/timers/skew_consistency.c [new file with mode: 0644]
tools/testing/selftests/timers/threadtest.c [new file with mode: 0644]
tools/testing/selftests/timers/valid-adjtimex.c [new file with mode: 0644]
tools/testing/selftests/user/Makefile
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/run_vmtests [changed mode: 0644->0755]
tools/testing/selftests/x86/.gitignore [new file with mode: 0644]
tools/testing/selftests/x86/Makefile [new file with mode: 0644]
tools/testing/selftests/x86/run_x86_tests.sh [new file with mode: 0644]
tools/testing/selftests/x86/sigreturn.c [new file with mode: 0644]
tools/testing/selftests/x86/trivial_32bit_program.c [new file with mode: 0644]
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic-v2-emul.c
virt/kvm/arm/vgic-v3-emul.c
virt/kvm/arm/vgic.c
virt/kvm/arm/vgic.h
virt/kvm/coalesced_mmio.c
virt/kvm/eventfd.c
virt/kvm/iodev.h [deleted file]
virt/kvm/irqchip.c
virt/kvm/kvm_main.c

diff --git a/CREDITS b/CREDITS
index 96935df0b6fe5d10cf1558c8ed6a15af89fb0cac..843e17647f3b72bfda48632d67ba089307b15602 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -187,6 +187,10 @@ N: Krishna Balasubramanian
 E: balasub@cis.ohio-state.edu
 D: Wrote SYS V IPC (part of standard kernel since 0.99.10)
 
+N: Chris Ball
+E: chris@printf.net
+D: Former maintainer of the MMC/SD/SDIO subsystem.
+
 N: Dario Ballabio
 E: ballabio_dario@emc.com
 E: dario.ballabio@tiscalinet.it
index f51861bcb07bbbc226e0d1af2897f6b49bcf82c5..e550c8b98139974343de5e4890c94f642f0a53d3 100644 (file)
                        APEI Error INJection
                        ~~~~~~~~~~~~~~~~~~~~
 
-EINJ provides a hardware error injection mechanism
-It is very useful for debugging and testing of other APEI and RAS features.
+EINJ provides a hardware error injection mechanism. It is very useful
+for debugging and testing APEI and RAS features in general.
 
-To use EINJ, make sure the following are enabled in your kernel
+You need to check whether your BIOS supports EINJ first. For that, look
+for early boot messages similar to this one:
+
+ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL           00000001 INTL 00000001)
+
+which shows that the BIOS is exposing an EINJ table - it is the
+mechanism through which the injection is done.
+
+Alternatively, look in /sys/firmware/acpi/tables for an "EINJ" file,
+which is a different representation of the same thing.
+
+It doesn't necessarily mean that EINJ is not supported if those above
+don't exist: before you give up, go into BIOS setup to see if the BIOS
+has an option to enable error injection. Look for something called WHEA
+or similar. Often, you need to enable an ACPI5 support option prior, in
+order to see the APEI,EINJ,... functionality supported and exposed by
+the BIOS menu.
+
+To use EINJ, make sure the following options are enabled in your kernel
 configuration:
 
 CONFIG_DEBUG_FS
 CONFIG_ACPI_APEI
 CONFIG_ACPI_APEI_EINJ
 
-The user interface of EINJ is debug file system, under the
-directory apei/einj. The following files are provided.
+The EINJ user interface is in <debugfs mount point>/apei/einj.
+
+The following files belong to it:
 
 - available_error_type
-  Reading this file returns the error injection capability of the
-  platform, that is, which error types are supported. The error type
-  definition is as follow, the left field is the error type value, the
-  right field is error description.
-
-    0x00000001 Processor Correctable
-    0x00000002 Processor Uncorrectable non-fatal
-    0x00000004 Processor Uncorrectable fatal
-    0x00000008  Memory Correctable
-    0x00000010  Memory Uncorrectable non-fatal
-    0x00000020  Memory Uncorrectable fatal
-    0x00000040 PCI Express Correctable
-    0x00000080 PCI Express Uncorrectable fatal
-    0x00000100 PCI Express Uncorrectable non-fatal
-    0x00000200 Platform Correctable
-    0x00000400 Platform Uncorrectable non-fatal
-    0x00000800 Platform Uncorrectable fatal
-
-  The format of file contents are as above, except there are only the
-  available error type lines.
+
+  This file shows which error types are supported:
+
+  Error Type Value     Error Description
+  ================     =================
+  0x00000001           Processor Correctable
+  0x00000002           Processor Uncorrectable non-fatal
+  0x00000004           Processor Uncorrectable fatal
+  0x00000008           Memory Correctable
+  0x00000010           Memory Uncorrectable non-fatal
+  0x00000020           Memory Uncorrectable fatal
+  0x00000040           PCI Express Correctable
+  0x00000080           PCI Express Uncorrectable fatal
+  0x00000100           PCI Express Uncorrectable non-fatal
+  0x00000200           Platform Correctable
+  0x00000400           Platform Uncorrectable non-fatal
+  0x00000800           Platform Uncorrectable fatal
+
+  The format of the file contents is as above, except that only the
+  available error types are present.
 
 - error_type
-  This file is used to set the error type value. The error type value
-  is defined in "available_error_type" description.
+
+  Set the value of the error type being injected. Possible error types
+  are defined in the file available_error_type above.
 
 - error_inject
-  Write any integer to this file to trigger the error
-  injection. Before this, please specify all necessary error
-  parameters.
+
+  Write any integer to this file to trigger the error injection. Make
+  sure you have specified all necessary error parameters, i.e. this
+  write should be the last step when injecting errors.
 
 - flags
-  Present for kernel version 3.13 and above. Used to specify which
-  of param{1..4} are valid and should be used by BIOS during injection.
-  Value is a bitmask as specified in ACPI5.0 spec for the
+
+  Present for kernel versions 3.13 and above. Used to specify which
+  of param{1..4} are valid and should be used by the firmware during
+  injection. Value is a bitmask as specified in ACPI5.0 spec for the
   SET_ERROR_TYPE_WITH_ADDRESS data structure:
-       Bit 0 - Processor APIC field valid (see param3 below)
-       Bit 1 - Memory address and mask valid (param1 and param2)
-       Bit 2 - PCIe (seg,bus,dev,fn) valid (param4 below)
-  If set to zero, legacy behaviour is used where the type of injection
-  specifies just one bit set, and param1 is multiplexed.
+
+       Bit 0 - Processor APIC field valid (see param3 below).
+       Bit 1 - Memory address and mask valid (param1 and param2).
+       Bit 2 - PCIe (seg,bus,dev,fn) valid (see param4 below).
+
+  If set to zero, legacy behavior is mimicked where the type of
+  injection specifies just one bit set, and param1 is multiplexed.
 
 - param1
-  This file is used to set the first error parameter value. Effect of
-  parameter depends on error_type specified. For example, if error
-  type is memory related type, the param1 should be a valid physical
-  memory address. [Unless "flag" is set - see above]
+
+  This file is used to set the first error parameter value. Its effect
+  depends on the error type specified in error_type. For example, if
+  the error type is a memory-related one, param1 should be a valid
+  physical memory address. [Unless "flag" is set - see above]
 
 - param2
-  This file is used to set the second error parameter value. Effect of
-  parameter depends on error_type specified. For example, if error
-  type is memory related type, the param2 should be a physical memory
-  address mask. Linux requires page or narrower granularity, say,
-  0xfffffffffffff000.
+
+  Same use as param1 above. For example, if the error type is a
+  memory-related one, then param2 should be a physical memory address mask.
+  Linux requires page or narrower granularity, say, 0xfffffffffffff000.
 
 - param3
-  Used when the 0x1 bit is set in "flag" to specify the APIC id
+
+  Used when the 0x1 bit is set in "flags" to specify the APIC id
 
 - param4
-  Used when the 0x4 bit is set in "flag" to specify target PCIe device
+  Used when the 0x4 bit is set in "flags" to specify target PCIe device
 
 - notrigger
-  The EINJ mechanism is a two step process. First inject the error, then
-  perform some actions to trigger it. Setting "notrigger" to 1 skips the
-  trigger phase, which *may* allow the user to cause the error in some other
-  context by a simple access to the cpu, memory location, or device that is
-  the target of the error injection. Whether this actually works depends
-  on what operations the BIOS actually includes in the trigger phase.
-
-BIOS versions based in the ACPI 4.0 specification have limited options
-to control where the errors are injected.  Your BIOS may support an
-extension (enabled with the param_extension=1 module parameter, or
-boot command line einj.param_extension=1). This allows the address
-and mask for memory injections to be specified by the param1 and
-param2 files in apei/einj.
-
-BIOS versions using the ACPI 5.0 specification have more control over
-the target of the injection. For processor related errors (type 0x1,
-0x2 and 0x4) the APICID of the target should be provided using the
-param1 file in apei/einj. For memory errors (type 0x8, 0x10 and 0x20)
-the address is set using param1 with a mask in param2 (0x0 is equivalent
-to all ones). For PCI express errors (type 0x40, 0x80 and 0x100) the
-segment, bus, device and function are specified using param1:
+
+  The error injection mechanism is a two-step process. First inject the
+  error, then perform some actions to trigger it. Setting "notrigger"
+  to 1 skips the trigger phase, which *may* allow the user to cause the
+  error in some other context by a simple access to the CPU, memory
+  location, or device that is the target of the error injection. Whether
+  this actually works depends on what operations the BIOS actually
+  includes in the trigger phase.
+
+BIOS versions based on the ACPI 4.0 specification have limited options
+in controlling where the errors are injected. Your BIOS may support an
+extension (enabled with the param_extension=1 module parameter, or boot
+command line einj.param_extension=1). This allows the address and mask
+for memory injections to be specified by the param1 and param2 files in
+apei/einj.
+
+BIOS versions based on the ACPI 5.0 specification have more control over
+the target of the injection. For processor-related errors (type 0x1, 0x2
+and 0x4), you can set flags to 0x3 (param3 for bit 0, and param1 and
+param2 for bit 1) so that you have more information added to the error
+signature being injected. The actual data passed is this:
+
+       memory_address = param1;
+       memory_address_range = param2;
+       apicid = param3;
+       pcie_sbdf = param4;
+
+For memory errors (type 0x8, 0x10 and 0x20) the address is set using
+param1 with a mask in param2 (0x0 is equivalent to all ones). For PCI
+express errors (type 0x40, 0x80 and 0x100) the segment, bus, device and
+function are specified using param1:
 
          31     24 23    16 15    11 10      8  7        0
        +-------------------------------------------------+
        | segment |   bus  | device | function | reserved |
        +-------------------------------------------------+
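
As a sanity check of the layout above, a hypothetical helper (not part of
this patch) that packs those fields into a param1 value could look like:

	/* segment:8 | bus:8 | device:5 | function:3 | reserved:8 */
	static unsigned int einj_pcie_param1(unsigned int seg, unsigned int bus,
					     unsigned int dev, unsigned int fn)
	{
		return (seg << 24) | (bus << 16) | (dev << 11) | (fn << 8);
	}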
 
-An ACPI 5.0 BIOS may also allow vendor specific errors to be injected.
+Anyway, you get the idea; if in doubt, just take a look at the code in
+drivers/acpi/apei/einj.c.
+
+An ACPI 5.0 BIOS may also allow vendor-specific errors to be injected.
 In this case a file named vendor will contain identifying information
 from the BIOS that hopefully will allow an application wishing to use
-the vendor specific extension to tell that they are running on a BIOS
+the vendor-specific extension to tell that they are running on a BIOS
 that supports it. All vendor extensions have the 0x80000000 bit set in
 error_type. A file vendor_flags controls the interpretation of param1
 and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor
 documentation for details (and expect changes to this API if vendors
 creativity in using this feature expands beyond our expectations).
 
-Example:
+
+An error injection example:
+
 # cd /sys/kernel/debug/apei/einj
 # cat available_error_type             # See which errors can be injected
 0x00000002     Processor Uncorrectable non-fatal
 0x00000008     Memory Correctable
 0x00000010     Memory Uncorrectable non-fatal
 # echo 0x12345000 > param1             # Set memory address for injection
-# echo 0xfffffffffffff000 > param2     # Mask - anywhere in this page
+# echo $((-1 << 12)) > param2          # Mask 0xfffffffffffff000 - anywhere in this page
 # echo 0x8 > error_type                        # Choose correctable memory error
 # echo 1 > error_inject                        # Inject now
 
+You should see something like this in dmesg:
+
+[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
+[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
+[22715.834759] EDAC sbridge MC3: TSC 0
+[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86
+[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
+[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 -  area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
 
 For more information about EINJ, please refer to ACPI specification
 version 4.0, section 17.5 and ACPI 5.0, section 18.6.
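
The shell session above can also be driven programmatically. Here is a
minimal userspace sketch (not from this patch; error handling kept to a
bare minimum) that performs the same correctable-memory-error injection
through the documented debugfs files:

	#include <stdio.h>
	#include <stdlib.h>

	/* Write a value to one of the EINJ debugfs files. */
	static void einj_write(const char *file, const char *val)
	{
		char path[128];
		FILE *f;

		snprintf(path, sizeof(path), "/sys/kernel/debug/apei/einj/%s", file);
		f = fopen(path, "w");
		if (!f) {
			perror(path);
			exit(1);
		}
		fputs(val, f);
		fclose(f);
	}

	int main(void)
	{
		einj_write("param1", "0x12345000");         /* target physical address */
		einj_write("param2", "0xfffffffffffff000"); /* page-granularity mask */
		einj_write("error_type", "0x8");            /* Memory Correctable */
		einj_write("error_inject", "1");            /* trigger last */
		return 0;
	}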
index 2dd457a3469af94baa89534c79aaacdf56e2d75e..439a7430fc6827e15b79811b8cfcc176fed205f7 100644 (file)
@@ -2,15 +2,20 @@ Bindings for fan connected to GPIO lines
 
 Required properties:
 - compatible : "gpio-fan"
+
+Optional properties:
 - gpios: Specifies the pins that map to bits in the control value,
   ordered MSB-->LSB.
 - gpio-fan,speed-map: A mapping of possible fan RPM speeds and the
   control value that should be set to achieve them. This array
   must have the RPM values in ascending order.
-
-Optional properties:
 - alarm-gpios: This pin going active indicates something is wrong with
   the fan, and a udev event will be fired.
+- #cooling-cells: If used as a cooling device, must be <2>.
+  Also see: Documentation/devicetree/bindings/thermal/thermal.txt.
+  The min and max cooling states are derived from the fan's speed-map.
+
+Note: At least one of the "gpios" or "alarm-gpios" properties must be set.
 
 Examples:
 
@@ -23,3 +28,13 @@ Examples:
                                      6000 2>;
                alarm-gpios = <&gpio1 15 1>;
        };
+       gpio_fan_cool: gpio_fan {
+               compatible = "gpio-fan";
+               gpios = <&gpio2 14 1
+                        &gpio2 13 1>;
+               gpio-fan,speed-map =    <0    0>,
+                                       <3000 1>,
+                                       <6000 2>;
+               alarm-gpios = <&gpio2 15 1>;
+               #cooling-cells = <2>; /* min followed by max */
+       };
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
new file mode 100644 (file)
index 0000000..72cc9cc
--- /dev/null
@@ -0,0 +1,23 @@
+Broadcom IPROC SDHCI controller
+
+This file documents differences between the core properties described
+by mmc.txt and the properties that represent the IPROC SDHCI controller.
+
+Required properties:
+- compatible : Should be "brcm,sdhci-iproc-cygnus".
+- clocks : The clock feeding the SDHCI controller.
+
+Optional properties:
+- sdhci,auto-cmd12: specifies that the controller should use auto CMD12.
+
+Example:
+
+sdhci0: sdhci@0x18041000 {
+       compatible = "brcm,sdhci-iproc-cygnus";
+       reg = <0x18041000 0x100>;
+       interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+       clocks = <&lcpll0_clks BCM_CYGNUS_LCPLL0_SDIO_CLK>;
+       bus-width = <4>;
+       sdhci,auto-cmd12;
+       no-1-8-v;
+};
index ee4fc0576c7d866b318eda917066e76e920b4f53..aad98442788bc386366b019a6166e6145b2f0017 100644 (file)
@@ -36,6 +36,8 @@ Required Properties:
   in transmit mode and CIU clock phase shift value in receive mode for double
   data rate mode operation. Refer notes below for the order of the cells and the
   valid values.
+* samsung,dw-mshc-hs400-timing: Specifies the value of CIU TX and RX clock phase
+  shift value for hs400 mode operation.
 
   Notes for the sdr-timing and ddr-timing values:
 
@@ -50,6 +52,9 @@ Required Properties:
       - if CIU clock divider value is 0 (that is divide by 1), both tx and rx
         phase shift clocks should be 0.
 
+* samsung,read-strobe-delay: RCLK (Data strobe) delay to control HS400 mode
+  (Latency value for delay line in Read path)
+
 Required properties for a slot (Deprecated - Recommend to use one slot per host):
 
 * gpios: specifies a list of gpios used for command, clock and data bus. The
@@ -82,5 +87,7 @@ Example:
                samsung,dw-mshc-ciu-div = <3>;
                samsung,dw-mshc-sdr-timing = <2 3>;
                samsung,dw-mshc-ddr-timing = <1 2>;
+               samsung,dw-mshc-hs400-timing = <0 2>;
+               samsung,read-strobe-delay = <90>;
                bus-width = <8>;
        };
index 9046ba06c47ab63570f99769455dcfeeea020e10..415c5575cbf7a1394439f3ba807e0f8547167231 100644 (file)
@@ -17,6 +17,10 @@ Optional properties:
   to select a proper data sampling window in case the clock quality is not good
   due to signal path is too long on the board. Please refer to eSDHC/uSDHC
   chapter, DLL (Delay Line) section in RM for details.
+- voltage-ranges : Specify the voltage range in case there are software
+  transparent level shifters on the outputs of the controller. Two cells are
+  required, first cell specifies minimum slot voltage (mV), second cell
+  specifies maximum slot voltage (mV). Several ranges could be specified.
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/mmc/mmc-card.txt b/Documentation/devicetree/bindings/mmc/mmc-card.txt
new file mode 100644 (file)
index 0000000..a70fcd6
--- /dev/null
@@ -0,0 +1,31 @@
+mmc-card / eMMC bindings
+------------------------
+
+This document describes the devicetree bindings for an mmc-host controller
+child node describing an mmc-card / eMMC; see "Use of Function subnodes"
+in mmc.txt.
+
+Required properties:
+-compatible : Must be "mmc-card"
+-reg        : Must be <0>
+
+Optional properties:
+-broken-hpi : Use this to indicate that the mmc-card has a broken hpi
+              implementation, and that hpi should not be used
+
+Example:
+
+&mmc2 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&mmc2_pins_a>;
+       vmmc-supply = <&reg_vcc3v3>;
+       bus-width = <8>;
+       non-removable;
+       status = "okay";
+
+       mmccard: mmccard@0 {
+               reg = <0>;
+               compatible = "mmc-card";
+               broken-hpi;
+       };
+};
index 7527db447a35b1ed92c79611852f1f365b8900e3..18d950df2749cbf3964f52f6718ff14d21c88348 100644 (file)
@@ -5,20 +5,62 @@ Documentation/devicetree/bindings/mmc/mmc.txt and the properties
 used by the sdhci-st driver.
 
 Required properties:
-- compatible :  Must be "st,sdhci"
-- clock-names : Should be "mmc"
-                See: Documentation/devicetree/bindings/resource-names.txt
-- clocks :      Phandle of the clock used by the sdhci controler
-                See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+- compatible:          Must be "st,sdhci". "st,sdhci-stih407" can be added as well
+                       to enable the internal glue logic used for configuring the MMC
+                       subsystem (mmcss) inside the FlashSS (available in the STiH407
+                       SoC family).
+
+- clock-names:         Should be "mmc".
+                       See: Documentation/devicetree/bindings/resource-names.txt
+- clocks:              Phandle to the clock.
+                       See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+- interrupts:          One mmc interrupt should be described here.
+- interrupt-names:     Should be "mmcirq".
+
+- pinctrl-names:       A pinctrl state named "default" must be defined.
+- pinctrl-0:           Phandle referencing pin configuration of the sd/emmc controller.
+                       See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+- reg:                 This must provide the host controller base address. It can also
+                       contain the FlashSS Top register for the TX/RX delay used by the
+                       driver to configure the DLL inside the FlashSS; if so, reg-names
+                       must also be specified.
 
 Optional properties:
-- non-removable: non-removable slot
-                 See: Documentation/devicetree/bindings/mmc/mmc.txt
-- bus-width: Number of data lines
-                 See: Documentation/devicetree/bindings/mmc/mmc.txt
+- reg-names:           Should be "mmc" and "top-mmc-delay". "top-mmc-delay" is optional
+                       for eMMC on stih407 family silicon to configure DLL inside FlashSS.
+
+- non-removable:       Non-removable slot. Also used for configuring mmcss in STiH407 SoC
+                       family.
+                       See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- bus-width:           Number of data lines.
+                       See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- max-frequency:       Can be 200MHz, 100MHz or 50MHz (default) and used for
+                       configuring the CCONFIG3 in the mmcss.
+                       See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- resets:              Phandle and reset specifier pair to softreset line of HC IP.
+                       See: Documentation/devicetree/bindings/reset/reset.txt
+
+- vqmmc-supply:                Phandle to the regulator dt node, mentioned as the vcc/vdd
+                       supply in eMMC/SD specs.
+
+- sd-uhs-sdr50:        To enable the SDR50 in the mmcss.
+                       See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- sd-uhs-sdr104:       To enable the SDR104 in the mmcss.
+                       See:  Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- sd-uhs-ddr50:                To enable the DDR50 in the mmcss.
+                       See:  Documentation/devicetree/bindings/mmc/mmc.txt.
 
 Example:
 
+/* Example stih416e eMMC configuration */
+
 mmc0: sdhci@fe81e000 {
        compatible      = "st,sdhci";
        status          = "disabled";
@@ -29,5 +71,43 @@ mmc0: sdhci@fe81e000 {
        pinctrl-0       = <&pinctrl_mmc0>;
        clock-names     = "mmc";
        clocks          = <&clk_s_a1_ls 1>;
-       bus-width       = <8>
+       bus-width       = <8>
+
+/* Example SD stih407 family configuration */
+
+mmc1: sdhci@09080000 {
+       compatible      = "st,sdhci-stih407", "st,sdhci";
+       status          = "disabled";
+       reg             = <0x09080000 0x7ff>;
+       reg-names       = "mmc";
+       interrupts      = <GIC_SPI 90 IRQ_TYPE_NONE>;
+       interrupt-names = "mmcirq";
+       pinctrl-names   = "default";
+       pinctrl-0       = <&pinctrl_sd1>;
+       clock-names     = "mmc";
+       clocks          = <&clk_s_c0_flexgen CLK_MMC_1>;
+       resets          = <&softreset STIH407_MMC1_SOFTRESET>;
+       bus-width       = <4>;
+};
+
+/* Example eMMC stih407 family configuration */
+
+mmc0: sdhci@09060000 {
+       compatible      = "st,sdhci-stih407", "st,sdhci";
+       status          = "disabled";
+       reg             = <0x09060000 0x7ff>, <0x9061008 0x20>;
+       reg-names       = "mmc", "top-mmc-delay";
+       interrupts      = <GIC_SPI 92 IRQ_TYPE_NONE>;
+       interrupt-names = "mmcirq";
+       pinctrl-names   = "default";
+       pinctrl-0       = <&pinctrl_mmc0>;
+       clock-names     = "mmc";
+       clocks          = <&clk_s_c0_flexgen CLK_MMC_0>;
+       vqmmc-supply    = <&vmmc_reg>;
+       max-frequency   = <200000000>;
+       bus-width       = <8>;
+       non-removable;
+       sd-uhs-sdr50;
+       sd-uhs-sdr104;
+       sd-uhs-ddr50;
 };
index aad527b357a0473cc4527868843a8de1d5385965..523341a0e1131a3e7b51878632b4040fb3a428c9 100644 (file)
@@ -2,11 +2,21 @@
   (CSPI/eCSPI) for i.MX
 
 Required properties:
-- compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi"
+- compatible :
+  - "fsl,imx1-cspi" for SPI compatible with the one integrated on i.MX1
+  - "fsl,imx21-cspi" for SPI compatible with the one integrated on i.MX21
+  - "fsl,imx27-cspi" for SPI compatible with the one integrated on i.MX27
+  - "fsl,imx31-cspi" for SPI compatible with the one integrated on i.MX31
+  - "fsl,imx35-cspi" for SPI compatible with the one integrated on i.MX35
+  - "fsl,imx51-ecspi" for SPI compatible with the one integrated on i.MX51
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
 - fsl,spi-num-chipselects : Contains the number of the chipselect
 - cs-gpios : Specifies the gpio pins to be used for chipselects.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+       Documentation/devicetree/bindings/clock/clock-bindings.txt
 - dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
                Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: DMA request names should include "tx" and "rx" if present.
index e2c88df2cc15ae75677b0562d3a1a9bf4d102bd9..5c090771c016cd22f38ebeaa4a4158ee7da20ccf 100644 (file)
@@ -33,6 +33,11 @@ Optional properties:
                nodes.  If unspecified, a single SPI device without a chip
                select can be used.
 
+- dmas:         Two DMA channel specifiers following the convention outlined
+                in bindings/dma/dma.txt
+- dma-names:    Names for the dma channels, if present. There must be at
+                least one channel named "tx" for transmit and named "rx" for
+                receive.
 
 SPI slave nodes must be children of the SPI master node and can contain
 properties described in Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -51,6 +56,9 @@ Example:
                clocks = <&gcc GCC_BLSP2_QUP2_SPI_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
                clock-names = "core", "iface";
 
+               dmas = <&blsp1_bam 13>, <&blsp1_bam 12>;
+               dma-names = "rx", "tx";
+
                pinctrl-names = "default";
                pinctrl-0 = <&spi8_default>;
 
index cbbe16ed3874f88c1902b90253afc756a01acb0d..70af78a9185e9b04bc8e24d30b6d9bc3bf705520 100644 (file)
@@ -16,6 +16,12 @@ Optional property:
   in big endian mode, otherwise in native mode(same with CPU), for more
   detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
 
+Optional SPI slave node properties:
+- fsl,spi-cs-sck-delay: a delay in nanoseconds between activating chip
+  select and the start of clock signal, at the start of a transfer.
+- fsl,spi-sck-cs-delay: a delay in nanoseconds between stopping the clock
+  signal and deactivating chip select, at the end of a transfer.
+
 Example:
 
 dspi0@4002c000 {
@@ -43,6 +49,8 @@ dspi0@4002c000 {
                reg = <0>;
                linux,modalias = "m25p80";
                modal = "at26df081a";
+               fsl,spi-cs-sck-delay = <100>;
+               fsl,spi-sck-cs-delay = <50>;
        };
 };
 
index c7dd50fb8eb29c7650bba9fa50c67d72de46fc90..e02fbf18c82cecad21a355cfacd3ae84c1676ac9 100644 (file)
@@ -14,6 +14,7 @@ Required properties:
 - dma-names: Must include the following entries:
   - rx
   - tx
+- cs-gpios: Must specify the GPIOs used for chipselect lines.
 - #address-cells: Must be 1.
 - #size-cells: Must be 0.
 
index 467dec441c62a545f3d011f45c8707f35287ded4..0c491bda4c65f9bace70c051fc06389261d82cad 100644 (file)
@@ -24,6 +24,9 @@ Optional Properties:
 - dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
                Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: DMA request names should include "tx" and "rx" if present.
+- rx-sample-delay-ns: nanoseconds to delay after the SCLK edge before sampling
+               Rx data (may need to be fine tuned for high capacitance lines).
+               No delay (0) by default.
 
 
 Example:
@@ -33,6 +36,7 @@ Example:
                reg = <0xff110000 0x1000>;
                dmas = <&pdma1 11>, <&pdma1 12>;
                dma-names = "tx", "rx";
+               rx-sample-delay-ns = <10>;
                #address-cells = <1>;
                #size-cells = <0>;
                interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
index 43404b197933262859e79686d5a8fa25ab12e5f3..332e625f6ed01cb4e442c0ea530ab29eb92f2a77 100644 (file)
@@ -4,7 +4,7 @@ Required properties:
 - compatible           : "renesas,thermal-<soctype>", "renesas,rcar-thermal"
                          as fallback.
                          Examples with soctypes are:
-                           - "renesas,thermal-r8a73a4" (R-Mobile AP6)
+                           - "renesas,thermal-r8a73a4" (R-Mobile APE6)
                            - "renesas,thermal-r8a7779" (R-Car H1)
                            - "renesas,thermal-r8a7790" (R-Car H2)
                            - "renesas,thermal-r8a7791" (R-Car M2-W)
index fe80e9adebfa452d184cf8ec75491cb4d26785b4..e87294878334d92af5af6420a8eae3d4222aaafc 100644 (file)
@@ -6,6 +6,10 @@ Supported chips:
     Prefix: 'it8603'
     Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
+  * IT8620E
+    Prefix: 'it8620'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
   * IT8705F
     Prefix: 'it87'
     Addresses scanned: from Super I/O config space (8 I/O ports)
@@ -42,6 +46,10 @@ Supported chips:
     Prefix: 'it8772'
     Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
+  * IT8781F
+    Prefix: 'it8781'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
   * IT8782F
     Prefix: 'it8782'
     Addresses scanned: from Super I/O config space (8 I/O ports)
@@ -50,6 +58,14 @@ Supported chips:
     Prefix: 'it8783'
     Addresses scanned: from Super I/O config space (8 I/O ports)
     Datasheet: Not publicly available
+  * IT8786E
+    Prefix: 'it8786'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
+  * IT8790E
+    Prefix: 'it8790'
+    Addresses scanned: from Super I/O config space (8 I/O ports)
+    Datasheet: Not publicly available
   * SiS950   [clone of IT8705F]
     Prefix: 'it87'
     Addresses scanned: from Super I/O config space (8 I/O ports)
@@ -94,9 +110,10 @@ motherboard models.
 Description
 -----------
 
-This driver implements support for the IT8603E, IT8623E, IT8705F, IT8712F,
-IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E, IT8771E,
-IT8772E, IT8782F, IT8783E/F, and SiS950 chips.
+This driver implements support for the IT8603E, IT8620E, IT8623E, IT8705F,
+IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E,
+IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E, and SiS950
+chips.
 
 These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
 joysticks and other miscellaneous stuff. For hardware monitoring, they
@@ -120,11 +137,11 @@ The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E and later IT8712F revisions
 have support for 2 additional fans. The additional fans are supported by the
 driver.
 
-The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E, IT8782F, IT8783E/F, and late
-IT8712F and IT8705F also have optional 16-bit tachometer counters for fans 1 to
-3. This is better (no more fan clock divider mess) but not compatible with the
-older chips and revisions. The 16-bit tachometer mode is enabled by the driver
-when one of the above chips is detected.
+The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E, IT8781F, IT8782F, IT8783E/F,
+and late IT8712F and IT8705F also have optional 16-bit tachometer counters
+for fans 1 to 3. This is better (no more fan clock divider mess) but not
+compatible with the older chips and revisions. The 16-bit tachometer mode
+is enabled by the driver when one of the above chips is detected.
 
 The IT8726F is just bit enhanced IT8716F with additional hardware
 for AMD power sequencing. Therefore the chip will appear as IT8716F
@@ -134,8 +151,13 @@ The IT8728F, IT8771E, and IT8772E are considered compatible with the IT8721F,
 until a datasheet becomes available (hopefully.)
 
 The IT8603E/IT8623E is a custom design, hardware monitoring part is similar to
-IT8728F. It only supports 16-bit fan mode, the full speed mode of the
-fan is not supported (value 0 of pwmX_enable).
+IT8728F. It only supports 3 fans, 16-bit fan mode, and the full speed mode
+of the fan is not supported (value 0 of pwmX_enable).
+
+The IT8620E is another custom design, hardware monitoring part is similar to
+IT8728F. It only supports 16-bit fan mode.
+
+The IT8790E supports up to 3 fans. 16-bit fan mode is always enabled.
 
 Temperatures are measured in degrees Celsius. An alarm is triggered once
 when the Overtemperature Shutdown limit is crossed.
@@ -156,10 +178,10 @@ inputs can measure voltages between 0 and 4.08 volts, with a resolution of
 0.016 volt (except IT8603E, IT8721F/IT8758E and IT8728F: 0.012 volt.) The
 battery voltage in8 does not have limit registers.
 
-On the IT8603E, IT8721F/IT8758E, IT8782F, and IT8783E/F, some voltage inputs
-are internal and scaled inside the chip:
+On the IT8603E, IT8721F/IT8758E, IT8781F, IT8782F, and IT8783E/F, some
+voltage inputs are internal and scaled inside the chip:
 * in3 (optional)
-* in7 (optional for IT8782F and IT8783E/F)
+* in7 (optional for IT8781F, IT8782F, and IT8783E/F)
 * in8 (always)
 * in9 (relevant for IT8603E only)
 The driver handles this transparently so user-space doesn't have to care.
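 
 For illustration only (this helper is not part of the driver), the raw
 8-bit ADC readings map to millivolts as described above; a minimal sketch
 in C, assuming the default 16 mV LSB:
 
        /*
         * Hypothetical conversion helper: lsb_uv is the LSB in microvolts,
         * 16000 for most chips, 12000 for IT8603E, IT8721F/IT8758E and
         * IT8728F.  255 * 16 mV = 4080 mV, the full-scale value above.
         */
        static unsigned int it87_raw_to_mv(unsigned char raw, unsigned int lsb_uv)
        {
                return (raw * lsb_uv) / 1000;
        }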
index f3893f7440def4e6038bc2263709bacda9d15ff3..f7f1830a25663c6273812abe9cbddf8245a050d4 100644 (file)
@@ -11,12 +11,10 @@ Supported chips:
        http://www.atmel.com/Images/doc8711.pdf
        http://www.atmel.com/Images/Atmel-8852-SEEPROM-AT30TSE002A-Datasheet.pdf
        http://www.atmel.com/Images/Atmel-8868-DTS-AT30TSE004A-Datasheet.pdf
-  * IDT TSE2002B3, TSE2002GB2, TS3000B3, TS3000GB2
+  * IDT TSE2002B3, TSE2002GB2, TSE2004GB2, TS3000B3, TS3000GB0, TS3000GB2,
+       TS3001GB2
     Datasheets:
-       http://www.idt.com/sites/default/files/documents/IDT_TSE2002B3C_DST_20100512_120303152056.pdf
-       http://www.idt.com/sites/default/files/documents/IDT_TSE2002GB2A1_DST_20111107_120303145914.pdf
-       http://www.idt.com/sites/default/files/documents/IDT_TS3000B3A_DST_20101129_120303152013.pdf
-       http://www.idt.com/sites/default/files/documents/IDT_TS3000GB2A1_DST_20111104_120303151012.pdf
+       Available from IDT web site
   * Maxim MAX6604
     Datasheets:
        http://datasheets.maxim-ic.com/en/ds/MAX6604.pdf
diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904
new file mode 100644 (file)
index 0000000..014f112
--- /dev/null
@@ -0,0 +1,60 @@
+Kernel driver nct7904
+=====================
+
+Supported chip:
+  * Nuvoton NCT7904D
+    Prefix: nct7904
+    Addresses: I2C 0x2d, 0x2e
+    Datasheet: Publicly available at Nuvoton website
+       http://www.nuvoton.com/
+
+Author: Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>
+
+
+Description
+-----------
+
+The NCT7904D is a hardware monitor supporting up to 20 voltage sensors, an
+internal temperature sensor, an Intel PECI and AMD SB-TSI CPU temperature
+interface, up to 12 fan tachometer inputs, and up to 4 fan control channels
+with SmartFan.
+
+
+Sysfs entries
+-------------
+
+Currently, the driver supports only the following features:
+
+in[1-20]_input         Input voltage measurements (mV)
+
+fan[1-12]_input                Fan tachometer measurements (rpm)
+
+temp1_input            Local temperature (1/1000 degree,
+                       0.125 degree resolution)
+
+temp[2-9]_input                CPU temperatures (1/1000 degree,
+                       0.125 degree resolution)
+
+fan[1-4]_mode          R/W, 0/1 for manual or SmartFan mode
+                       Setting SmartFan mode is supported only if it has been
+                       previously configured by BIOS (or configuration EEPROM)
+
+fan[1-4]_pwm           R/O in SmartFan mode, R/W in manual control mode
+
+The driver checks the sensor control registers and does not export sensors
+that are not enabled. Note, however, that an enabled sensor may still be
+unconnected and thus provide zero readings.
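+
+As a short illustration (not part of the driver; the hwmon0 path below is
+an assumption, the actual number depends on probe order), one of the above
+attributes can be read from userspace like this:
+
+       #include <stdio.h>
+
+       int main(void)
+       {
+               FILE *f = fopen("/sys/class/hwmon/hwmon0/in1_input", "r");
+               long mv;
+
+               if (!f)
+                       return 1;
+               if (fscanf(f, "%ld", &mv) == 1)
+                       printf("in1 = %ld mV\n", mv);
+               fclose(f);
+               return 0;
+       }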
+
+
+Limitations
+-----------
+
+The following features are not supported in the current version:
+
+ - SmartFan control
+ - Watchdog
+ - GPIO
+ - external temperature sensors
+ - SMI
+ - min/max values
+ - many others...
index bfcb1a62a7b48466f3cbe5a08d3e1a632eadc3d2..01aa47d3b6ab607e6a66f2a6a827b4b1afe4da73 100644 (file)
@@ -1036,7 +1036,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Format: {"off" | "on" | "skip[mbr]"}
 
        efi=            [EFI]
-                       Format: { "old_map", "nochunk", "noruntime" }
+                       Format: { "old_map", "nochunk", "noruntime", "debug" }
                        old_map [X86-64]: switch to the old ioremap-based EFI
                        runtime services mapping. 32-bit still uses this one by
                        default.
@@ -1044,6 +1044,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        boot stub, as chunking can cause problems with some
                        firmware implementations.
                        noruntime : disable EFI runtime services support
+                       debug: enable misc debug output
 
        efi_no_storage_paranoia [EFI; X86]
                        Using this parameter you can use more than 50% of
index 596b60c08b7451a9739d22aa2a29cb3c5c2559c9..8446f1ea1410b87b071047dc310a787a92606c31 100644 (file)
@@ -204,266 +204,4 @@ Some common examples:
 
     *  RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
 
-If all else fails, check out the rtc-test.c driver!
-
-
--------------------- 8< ---------------- 8< -----------------------------
-
-/*
- *      Real Time Clock Driver Test/Example Program
- *
- *      Compile with:
- *                  gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
- *
- *      Copyright (C) 1996, Paul Gortmaker.
- *
- *      Released under the GNU General Public License, version 2,
- *      included herein by reference.
- *
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-
-/*
- * This expects the new RTC class driver framework, working with
- * clocks that will often not be clones of what the PC-AT had.
- * Use the command line to specify another RTC if you need one.
- */
-static const char default_rtc[] = "/dev/rtc0";
-
-
-int main(int argc, char **argv)
-{
-       int i, fd, retval, irqcount = 0;
-       unsigned long tmp, data;
-       struct rtc_time rtc_tm;
-       const char *rtc = default_rtc;
-
-       switch (argc) {
-       case 2:
-               rtc = argv[1];
-               /* FALLTHROUGH */
-       case 1:
-               break;
-       default:
-               fprintf(stderr, "usage:  rtctest [rtcdev]\n");
-               return 1;
-       }
-
-       fd = open(rtc, O_RDONLY);
-
-       if (fd ==  -1) {
-               perror(rtc);
-               exit(errno);
-       }
-
-       fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
-
-       /* Turn on update interrupts (one per second) */
-       retval = ioctl(fd, RTC_UIE_ON, 0);
-       if (retval == -1) {
-               if (errno == ENOTTY) {
-                       fprintf(stderr,
-                               "\n...Update IRQs not supported.\n");
-                       goto test_READ;
-               }
-               perror("RTC_UIE_ON ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
-                       rtc);
-       fflush(stderr);
-       for (i=1; i<6; i++) {
-               /* This read will block */
-               retval = read(fd, &data, sizeof(unsigned long));
-               if (retval == -1) {
-                       perror("read");
-                       exit(errno);
-               }
-               fprintf(stderr, " %d",i);
-               fflush(stderr);
-               irqcount++;
-       }
-
-       fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
-       fflush(stderr);
-       for (i=1; i<6; i++) {
-               struct timeval tv = {5, 0};     /* 5 second timeout on select */
-               fd_set readfds;
-
-               FD_ZERO(&readfds);
-               FD_SET(fd, &readfds);
-               /* The select will wait until an RTC interrupt happens. */
-               retval = select(fd+1, &readfds, NULL, NULL, &tv);
-               if (retval == -1) {
-                       perror("select");
-                       exit(errno);
-               }
-               /* This read won't block unlike the select-less case above. */
-               retval = read(fd, &data, sizeof(unsigned long));
-               if (retval == -1) {
-                       perror("read");
-                       exit(errno);
-               }
-               fprintf(stderr, " %d",i);
-               fflush(stderr);
-               irqcount++;
-       }
-
-       /* Turn off update interrupts */
-       retval = ioctl(fd, RTC_UIE_OFF, 0);
-       if (retval == -1) {
-               perror("RTC_UIE_OFF ioctl");
-               exit(errno);
-       }
-
-test_READ:
-       /* Read the RTC time/date */
-       retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
-       if (retval == -1) {
-               perror("RTC_RD_TIME ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
-               rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
-               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-       /* Set the alarm to 5 sec in the future, and check for rollover */
-       rtc_tm.tm_sec += 5;
-       if (rtc_tm.tm_sec >= 60) {
-               rtc_tm.tm_sec %= 60;
-               rtc_tm.tm_min++;
-       }
-       if (rtc_tm.tm_min == 60) {
-               rtc_tm.tm_min = 0;
-               rtc_tm.tm_hour++;
-       }
-       if (rtc_tm.tm_hour == 24)
-               rtc_tm.tm_hour = 0;
-
-       retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
-       if (retval == -1) {
-               if (errno == ENOTTY) {
-                       fprintf(stderr,
-                               "\n...Alarm IRQs not supported.\n");
-                       goto test_PIE;
-               }
-               perror("RTC_ALM_SET ioctl");
-               exit(errno);
-       }
-
-       /* Read the current alarm settings */
-       retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
-       if (retval == -1) {
-               perror("RTC_ALM_READ ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
-               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-       /* Enable alarm interrupts */
-       retval = ioctl(fd, RTC_AIE_ON, 0);
-       if (retval == -1) {
-               perror("RTC_AIE_ON ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "Waiting 5 seconds for alarm...");
-       fflush(stderr);
-       /* This blocks until the alarm ring causes an interrupt */
-       retval = read(fd, &data, sizeof(unsigned long));
-       if (retval == -1) {
-               perror("read");
-               exit(errno);
-       }
-       irqcount++;
-       fprintf(stderr, " okay. Alarm rang.\n");
-
-       /* Disable alarm interrupts */
-       retval = ioctl(fd, RTC_AIE_OFF, 0);
-       if (retval == -1) {
-               perror("RTC_AIE_OFF ioctl");
-               exit(errno);
-       }
-
-test_PIE:
-       /* Read periodic IRQ rate */
-       retval = ioctl(fd, RTC_IRQP_READ, &tmp);
-       if (retval == -1) {
-               /* not all RTCs support periodic IRQs */
-               if (errno == ENOTTY) {
-                       fprintf(stderr, "\nNo periodic IRQ support\n");
-                       goto done;
-               }
-               perror("RTC_IRQP_READ ioctl");
-               exit(errno);
-       }
-       fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
-
-       fprintf(stderr, "Counting 20 interrupts at:");
-       fflush(stderr);
-
-       /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
-       for (tmp=2; tmp<=64; tmp*=2) {
-
-               retval = ioctl(fd, RTC_IRQP_SET, tmp);
-               if (retval == -1) {
-                       /* not all RTCs can change their periodic IRQ rate */
-                       if (errno == ENOTTY) {
-                               fprintf(stderr,
-                                       "\n...Periodic IRQ rate is fixed\n");
-                               goto done;
-                       }
-                       perror("RTC_IRQP_SET ioctl");
-                       exit(errno);
-               }
-
-               fprintf(stderr, "\n%ldHz:\t", tmp);
-               fflush(stderr);
-
-               /* Enable periodic interrupts */
-               retval = ioctl(fd, RTC_PIE_ON, 0);
-               if (retval == -1) {
-                       perror("RTC_PIE_ON ioctl");
-                       exit(errno);
-               }
-
-               for (i=1; i<21; i++) {
-                       /* This blocks */
-                       retval = read(fd, &data, sizeof(unsigned long));
-                       if (retval == -1) {
-                               perror("read");
-                               exit(errno);
-                       }
-                       fprintf(stderr, " %d",i);
-                       fflush(stderr);
-                       irqcount++;
-               }
-
-               /* Disable periodic interrupts */
-               retval = ioctl(fd, RTC_PIE_OFF, 0);
-               if (retval == -1) {
-                       perror("RTC_PIE_OFF ioctl");
-                       exit(errno);
-               }
-       }
-
-done:
-       fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
-
-       close(fd);
-
-       return 0;
-}
+If all else fails, check out the tools/testing/selftests/timers/rtctest.c test!
index d29734bff28c5beb38bf304b5b5d02903044d691..d1824b399b2d1d79059231a52e2552949d9ba24f 100644 (file)
@@ -342,12 +342,11 @@ SPI protocol drivers somewhat resemble platform device drivers:
                .driver = {
                        .name           = "CHIP",
                        .owner          = THIS_MODULE,
+                       .pm             = &CHIP_pm_ops,
                },
 
                .probe          = CHIP_probe,
                .remove         = CHIP_remove,
-               .suspend        = CHIP_suspend,
-               .resume         = CHIP_resume,
        };
 
 The driver core will automatically attempt to bind this driver to any SPI
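 
 A minimal sketch of providing the CHIP_pm_ops referenced above, using the
 SIMPLE_DEV_PM_OPS() helper from <linux/pm.h> (CHIP_suspend/CHIP_resume are
 placeholders, and the dev_pm_ops callbacks take a struct device pointer):
 
        static int CHIP_suspend(struct device *dev)
        {
                /* quiesce the chip */
                return 0;
        }
 
        static int CHIP_resume(struct device *dev)
        {
                /* reinitialize the chip */
                return 0;
        }
 
        static SIMPLE_DEV_PM_OPS(CHIP_pm_ops, CHIP_suspend, CHIP_resume);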
index 3a2f9d59edabf9d7858a1fff178fc722971dafd3..94f574b0fdb25890c3e3f64a23622ee0e8fb3bea 100644 (file)
@@ -15,6 +15,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <getopt.h>
 #include <fcntl.h>
 #include <sys/ioctl.h>
@@ -34,24 +35,79 @@ static uint32_t mode;
 static uint8_t bits = 8;
 static uint32_t speed = 500000;
 static uint16_t delay;
+static int verbose;
 
-static void transfer(int fd)
+uint8_t default_tx[] = {
+       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+       0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
+       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+       0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+       0xF0, 0x0D,
+};
+
+uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
+char *input_tx;
+
+static void hex_dump(const void *src, size_t length, size_t line_size, char *prefix)
+{
+       int i = 0;
+       const unsigned char *address = src;
+       const unsigned char *line = address;
+       unsigned char c;
+
+       printf("%s | ", prefix);
+       while (length-- > 0) {
+               printf("%02X ", *address++);
+               if (!(++i % line_size) || (length == 0 && i % line_size)) {
+                       if (length == 0) {
+                               while (i++ % line_size)
+                                       printf("__ ");
+                       }
+                       printf(" | ");  /* right close */
+                       while (line < address) {
+                               c = *line++;
+                               printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+                       }
+                       printf("\n");
+                       if (length > 0)
+                               printf("%s | ", prefix);
+               }
+       }
+}
+
+/*
+ *  unescape - process hexadecimal escape characters
+ *      converts shell input "\x23" -> 0x23
+ */
+int unescape(char *_dst, char *_src, size_t len)
+{
+       int ret = 0;
+       char *src = _src;
+       char *dst = _dst;
+       unsigned int ch;
+
+       while (*src) {
+               if (*src == '\\' && *(src+1) == 'x') {
+                       sscanf(src + 2, "%2x", &ch);
+                       src += 4;
+                       *dst++ = (unsigned char)ch;
+               } else {
+                       *dst++ = *src++;
+               }
+               ret++;
+       }
+       return ret;
+}
+
+static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
 {
        int ret;
-       uint8_t tx[] = {
-               0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-               0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
-               0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-               0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-               0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-               0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD,
-               0xF0, 0x0D,
-       };
-       uint8_t rx[ARRAY_SIZE(tx)] = {0, };
+
        struct spi_ioc_transfer tr = {
                .tx_buf = (unsigned long)tx,
                .rx_buf = (unsigned long)rx,
-               .len = ARRAY_SIZE(tx),
+               .len = len,
                .delay_usecs = delay,
                .speed_hz = speed,
                .bits_per_word = bits,
@@ -76,12 +132,9 @@ static void transfer(int fd)
        if (ret < 1)
                pabort("can't send spi message");
 
-       for (ret = 0; ret < ARRAY_SIZE(tx); ret++) {
-               if (!(ret % 6))
-                       puts("");
-               printf("%.2X ", rx[ret]);
-       }
-       puts("");
+       if (verbose)
+               hex_dump(tx, len, 32, "TX");
+       hex_dump(rx, len, 32, "RX");
 }
 
 static void print_usage(const char *prog)
@@ -97,6 +150,8 @@ static void print_usage(const char *prog)
             "  -L --lsb      least significant bit first\n"
             "  -C --cs-high  chip select active high\n"
             "  -3 --3wire    SI/SO signals shared\n"
+            "  -v --verbose  Verbose (show tx buffer)\n"
+            "  -p            Send data (e.g. \"1234\\xde\\xad\")\n"
             "  -N --no-cs    no chip select\n"
             "  -R --ready    slave pulls low to pause\n"
             "  -2 --dual     dual transfer\n"
@@ -121,12 +176,13 @@ static void parse_opts(int argc, char *argv[])
                        { "no-cs",   0, 0, 'N' },
                        { "ready",   0, 0, 'R' },
                        { "dual",    0, 0, '2' },
+                       { "verbose", 0, 0, 'v' },
                        { "quad",    0, 0, '4' },
                        { NULL, 0, 0, 0 },
                };
                int c;
 
-               c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24", lopts, NULL);
+               c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24p:v", lopts, NULL);
 
                if (c == -1)
                        break;
@@ -165,9 +221,15 @@ static void parse_opts(int argc, char *argv[])
                case 'N':
                        mode |= SPI_NO_CS;
                        break;
+               case 'v':
+                       verbose = 1;
+                       break;
                case 'R':
                        mode |= SPI_READY;
                        break;
+               case 'p':
+                       input_tx = optarg;
+                       break;
                case '2':
                        mode |= SPI_TX_DUAL;
                        break;
@@ -191,6 +253,9 @@ int main(int argc, char *argv[])
 {
        int ret = 0;
        int fd;
+       uint8_t *tx;
+       uint8_t *rx;
+       int size;
 
        parse_opts(argc, argv);
 
@@ -235,7 +300,17 @@ int main(int argc, char *argv[])
        printf("bits per word: %d\n", bits);
        printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
 
-       transfer(fd);
+       if (input_tx) {
+               size = strlen(input_tx);
+               tx = malloc(size);
+               rx = malloc(size);
+       size = unescape((char *)tx, input_tx, size);
+               transfer(fd, tx, rx, size);
+               free(rx);
+               free(tx);
+       } else {
+               transfer(fd, default_tx, default_rx, sizeof(default_tx));
+       }
 
        close(fd);
 
index b112efc816f155093ef80815db788a2cdddf0ecc..bc9f6fe44e27614c2f26f155385fe0d5d13d2e3b 100644 (file)
@@ -997,7 +997,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390
+Architectures: x86, s390, arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1011,7 +1011,7 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86,arm/arm64]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
                                  which has not yet received an INIT signal [x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
@@ -1020,7 +1020,7 @@ Possible values are:
                                  is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
                                  accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
+ - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390,arm/arm64]
  - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
                                  [s390]
@@ -1031,11 +1031,15 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE, which reflect whether the vcpu is paused or not.
 
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390
+Architectures: x86, s390, arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1047,6 +1051,10 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE, which reflect whether the vcpu should be paused or not.
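+
+A minimal sketch of pausing and resuming a vcpu with these ioctls (vcpu_fd
+is assumed to be a vcpu file descriptor obtained via KVM_CREATE_VCPU):
+
+       struct kvm_mp_state mp = { .mp_state = KVM_MP_STATE_STOPPED };
+
+       ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);  /* pause the vcpu */
+       mp.mp_state = KVM_MP_STATE_RUNNABLE;
+       ioctl(vcpu_fd, KVM_SET_MP_STATE, &mp);  /* resume it */
+       ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp);  /* read the state back */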
 
 4.40 KVM_SET_IDENTITY_MAP_ADDR
 
@@ -1967,15 +1975,25 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_CP0_STATUS       | 32
   MIPS  | KVM_REG_MIPS_CP0_CAUSE        | 32
   MIPS  | KVM_REG_MIPS_CP0_EPC          | 64
+  MIPS  | KVM_REG_MIPS_CP0_PRID         | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG       | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG1      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG2      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG3      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG4      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG5      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG7      | 32
   MIPS  | KVM_REG_MIPS_CP0_ERROREPC     | 64
   MIPS  | KVM_REG_MIPS_COUNT_CTL        | 64
   MIPS  | KVM_REG_MIPS_COUNT_RESUME     | 64
   MIPS  | KVM_REG_MIPS_COUNT_HZ         | 64
+  MIPS  | KVM_REG_MIPS_FPR_32(0..31)    | 32
+  MIPS  | KVM_REG_MIPS_FPR_64(0..31)    | 64
+  MIPS  | KVM_REG_MIPS_VEC_128(0..31)   | 128
+  MIPS  | KVM_REG_MIPS_FCR_IR           | 32
+  MIPS  | KVM_REG_MIPS_FCR_CSR          | 32
+  MIPS  | KVM_REG_MIPS_MSA_IR           | 32
+  MIPS  | KVM_REG_MIPS_MSA_CSR          | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
@@ -2029,6 +2047,25 @@ patterns depending on whether they're 32-bit or 64-bit registers:
 MIPS KVM control registers (see above) have the following id bit patterns:
   0x7030 0000 0002 <reg:16>
 
+MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
+id bit patterns depending on the size of the register being accessed. They are
+always accessed according to the current guest FPU mode (Status.FR and
+Config5.FRE), i.e. as the guest would see them, and they become unpredictable
+if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
+registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
+overlap the FPU registers:
+  0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
+  0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
+  0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
+
+MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 01 <0:3> <reg:5>
+
+MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 02 <0:3> <reg:5>
+
 
 4.69 KVM_GET_ONE_REG
 
@@ -2234,7 +2271,7 @@ into the hash PTE second double word).
 4.75 KVM_IRQFD
 
 Capability: KVM_CAP_IRQFD
-Architectures: x86 s390
+Architectures: x86 s390 arm arm64
 Type: vm ioctl
 Parameters: struct kvm_irqfd (in)
 Returns: 0 on success, -1 on error
@@ -2260,6 +2297,10 @@ Note that closing the resamplefd is not sufficient to disable the
 irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
 and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
 
+On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
+Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
+given by gsi + 32.
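+
+For example (a sketch; vm_fd and eventfd_fd are assumed to exist already),
+attaching an eventfd to SPI index 5, i.e. GIC interrupt ID 37:
+
+       struct kvm_irqfd irqfd = {
+               .fd  = eventfd_fd,
+               .gsi = 37 - 32,         /* gsi is the SPI index */
+       };
+
+       ioctl(vm_fd, KVM_IRQFD, &irqfd);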
+
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 Capability: KVM_CAP_PPC_ALLOC_HTAB
@@ -2716,6 +2757,227 @@ The fields in each entry are defined as follows:
    eax, ebx, ecx, edx: the values returned by the cpuid instruction for
          this function/index combination
 
+4.89 KVM_S390_MEM_OP
+
+Capability: KVM_CAP_S390_MEM_OP
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_mem_op (in)
+Returns: = 0 on success,
+         < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+         > 0 if an exception occurred while walking the page tables
+
+Read or write data from/to the logical (virtual) memory of a VCPU.
+
+Parameters are specified via the following structure:
+
+struct kvm_s390_mem_op {
+       __u64 gaddr;            /* the guest address */
+       __u64 flags;            /* flags */
+       __u32 size;             /* number of bytes */
+       __u32 op;               /* type of operation */
+       __u64 buf;              /* buffer in userspace */
+       __u8 ar;                /* the access register number */
+       __u8 reserved[31];      /* should be set to 0 */
+};
+
+The type of operation is specified in the "op" field. It is either
+KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+whether the corresponding memory access would create an access exception
+(without touching the data in the memory at the destination). In case an
+access exception occurred while walking the MMU tables of the guest, the
+ioctl returns a positive error number to indicate the type of exception.
+This exception is also raised directly at the corresponding VCPU if the
+flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+
+The start address of the memory region has to be specified in the "gaddr"
+field, and the length of the region in the "size" field. "buf" is the buffer
+supplied by the userspace application where the read data should be written
+to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
+is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
+when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
+register number to be used.
+
+The "reserved" field is meant for future extensions. It is not used by
+KVM with the currently defined set of flags.
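+
+A minimal sketch of a logical read with this ioctl (vcpu_fd and the guest
+address are assumptions made for the example):
+
+       __u8 data[256];
+       struct kvm_s390_mem_op op = {
+               .gaddr = 0x10000,               /* assumed guest address */
+               .size  = sizeof(data),
+               .op    = KVM_S390_MEMOP_LOGICAL_READ,
+               .buf   = (__u64)(unsigned long)data,
+               .ar    = 0,
+       };
+       int rc = ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
+       /* rc == 0: data[] is filled; rc > 0: access exception; rc < 0: error */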
+
+4.90 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_KEYS_NONE if the guest is not using
+         storage keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+       __u64 start_gfn;
+       __u64 count;
+       __u64 skeydata_addr;
+       __u32 flags;
+       __u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address of a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
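+
+A hedged example of fetching the keys for the first 128 guest frames (vm_fd
+is assumed to be a VM file descriptor):
+
+       __u8 keys[128];
+       struct kvm_s390_skeys args = {
+               .start_gfn     = 0,
+               .count         = sizeof(keys),  /* one key byte per frame */
+               .skeydata_addr = (__u64)(unsigned long)keys,
+       };
+
+       if (ioctl(vm_fd, KVM_S390_GET_SKEYS, &args) < 0)
+               perror("KVM_S390_GET_SKEYS");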
+
+4.91 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to set. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address of a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
+4.92 KVM_S390_IRQ
+
+Capability: KVM_CAP_S390_INJECT_IRQ
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq (in)
+Returns: 0 on success, -1 on error
+Errors:
+  EINVAL: interrupt type is invalid
+          type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+          type is KVM_S390_INT_EXTERNAL_CALL and the code is larger
+            than the maximum number of VCPUs
+  EBUSY:  type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
+          type is KVM_S390_SIGP_STOP and a stop irq is already pending
+          type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
+            is already pending
+
+Allows userspace to inject an interrupt into the guest.
+
+Using struct kvm_s390_irq as a parameter allows injecting an additional
+payload, which is not possible via KVM_S390_INTERRUPT.
+
+Interrupt parameters are passed via kvm_s390_irq:
+
+struct kvm_s390_irq {
+       __u64 type;
+       union {
+               struct kvm_s390_io_info io;
+               struct kvm_s390_ext_info ext;
+               struct kvm_s390_pgm_info pgm;
+               struct kvm_s390_emerg_info emerg;
+               struct kvm_s390_extcall_info extcall;
+               struct kvm_s390_prefix_info prefix;
+               struct kvm_s390_stop_info stop;
+               struct kvm_s390_mchk_info mchk;
+               char reserved[64];
+       } u;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+KVM_S390_RESTART - restart; no parameters
+KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+
+
+Note that the vcpu ioctl is asynchronous to vcpu execution.
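+
+A minimal sketch of injecting a restart interrupt, which carries no payload
+(vcpu_fd is an assumption):
+
+       struct kvm_s390_irq irq = {
+               .type = KVM_S390_RESTART,
+       };
+
+       if (ioctl(vcpu_fd, KVM_S390_IRQ, &irq) < 0)
+               perror("KVM_S390_IRQ");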
+
+4.94 KVM_S390_GET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (out)
+Returns: >= 0 on success (the number of bytes copied into the buffer),
+         -EINVAL if buffer size is 0,
+         -ENOBUFS if buffer size is too small to fit all pending interrupts,
+         -EFAULT if the buffer address was invalid
+
+This ioctl allows userspace to retrieve the complete state of all currently
+pending interrupts in a single buffer. Use cases include migration
+and introspection. The parameter structure contains the address of a
+userspace buffer and its length:
+
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 flags;
+       __u32 len;
+       __u32 reserved[4];
+};
+
+Userspace passes in the above struct and for each pending interrupt a
+struct kvm_s390_irq is copied to the provided buffer.
+
+If -ENOBUFS is returned the buffer provided was too small and userspace
+may retry with a bigger buffer.
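+
+A sketch of the retry protocol (error handling and the initial size are
+assumptions; needs <stdlib.h> and <errno.h>):
+
+       __u32 len = 16 * sizeof(struct kvm_s390_irq);
+       void *buf = NULL;
+       int rc;
+
+       do {
+               buf = realloc(buf, len);        /* NULL check omitted */
+               struct kvm_s390_irq_state state = {
+                       .buf = (__u64)(unsigned long)buf,
+                       .len = len,
+               };
+               rc = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &state);
+               len *= 2;                       /* grow on -ENOBUFS */
+       } while (rc < 0 && errno == ENOBUFS);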
+
+4.95 KVM_S390_SET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (in)
+Returns: 0 on success,
+         -EFAULT if the buffer address was invalid,
+         -EINVAL for an invalid buffer length (see below),
+         -EBUSY if there were already interrupts pending,
+         errors occurring when actually injecting the
+          interrupt. See KVM_S390_IRQ.
+
+This ioctl allows userspace to set the complete state of all cpu-local
+interrupts currently pending for the vcpu. It is intended for restoring
+interrupt state after a migration. The input parameter is a userspace buffer
+containing a struct kvm_s390_irq_state:
+
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 len;
+       __u32 pad;
+};
+
+The userspace memory referenced by buf contains a struct kvm_s390_irq
+for each interrupt to be injected into the guest.
+If one of the interrupts cannot be injected for some reason, the
+ioctl aborts.
+
+len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
+and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
+which is the maximum number of possibly pending cpu-local interrupts.
+
 5. The kvm_run structure
 ------------------------
 
@@ -3189,6 +3451,31 @@ Parameters: none
 This capability enables the in-kernel irqchip for s390. Please refer to
 "4.24 KVM_CREATE_IRQCHIP" for details.
 
+6.9 KVM_CAP_MIPS_FPU
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the host Floating Point Unit by the guest. It
+allows the Config1.FP bit to be set to enable the FPU in the guest. Once this
+is done, the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be
+accessed (depending on the current guest FPU register mode), and the Status.FR
+and Config5.FRE bits are accessible via the KVM API and also from the guest,
+provided the FPU supports them.
+
+6.10 KVM_CAP_MIPS_MSA
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
+It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
+Once this is done, the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
+accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
+the guest.
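+
+Both capabilities are enabled with the KVM_ENABLE_CAP vcpu ioctl; a minimal
+sketch (vcpu_fd is an assumption):
+
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_MIPS_FPU,        /* or KVM_CAP_MIPS_MSA */
+       };
+
+       if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
+               perror("KVM_ENABLE_CAP");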
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
@@ -3248,3 +3535,41 @@ All other orders will be handled completely in user space.
 Only privileged operation exceptions will be checked for in the kernel (or even
 in the hardware prior to interception). If this capability is not enabled, the
 old way of handling SIGP orders is used (partially in kernel and user space).
+
+7.3 KVM_CAP_S390_VECTOR_REGISTERS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, negative value on error
+
+Allows use of the vector registers introduced with the z13 processor, and
+provides for their synchronization between host and user space.  Will
+return -EINVAL if the machine does not support vectors.
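+
+Like the other capabilities in this section, it is enabled with the
+KVM_ENABLE_CAP vm ioctl; a short sketch (vm_fd is an assumption):
+
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_S390_VECTOR_REGISTERS,
+       };
+
+       if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
+               perror("KVM_ENABLE_CAP");  /* EINVAL: no vector support */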
+
+7.4 KVM_CAP_S390_USER_STSI
+
+Architectures: s390
+Parameters: none
+
+This capability allows post-handlers for the STSI instruction. After
+initial handling in the kernel, KVM exits to user space with
+KVM_EXIT_S390_STSI to allow user space to insert further data.
+
+Before exiting to userspace, kvm handlers should fill in the s390_stsi field of
+vcpu->run:
+struct {
+       __u64 addr;
+       __u8 ar;
+       __u8 reserved;
+       __u8 fc;
+       __u8 sel1;
+       __u16 sel2;
+} s390_stsi;
+
+@addr - guest address of STSI SYSIB
+@fc   - function code
+@sel1 - selector 1
+@sel2 - selector 2
+@ar   - access register number
+
+KVM handlers should exit to userspace with rc = -EREMOTE.
index 4ceef53164b0289237238c3cf29d83e38e5d34de..d1ad9d5cae467ceb2c1169ce8b53d70078aedf27 100644 (file)
@@ -27,6 +27,9 @@ Groups:
     Copies all floating interrupts into a buffer provided by userspace.
     When the buffer is too small it returns -ENOMEM, which is the indication
     for userspace to try again with a bigger buffer.
+    -ENOBUFS is returned when the allocation of a kernelspace buffer
+    fails.
+    -EFAULT is returned when copying data to userspace fails.
     All interrupts remain pending, i.e. are not deleted from the list of
     currently pending interrupts.
     attr->addr contains the userspace address of the buffer into which all
index a75e3adaa39da277fb89150fb1d31daf8d1296ef..88b85899d30953a6be096a9e045fcf54be3676b4 100644 (file)
@@ -406,6 +406,12 @@ Protocol:  2.00+
        - If 0, the protected-mode code is loaded at 0x10000.
        - If 1, the protected-mode code is loaded at 0x100000.
 
+  Bit 1 (kernel internal): KASLR_FLAG
+       - Used internally by the compressed kernel to communicate
+         KASLR status to kernel proper.
+         If 1, KASLR enabled.
+         If 0, KASLR disabled.
+
   Bit 5 (write): QUIET_FLAG
        - If 0, print early messages.
        - If 1, suppress early messages.
index efbcb50e496954934295b3147e1de9dbdd05ff9e..8d2ee8e010a18218a970207b555b4eb6ba6196ac 100644 (file)
@@ -5591,6 +5591,8 @@ S:        Supported
 F:     Documentation/*/kvm*.txt
 F:     Documentation/virtual/kvm/
 F:     arch/*/kvm/
+F:     arch/x86/kernel/kvm.c
+F:     arch/x86/kernel/kvmclock.c
 F:     arch/*/include/asm/kvm*
 F:     include/linux/kvm*
 F:     include/uapi/linux/kvm*
@@ -6564,10 +6566,8 @@ F:       drivers/mfd/
 F:     include/linux/mfd/
 
 MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
-M:     Chris Ball <chris@printf.net>
 M:     Ulf Hansson <ulf.hansson@linaro.org>
 L:     linux-mmc@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
 T:     git git://git.linaro.org/people/ulf.hansson/mmc.git
 S:     Maintained
 F:     drivers/mmc/
@@ -8559,6 +8559,7 @@ F:        include/uapi/linux/timex.h
 F:     kernel/time/clocksource.c
 F:     kernel/time/time*.c
 F:     kernel/time/ntp.c
+F:     tools/testing/selftests/timers/
 
 SC1200 WDT DRIVER
 M:     Zwane Mwaikambo <zwanem@gmail.com>
@@ -8667,10 +8668,8 @@ S:       Maintained
 F:     drivers/mmc/host/sdricoh_cs.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER
-M:     Chris Ball <chris@printf.net>
 L:     linux-mmc@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
-S:     Maintained
+S:     Orphan
 F:     drivers/mmc/host/sdhci.*
 F:     drivers/mmc/host/sdhci-pltfm.[ch]
 
@@ -8686,18 +8685,12 @@ F:      include/linux/seccomp.h
 K:     \bsecure_computing
 K:     \bTIF_SECCOMP\b
 
-SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
-M:     Anton Vorontsov <anton@enomsg.org>
-L:     linuxppc-dev@lists.ozlabs.org
-L:     linux-mmc@vger.kernel.org
-S:     Maintained
-F:     drivers/mmc/host/sdhci-pltfm.[ch]
-
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) SAMSUNG DRIVER
 M:     Ben Dooks <ben-linux@fluff.org>
+M:     Jaehoon Chung <jh80.chung@samsung.com>
 L:     linux-mmc@vger.kernel.org
 S:     Maintained
-F:     drivers/mmc/host/sdhci-s3c.c
+F:     drivers/mmc/host/sdhci-s3c*
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER
 M:     Viresh Kumar <viresh.linux@gmail.com>
index 54430f933b628ca99bdbc1e2bf5dd2570ca0354c..9b76ce1e08bbb80d15e3f2ee4859157323a23ee0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 0
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
@@ -779,6 +779,7 @@ KBUILD_ARFLAGS := $(call ar-option,D)
 # check for 'asm goto'
 ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
        KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+       KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
 include $(srctree)/scripts/Makefile.kasan
index c8d284d8521fc715a01122787294dd4fa2dbcbf5..f535a3fd0f60cc9651e89b83cb821b510374d087 100644 (file)
@@ -116,7 +116,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
 }
 
 static int
-alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
 {
        int retval = 0;
        int real_seconds, real_minutes, cmos_minutes;
@@ -211,7 +211,7 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 static const struct rtc_class_ops alpha_rtc_ops = {
        .read_time = alpha_rtc_read_time,
        .set_time = alpha_rtc_set_time,
-       .set_mmss = alpha_rtc_set_mmss,
+       .set_mmss64 = alpha_rtc_set_mmss,
        .ioctl = alpha_rtc_ioctl,
 };
 
@@ -276,7 +276,7 @@ do_remote_mmss(void *data)
 }
 
 static int
-remote_set_mmss(struct device *dev, unsigned long now)
+remote_set_mmss(struct device *dev, time64_t now)
 {
        union remote_data x;
        if (smp_processor_id() != boot_cpuid) {
@@ -290,7 +290,7 @@ remote_set_mmss(struct device *dev, unsigned long now)
 static const struct rtc_class_ops remote_rtc_ops = {
        .read_time = remote_read_time,
        .set_time = remote_set_time,
-       .set_mmss = remote_set_mmss,
+       .set_mmss64 = remote_set_mmss,
        .ioctl = alpha_rtc_ioctl,
 };
 #endif
index 6eaddc47c43dfbd60f52b6d49c425acbd55cab62..37dc0fe1093fb24bb26b1c852c568bd8fcd3f6d2 100644 (file)
@@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id)
        unsigned int mpidr, this_cpu, that_cpu;
        unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
        struct completion inbound_alive;
-       struct tick_device *tdev;
-       enum clock_event_mode tdev_mode;
        long volatile *handshake_ptr;
        int ipi_nr, ret;
 
@@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
        /* redirect GIC's SGIs to our counterpart */
        gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
 
-       tdev = tick_get_device(this_cpu);
-       if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
-               tdev = NULL;
-       if (tdev) {
-               tdev_mode = tdev->evtdev->mode;
-               clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
-       }
+       tick_suspend_local();
 
        ret = cpu_pm_enter();
 
@@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
 
        ret = cpu_pm_exit();
 
-       if (tdev) {
-               clockevents_set_mode(tdev->evtdev, tdev_mode);
-               clockevents_program_event(tdev->evtdev,
-                                         tdev->evtdev->next_event, 1);
-       }
+       tick_resume_local();
 
        trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr);
        local_fiq_enable();
index 70f9b9bfb1f9646a1bdfe3ab597c5b9c26e80b7e..5f337dc5c1087f49ebf8eae9fde6051d7c9d45be 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_ARM_JUMP_LABEL_H
 #define _ASM_ARM_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -27,8 +27,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -37,4 +35,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index 816db0bf2dd8addbd9844488b5a72d4495be72c7..d995821f1698c67bc3e57e2073af9cdc3353fab4 100644 (file)
 #define HSR_COND       (0xfU << HSR_COND_SHIFT)
 
 #define FSC_FAULT      (0x04)
+#define FSC_ACCESS     (0x08)
 #define FSC_PERM       (0x0c)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
index 41008cd7c53f6b39d1476d5a46dc610c578e8d5c..d71607c16601b6b1e1a595e32562195ccd63f5b1 100644 (file)
@@ -27,6 +27,8 @@
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-                             unsigned long end)
-{
-       return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-       return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                                         unsigned long address)
 {
index 3f83db2f6cf053cb083c15b5fa910b5d7ae7ee61..d8e90c8cb5fa0ab4c0486cb6d9fd53d576456291 100644 (file)
@@ -28,28 +28,6 @@ struct kvm_decode {
        bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-       phys_addr_t     phys_addr;
-       u8              data[8];
-       u32             len;
-       bool            is_write;
-       void            *private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-                                   struct kvm_exit_mmio *mmio)
-{
-       run->mmio.phys_addr     = mmio->phys_addr;
-       run->mmio.len           = mmio->len;
-       run->mmio.is_write      = mmio->is_write;
-       memcpy(run->mmio.data, mmio->data, mmio->len);
-       run->exit_reason        = KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa);
index 90c12e1e695c97682c229174a975bb6fdb97403e..0f79e4dec7f98ccddb8429a1e6b262ca146c0d38 100644 (file)
@@ -12,8 +12,7 @@
 
 extern void timer_tick(void);
 
-struct timespec;
-typedef void (*clock_access_fn)(struct timespec *);
+typedef void (*clock_access_fn)(struct timespec64 *);
 extern int register_persistent_clock(clock_access_fn read_boot,
                                     clock_access_fn read_persistent);
 
index 0db25bc328643de55ded82f3b9583a748038d97d..2499867dd0d86d79477e85ac0933228b9bbcf6a2 100644 (file)
@@ -198,6 +198,9 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX            127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE               0x95c1ba5e
 #define KVM_PSCI_FN(n)                 (KVM_PSCI_FN_BASE + (n))
index 2d2d6087b9b105d5dadcd66f9821deefe50d1e66..488eaac56028f59ed08fc4030de25dd7d6a59ccb 100644 (file)
@@ -190,7 +190,6 @@ int main(void)
   DEFINE(VCPU_HxFAR,           offsetof(struct kvm_vcpu, arch.fault.hxfar));
   DEFINE(VCPU_HPFAR,           offsetof(struct kvm_vcpu, arch.fault.hpfar));
   DEFINE(VCPU_HYP_PC,          offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
-#ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,                offsetof(struct kvm_vcpu, arch.vgic_cpu));
   DEFINE(VGIC_V2_CPU_HCR,      offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,     offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
@@ -200,14 +199,11 @@ int main(void)
   DEFINE(VGIC_V2_CPU_APR,      offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,       offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,       offsetof(struct vgic_cpu, nr_lr));
-#ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,  offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
   DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
   DEFINE(KVM_TIMER_CNTVOFF,    offsetof(struct kvm, arch.timer.cntvoff));
   DEFINE(KVM_TIMER_ENABLED,    offsetof(struct kvm, arch.timer.enabled));
-#endif
   DEFINE(KVM_VGIC_VCTRL,       offsetof(struct kvm, arch.vgic.vctrl_base));
-#endif
   DEFINE(KVM_VTTBR,            offsetof(struct kvm, arch.vttbr));
 #endif
   return 0; 
index 0cc7e58c47cc79fd53ab4feac1b9440300e7a562..a66e37e211a9a8cbfabf85d9213f6b73db417085 100644 (file)
@@ -76,7 +76,7 @@ void timer_tick(void)
 }
 #endif
 
-static void dummy_clock_access(struct timespec *ts)
+static void dummy_clock_access(struct timespec64 *ts)
 {
        ts->tv_sec = 0;
        ts->tv_nsec = 0;
@@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts)
 static clock_access_fn __read_persistent_clock = dummy_clock_access;
 static clock_access_fn __read_boot_clock = dummy_clock_access;
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
        __read_persistent_clock(ts);
 }
 
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
 {
        __read_boot_clock(ts);
 }
index 338ace78ed18611bcb4aea64baeb490fcf2bca05..f1f79d1043096093a780e46e4a68cf3796a153ac 100644 (file)
@@ -18,6 +18,7 @@ if VIRTUALIZATION
 
 config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
+       depends on MMU && OF
        select PREEMPT_NOTIFIERS
        select ANON_INODES
        select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -26,10 +27,12 @@ config KVM
        select KVM_ARM_HOST
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select SRCU
-       depends on ARM_VIRT_EXT && ARM_LPAE
+       select MMU_NOTIFIER
+       select HAVE_KVM_EVENTFD
+       select HAVE_KVM_IRQFD
+       depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
        ---help---
-         Support hosting virtualized guest machines. You will also
-         need to select one or more of the processor modules below.
+         Support hosting virtualized guest machines.
 
          This module provides access to the hardware capabilities through
          a character device node named /dev/kvm.
@@ -37,10 +40,7 @@ config KVM
          If unsure, say N.
 
 config KVM_ARM_HOST
-       bool "KVM host support for ARM cpus."
-       depends on KVM
-       depends on MMU
-       select  MMU_NOTIFIER
+       bool
        ---help---
          Provides host support for ARM processors.
 
@@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS
          large, so only choose a reasonable number that you expect to
          actually use.
 
-config KVM_ARM_VGIC
-       bool "KVM support for Virtual GIC"
-       depends on KVM_ARM_HOST && OF
-       select HAVE_KVM_IRQCHIP
-       default y
-       ---help---
-         Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-       bool "KVM support for Architected Timers"
-       depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
-       select HAVE_KVM_IRQCHIP
-       default y
-       ---help---
-         Adds support for the Architected Timers in virtual machines
-
 endif # VIRTUALIZATION
index 443b8bea43e93e862653f5f153d3ce7759da5528..139e46c08b6ec5daff4a3692a569027cf23c0347 100644 (file)
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
        plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
 CFLAGS_arm.o := -I. $(plus_virt_def)
 CFLAGS_mmu.o := -I.
 
@@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-y += $(KVM)/arm/vgic.o
+obj-y += $(KVM)/arm/vgic-v2.o
+obj-y += $(KVM)/arm/vgic-v2-emul.o
+obj-y += $(KVM)/arm/arch_timer.o
index 5560f74f9eeef1e3e4d2c9c39fc672e539eee93f..6f536451ab784e99966a308c0892e8614214591f 100644 (file)
@@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
-static bool vgic_present;
-
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
        BUG_ON(preemptible());
@@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        int r;
        switch (ext) {
        case KVM_CAP_IRQCHIP:
-               r = vgic_present;
-               break;
+       case KVM_CAP_IRQFD:
+       case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_USER_MEMORY:
        case KVM_CAP_SYNC_MMU:
@@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ARM_PSCI:
        case KVM_CAP_ARM_PSCI_0_2:
        case KVM_CAP_READONLY_MEM:
+       case KVM_CAP_MP_STATE:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       return 0;
+       return kvm_timer_should_fire(vcpu);
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       return -EINVAL;
+       if (vcpu->arch.pause)
+               mp_state->mp_state = KVM_MP_STATE_STOPPED;
+       else
+               mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+       return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       return -EINVAL;
+       switch (mp_state->mp_state) {
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.pause = false;
+               break;
+       case KVM_MP_STATE_STOPPED:
+               vcpu->arch.pause = true;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
 }
 
 /**
@@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+       return vgic_initialized(kvm);
+}
+
 static void vcpu_pause(struct kvm_vcpu *vcpu)
 {
        wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 
        switch (dev_id) {
        case KVM_ARM_DEVICE_VGIC_V2:
-               if (!vgic_present)
-                       return -ENXIO;
                return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
        default:
                return -ENODEV;
@@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
        switch (ioctl) {
        case KVM_CREATE_IRQCHIP: {
-               if (vgic_present)
-                       return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
-               else
-                       return -ENXIO;
+               return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
        }
        case KVM_ARM_SET_DEVICE_ADDR: {
                struct kvm_arm_device_addr dev_addr;
@@ -1035,10 +1050,6 @@ static int init_hyp_mode(void)
        if (err)
                goto out_free_context;
 
-#ifdef CONFIG_KVM_ARM_VGIC
-               vgic_present = true;
-#endif
-
        /*
         * Init HYP architected timer support
         */
index 384bab67c4629a9bece251d5577c9bf908f8348f..d503fbb787d362752b9b6b688b2829e19b675095 100644 (file)
@@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        return -EINVAL;
 }
 
-#ifndef CONFIG_KVM_ARM_TIMER
-
-#define NUM_TIMER_REGS 0
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
-       return 0;
-}
-
-static bool is_timer_reg(u64 index)
-{
-       return false;
-}
-
-#else
-
 #define NUM_TIMER_REGS 3
 
 static bool is_timer_reg(u64 index)
@@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
        return 0;
 }
 
-#endif
-
 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
        void __user *uaddr = (void __user *)(long)reg->addr;
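
With CONFIG_KVM_ARM_TIMER gone, the timer register accessors are always built. The three registers behind NUM_TIMER_REGS are the architected timer's control, count and compare-value registers; a sketch of what is_timer_reg() matches, consistent with the register IDs in the ARM KVM uapi:

    static bool is_timer_reg(u64 index)
    {
            switch (index) {
            case KVM_REG_ARM_TIMER_CTL:     /* CNTV_CTL  */
            case KVM_REG_ARM_TIMER_CNT:     /* CNTVCT    */
            case KVM_REG_ARM_TIMER_CVAL:    /* CNTV_CVAL */
                    return true;
            }
            return false;
    }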
index 14d488388480ea50a80d24b18bbed9636c8d1c25..35e4a3a0c476cd9730afa52192bea5b3ff6f20fd 100644 (file)
@@ -402,7 +402,6 @@ vcpu        .req    r0              @ vcpu pointer always in r0
  * Assumes vcpu pointer in vcpu reg
  */
 .macro save_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
        /* Get VGIC VCTRL base into r2 */
        ldr     r2, [vcpu, #VCPU_KVM]
        ldr     r2, [r2, #KVM_VGIC_VCTRL]
@@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6  )
        subs    r4, r4, #1
        bne     1b
 2:
-#endif
 .endm
 
 /*
@@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6  )
  * Assumes vcpu pointer in vcpu reg
  */
 .macro restore_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
        /* Get VGIC VCTRL base into r2 */
        ldr     r2, [vcpu, #VCPU_KVM]
        ldr     r2, [r2, #KVM_VGIC_VCTRL]
@@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6  )
        subs    r4, r4, #1
        bne     1b
 2:
-#endif
 .endm
 
 #define CNTHCTL_PL1PCTEN       (1 << 0)
@@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6  )
  * Clobbers r2-r5
  */
 .macro save_timer_state
-#ifdef CONFIG_KVM_ARM_TIMER
        ldr     r4, [vcpu, #VCPU_KVM]
        ldr     r2, [r4, #KVM_TIMER_ENABLED]
        cmp     r2, #0
@@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6  )
        mcrr    p15, 4, r2, r2, c14     @ CNTVOFF
 
 1:
-#endif
        @ Allow physical timer/counter access for the host
        mrc     p15, 4, r2, c14, c1, 0  @ CNTHCTL
        orr     r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6  )
        bic     r2, r2, #CNTHCTL_PL1PCEN
        mcr     p15, 4, r2, c14, c1, 0  @ CNTHCTL
 
-#ifdef CONFIG_KVM_ARM_TIMER
        ldr     r4, [vcpu, #VCPU_KVM]
        ldr     r2, [r4, #KVM_TIMER_ENABLED]
        cmp     r2, #0
@@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6  )
        and     r2, r2, #3
        mcr     p15, 0, r2, c14, c3, 1  @ CNTV_CTL
 1:
-#endif
 .endm
 
 .equ vmentry,  0
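
The CONFIG_KVM_ARM_VGIC and CONFIG_KVM_ARM_TIMER guards disappear because both features are now built unconditionally (see the Kconfig hunk further down); whether timer state is actually saved stays a run-time decision via the KVM_TIMER_ENABLED test retained in the macros. In C terms the guard is roughly the following (the field behind the asm-offset is assumed to be kvm->arch.timer.enabled):

    if (!kvm->arch.timer.enabled)
            return;         /* skip the CNTV save/restore entirely */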
index 5d3bfc0eb3f000cb41cb217eb7fdc2611da85fc9..974b1c606d044c239bfa14ffbdf66f0fc982c4fb 100644 (file)
@@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return 0;
 }
 
-static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                     struct kvm_exit_mmio *mmio)
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
 {
        unsigned long rt;
-       int len;
-       bool is_write, sign_extend;
+       int access_size;
+       bool sign_extend;
 
        if (kvm_vcpu_dabt_isextabt(vcpu)) {
                /* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return 1;
        }
 
-       len = kvm_vcpu_dabt_get_as(vcpu);
-       if (unlikely(len < 0))
-               return len;
+       access_size = kvm_vcpu_dabt_get_as(vcpu);
+       if (unlikely(access_size < 0))
+               return access_size;
 
-       is_write = kvm_vcpu_dabt_iswrite(vcpu);
+       *is_write = kvm_vcpu_dabt_iswrite(vcpu);
        sign_extend = kvm_vcpu_dabt_issext(vcpu);
        rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-       mmio->is_write = is_write;
-       mmio->phys_addr = fault_ipa;
-       mmio->len = len;
+       *len = access_size;
        vcpu->arch.mmio_decode.sign_extend = sign_extend;
        vcpu->arch.mmio_decode.rt = rt;
 
@@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa)
 {
-       struct kvm_exit_mmio mmio;
        unsigned long data;
        unsigned long rt;
        int ret;
+       bool is_write;
+       int len;
+       u8 data_buf[8];
 
        /*
-        * Prepare MMIO operation. First stash it in a private
-        * structure that we can use for in-kernel emulation. If the
-        * kernel can't handle it, copy it into run->mmio and let user
-        * space do its magic.
+        * Prepare MMIO operation. First decode the syndrome data we get
+        * from the CPU. Then check whether some in-kernel emulation is
+        * responsible for it; otherwise let user space do its magic.
         */
-
        if (kvm_vcpu_dabt_isvalid(vcpu)) {
-               ret = decode_hsr(vcpu, fault_ipa, &mmio);
+               ret = decode_hsr(vcpu, &is_write, &len);
                if (ret)
                        return ret;
        } else {
@@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
        rt = vcpu->arch.mmio_decode.rt;
 
-       if (mmio.is_write) {
-               data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
-                                              mmio.len);
+       if (is_write) {
+               data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
+
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+               mmio_write_buf(data_buf, len, data);
 
-               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
-                              fault_ipa, data);
-               mmio_write_buf(mmio.data, mmio.len, data);
+               ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+                                      data_buf);
        } else {
-               trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
                               fault_ipa, 0);
+
+               ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+                                     data_buf);
        }
 
-       if (vgic_handle_mmio(vcpu, run, &mmio))
+       /* Now prepare kvm_run for the potential return to userland. */
+       run->mmio.is_write      = is_write;
+       run->mmio.phys_addr     = fault_ipa;
+       run->mmio.len           = len;
+       memcpy(run->mmio.data, data_buf, len);
+
+       if (!ret) {
+               /* We handled the access successfully in the kernel. */
+               kvm_handle_mmio_return(vcpu, run);
                return 1;
+       }
 
-       kvm_prepare_mmio(run, &mmio);
+       run->exit_reason        = KVM_EXIT_MMIO;
        return 0;
 }
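
The rework routes every decoded access through kvm_io_bus_write()/kvm_io_bus_read() first, which is what lets in-kernel users such as ioeventfd (KVM_CAP_IOEVENTFD above) claim it; only unclaimed accesses fall through to userspace as KVM_EXIT_MMIO, with run->mmio filled in either way. The userspace half is the standard KVM run loop; a sketch, where device_read() and device_write() are hypothetical emulation hooks:

    /* After ioctl(vcpu_fd, KVM_RUN, 0) returns: */
    switch (run->exit_reason) {
    case KVM_EXIT_MMIO:
            if (run->mmio.is_write)
                    device_write(run->mmio.phys_addr,
                                 run->mmio.data, run->mmio.len);
            else
                    device_read(run->mmio.phys_addr,
                                run->mmio.data, run->mmio.len);
            break;  /* the next KVM_RUN completes the access via
                       kvm_handle_mmio_return() */
    }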
index 5656d79c5a44f4d2ca816e15b647abf29a114e0b..15b050d46fc968afdc53029ada4b7d945ee23515 100644 (file)
@@ -1330,10 +1330,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
        spin_unlock(&kvm->mmu_lock);
+       kvm_set_pfn_accessed(pfn);
        kvm_release_pfn_clean(pfn);
        return ret;
 }
 
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+       pfn_t pfn;
+       bool pfn_valid = false;
+
+       trace_kvm_access_fault(fault_ipa);
+
+       spin_lock(&vcpu->kvm->mmu_lock);
+
+       pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+       if (!pmd || pmd_none(*pmd))     /* Nothing there */
+               goto out;
+
+       if (kvm_pmd_huge(*pmd)) {       /* THP, HugeTLB */
+               *pmd = pmd_mkyoung(*pmd);
+               pfn = pmd_pfn(*pmd);
+               pfn_valid = true;
+               goto out;
+       }
+
+       pte = pte_offset_kernel(pmd, fault_ipa);
+       if (pte_none(*pte))             /* Nothing there either */
+               goto out;
+
+       *pte = pte_mkyoung(*pte);       /* Just a page... */
+       pfn = pte_pfn(*pte);
+       pfn_valid = true;
+out:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       if (pfn_valid)
+               kvm_set_pfn_accessed(pfn);
+}
+
 /**
  * kvm_handle_guest_abort - handles all 2nd stage aborts
  * @vcpu:      the VCPU pointer
@@ -1364,7 +1405,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
        /* Check that the stage-2 fault is a translation, permission or access fault */
        fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
-       if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+       if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+           fault_status != FSC_ACCESS) {
                kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
                        kvm_vcpu_trap_get_class(vcpu),
                        (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1400,6 +1442,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
        /* Userspace should not be able to register out-of-bounds IPAs */
        VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
 
+       if (fault_status == FSC_ACCESS) {
+               handle_access_fault(vcpu, fault_ipa);
+               ret = 1;
+               goto out_unlock;
+       }
+
        ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
        if (ret == 0)
                ret = 1;
@@ -1408,15 +1456,16 @@ out_unlock:
        return ret;
 }
 
-static void handle_hva_to_gpa(struct kvm *kvm,
-                             unsigned long start,
-                             unsigned long end,
-                             void (*handler)(struct kvm *kvm,
-                                             gpa_t gpa, void *data),
-                             void *data)
+static int handle_hva_to_gpa(struct kvm *kvm,
+                            unsigned long start,
+                            unsigned long end,
+                            int (*handler)(struct kvm *kvm,
+                                           gpa_t gpa, void *data),
+                            void *data)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
+       int ret = 0;
 
        slots = kvm_memslots(kvm);
 
@@ -1440,14 +1489,17 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 
                for (; gfn < gfn_end; ++gfn) {
                        gpa_t gpa = gfn << PAGE_SHIFT;
-                       handler(kvm, gpa, data);
+                       ret |= handler(kvm, gpa, data);
                }
        }
+
+       return ret;
 }
 
-static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
        unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+       return 0;
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1473,7 +1525,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
        return 0;
 }
 
-static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
        pte_t *pte = (pte_t *)data;
 
@@ -1485,6 +1537,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
         * through this calling path.
         */
        stage2_set_pte(kvm, NULL, gpa, pte, 0);
+       return 0;
 }
 
 
@@ -1501,6 +1554,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
        handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pmd = stage2_get_pmd(kvm, NULL, gpa);
+       if (!pmd || pmd_none(*pmd))     /* Nothing there */
+               return 0;
+
+       if (kvm_pmd_huge(*pmd)) {       /* THP, HugeTLB */
+               if (pmd_young(*pmd)) {
+                       *pmd = pmd_mkold(*pmd);
+                       return 1;
+               }
+
+               return 0;
+       }
+
+       pte = pte_offset_kernel(pmd, gpa);
+       if (pte_none(*pte))
+               return 0;
+
+       if (pte_young(*pte)) {
+               *pte = pte_mkold(*pte); /* Just a page... */
+               return 1;
+       }
+
+       return 0;
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pmd = stage2_get_pmd(kvm, NULL, gpa);
+       if (!pmd || pmd_none(*pmd))     /* Nothing there */
+               return 0;
+
+       if (kvm_pmd_huge(*pmd))         /* THP, HugeTLB */
+               return pmd_young(*pmd);
+
+       pte = pte_offset_kernel(pmd, gpa);
+       if (!pte_none(*pte))            /* Just a page... */
+               return pte_young(*pte);
+
+       return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+       trace_kvm_age_hva(start, end);
+       return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+       trace_kvm_test_age_hva(hva);
+       return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
        mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
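
The aging pieces fit together as follows: kvm_age_hva() clears the young state on stage-2 entries (pmd_mkold()/pte_mkold()), the next guest touch then takes an FSC_ACCESS fault, and handle_access_fault() marks the entry young again; no TLB invalidation is needed because an entry with the access flag clear is guaranteed not to be cached. Converting the handlers from void to int lets handle_hva_to_gpa() OR the per-page results, matching the contract the generic MMU notifier expects:

    /* Contract sketch: each handler returns 1 if its page was young,
     * 0 otherwise; handle_hva_to_gpa() ORs the results, so: */
    int young = kvm_age_hva(kvm, start, end);
    /* nonzero iff any page in [start, end) was referenced by the guest */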
index 6817664b46b80419047066686a47a8bc7953ebeb..0ec35392d2083ac4d8df391de0670d6ea6be6378 100644 (file)
@@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault,
                  __entry->hxfar, __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_access_fault,
+       TP_PROTO(unsigned long ipa),
+       TP_ARGS(ipa),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  ipa             )
+       ),
+
+       TP_fast_assign(
+               __entry->ipa            = ipa;
+       ),
+
+       TP_printk("IPA: %lx", __entry->ipa)
+);
+
 TRACE_EVENT(kvm_irq_line,
        TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
        TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -210,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva,
        TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
 );
 
+TRACE_EVENT(kvm_age_hva,
+       TP_PROTO(unsigned long start, unsigned long end),
+       TP_ARGS(start, end),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+       ),
+
+       TP_fast_assign(
+               __entry->start          = start;
+               __entry->end            = end;
+       ),
+
+       TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+                 __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+       TP_PROTO(unsigned long hva),
+       TP_ARGS(hva),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  hva             )
+       ),
+
+       TP_fast_assign(
+               __entry->hva            = hva;
+       ),
+
+       TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
 TRACE_EVENT(kvm_hvc,
        TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
        TP_ARGS(vcpu_pc, r0, imm),
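
Each new TRACE_EVENT pairs with a call site added above: trace_kvm_access_fault() fires from handle_access_fault(), and trace_kvm_age_hva()/trace_kvm_test_age_hva() from the aging entry points. Once tracing is enabled, the events appear under the kvm group in tracefs (e.g. events/kvm/kvm_access_fault).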
index 01e398a868bcbf9d0e33456e8fc70c0a2996848d..57d429830e09b4ceffa71efc3b3b88f6a88c68cd 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/export.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <asm/cpuidle.h>
 #include <asm/proc-fns.h>
@@ -84,7 +84,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 {
        struct idle_statedata *cx = state_ptr + index;
        u32 mpuss_can_lose_context = 0;
-       int cpu_id = smp_processor_id();
 
        /*
         * CPU0 has to wait and stay ON until CPU1 is in the OFF state.
@@ -112,7 +111,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
        mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) &&
                                 (cx->mpu_logic_state == PWRDM_POWER_OFF);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
+       tick_broadcast_enter();
 
        /*
         * Call idle CPU PM enter notifier chain so that
@@ -169,7 +168,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
        if (dev->cpu == 0 && mpuss_can_lose_context)
                cpu_cluster_pm_exit();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+       tick_broadcast_exit();
 
 fail:
        cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
@@ -184,8 +183,7 @@ fail:
  */
 static void omap_setup_broadcast_timer(void *arg)
 {
-       int cpu = smp_processor_id();
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+       tick_broadcast_enable();
 }
 
 static struct cpuidle_driver omap4_idle_driver = {
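
All four cpuidle conversions in this series follow the same pattern: the CLOCK_EVT_NOTIFY_BROADCAST_* notifiers always acted on the calling CPU anyway, so the per-CPU id plumbing was dead weight, and the replacement helpers take no arguments. The converted idiom is simply:

    tick_broadcast_enter();         /* hand wakeups to the broadcast
                                       device before the local timer
                                       stops in deep idle */
    /* ... enter the low-power state ... */
    tick_broadcast_exit();          /* the local timer is usable again */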
index dc6e79c4484abbfb097abb322298c37a8b9dd5ca..9a8611ab5dfa60d3bf5e23d6118779db3495ee80 100644 (file)
@@ -150,9 +150,13 @@ static int nop_mmc_set_power(struct device *dev, int power_on, int vdd)
 static inline void omap_hsmmc_mux(struct omap_hsmmc_platform_data
                                  *mmc_controller, int controller_nr)
 {
-       if (gpio_is_valid(mmc_controller->switch_pin) &&
-           (mmc_controller->switch_pin < OMAP_MAX_GPIO_LINES))
-               omap_mux_init_gpio(mmc_controller->switch_pin,
+       if (gpio_is_valid(mmc_controller->gpio_cd) &&
+           (mmc_controller->gpio_cd < OMAP_MAX_GPIO_LINES))
+               omap_mux_init_gpio(mmc_controller->gpio_cd,
+                                  OMAP_PIN_INPUT_PULLUP);
+       if (gpio_is_valid(mmc_controller->gpio_cod) &&
+           (mmc_controller->gpio_cod < OMAP_MAX_GPIO_LINES))
+               omap_mux_init_gpio(mmc_controller->gpio_cod,
                                   OMAP_PIN_INPUT_PULLUP);
        if (gpio_is_valid(mmc_controller->gpio_wp) &&
            (mmc_controller->gpio_wp < OMAP_MAX_GPIO_LINES))
@@ -250,15 +254,20 @@ static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c,
        mmc->internal_clock = !c->ext_clock;
        mmc->reg_offset = 0;
 
-       mmc->switch_pin = c->gpio_cd;
+       if (c->cover_only) {
+               /* detect if mobile phone cover removed */
+               mmc->gpio_cd = -EINVAL;
+               mmc->gpio_cod = c->gpio_cd;
+       } else {
+               /* card detect pin on the mmc socket itself */
+               mmc->gpio_cd = c->gpio_cd;
+               mmc->gpio_cod = -EINVAL;
+       }
        mmc->gpio_wp = c->gpio_wp;
 
        mmc->remux = c->remux;
        mmc->init_card = c->init_card;
 
-       if (c->cover_only)
-               mmc->cover = 1;
-
        if (c->nonremovable)
                mmc->nonremovable = 1;
 
@@ -358,7 +367,15 @@ void omap_hsmmc_late_init(struct omap2_hsmmc_info *c)
                if (!mmc_pdata)
                        continue;
 
-               mmc_pdata->switch_pin = c->gpio_cd;
+               if (c->cover_only) {
+                       /* detect if mobile phone cover removed */
+                       mmc_pdata->gpio_cd = -EINVAL;
+                       mmc_pdata->gpio_cod = c->gpio_cd;
+               } else {
+                       /* card detect pin on the mmc socket itself */
+                       mmc_pdata->gpio_cd = c->gpio_cd;
+                       mmc_pdata->gpio_cod = -EINVAL;
+               }
                mmc_pdata->gpio_wp = c->gpio_wp;
 
                res = omap_device_register(pdev);
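
Splitting switch_pin into gpio_cd and gpio_cod makes the cover-detect case explicit instead of a side flag: exactly one of the two fields carries the GPIO number and the other is set to -EINVAL. An illustrative board entry (the values are made up):

    static struct omap2_hsmmc_info mmc[] __initdata = {
            {
                    .mmc            = 1,
                    .gpio_cd        = 160,  /* with .cover_only set, this
                                               number lands in gpio_cod */
                    .gpio_wp        = -EINVAL,
                    .cover_only     = true,
            },
            {}      /* terminated by .mmc == 0 */
    };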
index f2b586d7b15dfe7cade115ecca4f785324eb15e0..155807fa6fdd0f5a3078064391ac5bb310ba3f53 100644 (file)
@@ -15,7 +15,7 @@
  */
 
 #include <asm/firmware.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
        tegra_set_cpu_in_lp2();
        cpu_pm_enter();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        call_firmware_op(prepare_idle);
 
@@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
        if (call_firmware_op(do_idle, 0) == -ENOSYS)
                cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        cpu_pm_exit();
        tegra_clear_cpu_in_lp2();
index 4f25a7c7ca0fed7b74c8aab8b0f4281021f9e175..48844ae6c3a119b8493aaffef1c805cf929c7111 100644 (file)
@@ -20,7 +20,7 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -136,11 +136,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev,
        if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready())
                return false;
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        tegra_idle_lp2_last();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        if (cpu_online(1))
                tegra20_wake_cpu1_from_reset();
@@ -153,13 +153,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
                                         struct cpuidle_driver *drv,
                                         int index)
 {
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
 
        tegra20_cpu_clear_resettable();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        return true;
 }
index f8815ed65d9d5227b3f21ddea6c316bb9e24723a..84d809a3cba3b2f720d261ece0dc07df14ec72cf 100644 (file)
@@ -20,7 +20,7 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -76,11 +76,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev,
                return false;
        }
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        tegra_idle_lp2_last();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        return true;
 }
@@ -90,13 +90,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev,
                                        struct cpuidle_driver *drv,
                                        int index)
 {
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        smp_wmb();
 
        cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        return true;
 }
index 61b4d705c26720eb88f2dbcf7eab72361d31d14d..2438b96004c1c36013cb8e55fe2fb4b2eb180663 100644 (file)
@@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void)
 }
 
 /**
- * omap_read_persistent_clock -  Return time from a persistent clock.
+ * omap_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
 * 32k sync timer.  Converts the cycles elapsed since the last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds them to a monotonically increasing timespec64.
  */
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static cycles_t cycles;
 static unsigned int persistent_mult, persistent_shift;
-static DEFINE_SPINLOCK(read_persistent_clock_lock);
 
-static void omap_read_persistent_clock(struct timespec *ts)
+static void omap_read_persistent_clock64(struct timespec64 *ts)
 {
        unsigned long long nsecs;
        cycles_t last_cycles;
-       unsigned long flags;
-
-       spin_lock_irqsave(&read_persistent_clock_lock, flags);
 
        last_cycles = cycles;
        cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0;
@@ -69,11 +65,9 @@ static void omap_read_persistent_clock(struct timespec *ts)
        nsecs = clocksource_cyc2ns(cycles - last_cycles,
                                        persistent_mult, persistent_shift);
 
-       timespec_add_ns(&persistent_ts, nsecs);
+       timespec64_add_ns(&persistent_ts, nsecs);
 
        *ts = persistent_ts;
-
-       spin_unlock_irqrestore(&read_persistent_clock_lock, flags);
 }
 
 /**
@@ -103,7 +97,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
 
        /*
         * 120000 rough estimate from the calculations in
-        * __clocksource_updatefreq_scale.
+        * __clocksource_update_freq_scale.
         */
        clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
                        32768, NSEC_PER_SEC, 120000);
@@ -116,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
        }
 
        sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
-       register_persistent_clock(NULL, omap_read_persistent_clock);
+       register_persistent_clock(NULL, omap_read_persistent_clock64);
        pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
 
        return 0;
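
The timespec to timespec64 switch is part of the y2038 preparation: a 32-bit tv_sec overflows in January 2038, while timespec64 stays valid on 32-bit machines. The private spinlock can go on the assumption that read_persistent_clock64() is only invoked by the timekeeping core in already-serialized contexts (boot and the suspend/resume path, with interrupts disabled).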
index 92bbae38159821cb6ce45ab3dab456c7eedced15..70522450ca2342a66b3ec27167bb94e08590f6d1 100644 (file)
@@ -90,6 +90,7 @@
 #define ESR_ELx_FSC            (0x3F)
 #define ESR_ELx_FSC_TYPE       (0x3C)
 #define ESR_ELx_FSC_EXTABT     (0x10)
+#define ESR_ELx_FSC_ACCESS     (0x08)
 #define ESR_ELx_FSC_FAULT      (0x04)
 #define ESR_ELx_FSC_PERM       (0x0C)
 #define ESR_ELx_CV             (UL(1) << 24)
index 076a1c714049ffede6cf8fb3d46c6efcf3ac7f1f..c0e5165c2f76d3c1b979bbaac11121f6e27885da 100644 (file)
  */
 #ifndef __ASM_JUMP_LABEL_H
 #define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <asm/insn.h>
 
-#ifdef __KERNEL__
-
 #define JUMP_LABEL_NOP_SIZE            AARCH64_INSN_SIZE
 
 static __always_inline bool arch_static_branch(struct static_key *key)
@@ -39,8 +40,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u64 jump_label_t;
 
 struct jump_entry {
@@ -49,4 +48,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
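
Trading the __KERNEL__ guard for __ASSEMBLY__ makes the header safe to include from assembly sources: the C-only declarations (the includes, arch_static_branch() and the jump_entry type) are hidden from the assembler, which would otherwise choke on them.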
index 54bb4ba974417e269656d50adb524654851fbbd2..ac6fafb95fe71e48048fe3831f226853f2f4914d 100644 (file)
 
 /* For compatibility with fault code shared with 32-bit */
 #define FSC_FAULT      ESR_ELx_FSC_FAULT
+#define FSC_ACCESS     ESR_ELx_FSC_ACCESS
 #define FSC_PERM       ESR_ELx_FSC_PERM
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
index 8ac3c70fe3c6ae7f234c5505a4a3c0e5bbd6c4ff..f0f58c9beec0e87c8c7eefa7a8ba52ba518e07c3 100644 (file)
@@ -28,6 +28,8 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -177,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the remaining empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-                             unsigned long end)
-{
-       return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-       return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                                         unsigned long address)
 {
index 9f52beb7cb1355e1ebf129b899830b7b463d67b2..889c908ee631b526594b5dfc32ef5dfde15480df 100644 (file)
@@ -31,28 +31,6 @@ struct kvm_decode {
        bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-       phys_addr_t     phys_addr;
-       u8              data[8];
-       u32             len;
-       bool            is_write;
-       void            *private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-                                   struct kvm_exit_mmio *mmio)
-{
-       run->mmio.phys_addr     = mmio->phys_addr;
-       run->mmio.len           = mmio->len;
-       run->mmio.is_write      = mmio->is_write;
-       memcpy(run->mmio.data, mmio->data, mmio->len);
-       run->exit_reason        = KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa);
index 3ef77a4660187ace735216639ffc1e8f8d11a38b..c154c0b7eb604ae09214beed276063f222af69ae 100644 (file)
@@ -191,6 +191,9 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX            127
 
+/* One single KVM irqchip, i.e. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE               0x95c1ba5e
 #define KVM_PSCI_FN(n)                 (KVM_PSCI_FN_BASE + (n))
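
KVM_NR_IRQCHIPS is consumed by the generic irqfd/IRQ-routing code that the Kconfig change below enables; on ARM the single in-kernel irqchip is the VGIC.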
index 32aeea083d93b2391122ad9d1c49c3925121e38b..ec37ab3f524f303419d2cc3a82b79c119e61de1d 100644 (file)
@@ -200,7 +200,7 @@ up_fail:
 void update_vsyscall(struct timekeeper *tk)
 {
        struct timespec xtime_coarse;
-       u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+       u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 
        ++vdso_data->tb_seq_count;
        smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
        vdso_data->wtm_clock_nsec               = tk->wall_to_monotonic.tv_nsec;
 
        if (!use_syscall) {
-               vdso_data->cs_cycle_last        = tk->tkr.cycle_last;
+               vdso_data->cs_cycle_last        = tk->tkr_mono.cycle_last;
                vdso_data->xtime_clock_sec      = tk->xtime_sec;
-               vdso_data->xtime_clock_nsec     = tk->tkr.xtime_nsec;
-               vdso_data->cs_mult              = tk->tkr.mult;
-               vdso_data->cs_shift             = tk->tkr.shift;
+               vdso_data->xtime_clock_nsec     = tk->tkr_mono.xtime_nsec;
+               vdso_data->cs_mult              = tk->tkr_mono.mult;
+               vdso_data->cs_shift             = tk->tkr_mono.shift;
        }
 
        smp_wmb();
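
This is a mechanical rename: the timekeeper's read base tk->tkr became tk->tkr_mono when a separate raw-clock read base (tkr_raw) was introduced upstream, and the data exported to the vDSO is unchanged.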
index f5590c81d95f9e494bd82b5dda127762d5ea01f7..5105e297ed5fef43509f264a001138d299fb5cba 100644 (file)
@@ -18,6 +18,7 @@ if VIRTUALIZATION
 
 config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
+       depends on OF
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select ANON_INODES
@@ -25,10 +26,10 @@ config KVM
        select HAVE_KVM_ARCH_TLB_FLUSH_ALL
        select KVM_MMIO
        select KVM_ARM_HOST
-       select KVM_ARM_VGIC
-       select KVM_ARM_TIMER
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select SRCU
+       select HAVE_KVM_EVENTFD
+       select HAVE_KVM_IRQFD
        ---help---
          Support hosting virtualized guest machines.
 
@@ -50,17 +51,4 @@ config KVM_ARM_MAX_VCPUS
          large, so only choose a reasonable number that you expect to
          actually use.
 
-config KVM_ARM_VGIC
-       bool
-       depends on KVM_ARM_HOST && OF
-       select HAVE_KVM_IRQCHIP
-       ---help---
-         Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-       bool
-       depends on KVM_ARM_VGIC
-       ---help---
-         Adds support for the Architected Timers in virtual machines.
-
 endif # VIRTUALIZATION
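
Net effect of the Kconfig changes: VGIC and timer support stop being nominally optional (their symbols are deleted and the objects are always built, as the Makefile below shows), the OF dependency moves from the deleted KVM_ARM_VGIC symbol onto KVM itself, and selecting HAVE_KVM_EVENTFD/HAVE_KVM_IRQFD is what backs the KVM_CAP_IRQFD and KVM_CAP_IOEVENTFD capabilities advertised in arm.c above.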
index 4e6e09ee4033503088d686af976a6f7b0f3ee46a..d5904f876cdb535a373c6299beeec09bb5538331 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm
 CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
@@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
-kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
index d232888b99d5b1e210a70627413244d1e78b8ac7..0388ece75b02870d18d8711c14e782aa7a25136a 100644 (file)
@@ -84,7 +84,7 @@ typedef struct user_fpu_struct elf_fpregset_t;
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
 
 
 /* This yields a mask that user programs can use to figure out what
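
The reordering avoids integer overflow: 2 * TASK_SIZE can wrap before the division when TASK_SIZE lies in the upper half of the address space, while dividing first keeps every intermediate value in range. A worked illustration, assuming 32-bit unsigned arithmetic and a hypothetical TASK_SIZE of 0xC0000000:

    /* old: 2 * 0xC0000000 = 0x180000000, truncated to 0x80000000;
     *      0x80000000 / 3 = 0x2AAAAAAA              (wrong)
     * new: 0xC0000000 / 3 = 0x40000000;
     *      0x40000000 * 2 = 0x80000000              (the intended 2/3) */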
index 1a10a08ebec7474f35977d35e6a3b3513a74e86c..ed1643b4c67893b627be4c23a3c9bc0c25b718f0 100644 (file)
@@ -521,8 +521,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -573,5 +575,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 7859a738c81ead67ec513fbb4cbbd56545d3b825..d38822b1847ee272325793172d32804f3f8f8c00 100644 (file)
@@ -479,8 +479,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -531,5 +533,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 372593a3b398f3a693a4bd9d3a8b298da23d2344..c429199cf4a9332a9e43432c6bcfed6032863ba0 100644 (file)
@@ -501,8 +501,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -553,5 +555,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index f3bd35e76ea4823d156fc1d184602da8d35511f3..9b880371d6421ec9be77870c3fd352589eb571ee 100644 (file)
@@ -472,8 +472,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 9f9793fb2b73dac7460ac6145b6aa141232b6b52..49ae3376e993a50f462a61dcd790ca4383b8ff40 100644 (file)
@@ -481,8 +481,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -533,5 +535,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 89f225c01a0b6745dcbf7032f9d1040742203924..ee143a57058cb93ed3fd116154ef19a9932e2532 100644 (file)
@@ -503,8 +503,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -555,5 +557,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index d3cdb5447a2ca7e539c443101a132663c8c0f72f..c777aa05048f0bad1a2b33d46e7d5617e3752c42 100644 (file)
@@ -583,8 +583,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -635,5 +637,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index b4c76640973eca3fb94b6e6b3c15c34583bc9a5a..a7628a85e260ddabb6e99f6ff5c71ee4fd565b8f 100644 (file)
@@ -472,8 +472,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 0d4a26f9b58c5596ed306612a8b5788e205731c8..ebaa68268a4a5f3513b03f8e7fa842cf78427f60 100644 (file)
@@ -472,8 +472,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 5d581c503fa375d516b3811482ed795f23aa2544..2c16853aedd350b8270623bf10e4e1e6cff9619b 100644 (file)
@@ -340,7 +340,7 @@ CONFIG_VETH=m
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MICREL is not set
-CONFIG_NE2000=m
+CONFIG_NE2000=y
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
@@ -494,8 +494,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -546,5 +548,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index c6b49a4a887cab5edbd11ec4357124ad66c45610..e3056bf0f65bdea73d9b3eeefba282be8fc371a0 100644 (file)
@@ -473,8 +473,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -524,5 +526,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index b65785eaff8dc52f3482247444574dc1489e59ae..73c36b7a000978cf3d8f9a6f39271435cf0d2e21 100644 (file)
@@ -473,8 +473,10 @@ CONFIG_NLS_MAC_TURKISH=m
 CONFIG_DLM=m
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
 CONFIG_TEST_STRING_HELPERS=m
 CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_LKM=m
 CONFIG_TEST_USER_COPY=m
 CONFIG_TEST_BPF=m
@@ -525,5 +527,6 @@ CONFIG_CRYPTO_DRBG_HASH=y
 CONFIG_CRYPTO_DRBG_CTR=y
 CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
+CONFIG_CRYPTO_USER_API_RNG=m
 # CONFIG_CRYPTO_HW is not set
 CONFIG_XZ_DEC_TEST=m
index 7b51416ccae221d103e489446461a7d16ffe4e1c..256da0e4aeb41b8a7381a7351f14942313c1cc71 100644 (file)
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
 */
 
 #ifndef mcfqspi_h
index 931a31ff59ddb32de3300d494b00a8efdcba2afc..8520250a1d9369c58c706674b4f2b6d0cbfc7275 100644 (file)
@@ -62,7 +62,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 
                r = dev->resource + idx;
                if (!r->start && r->end) {
-                       pr_err(KERN_ERR "PCI: Device %s not available because of resource collisions\n",
+                       pr_err("PCI: Device %s not available because of resource collisions\n",
                                pci_name(dev));
                        return -EINVAL;
                }
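
pr_err() already supplies the log level, so the extra KERN_ERR was passed as part of the format string and printed verbatim into the message instead of being interpreted as a level. From <linux/printk.h>, approximately:

    #define pr_err(fmt, ...) \
            printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)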
index 7729f33878d1c43b17efeb81a7b229e049896c03..37234c2df47f6e8ed6edd4eb5d60fc29178f8ef7 100644 (file)
@@ -11,12 +11,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
index 18ea5f7ed921f50e63fe03117a78cce6cf8a392e..1d59345f36c631550767bc8f8c77c9e6a69afbc0 100644 (file)
@@ -11,12 +11,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
index ec307b61991e417d6cd93f38b96be14d7bff12e6..2c0ec85ac661547382c2206b613e86ba9d3a63be 100644 (file)
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
index d06442d3a3288abf0b2b292872f918377e9ba45d..49e1ec8f2cc27a9f9880bfe78cef279afcdf3ab1 100644 (file)
@@ -11,12 +11,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #define BITS_PER_UNIT 8
 
index ef3849435768587832ad39ea21e19954a6426cfd..1d9e0efdf31d201f67ae56496a6fae6496ef8868 100644 (file)
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
index ee5f0b1b5c5dd9b86c78552f42e4e7e9210a74cb..9006d15b87218d95d77effd305bb49b4968e4914 100644 (file)
@@ -12,12 +12,7 @@ any later version.
 GNU CC is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+GNU General Public License for more details. */
 
 #ifdef CONFIG_CPU_HAS_NO_MULDIV64
 
index ce29ea37b45ffdfc5962d826512739cc466a68ac..c39ad4e738e9a6522fcef640020502f727d46a52 100644 (file)
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
index c424c4a1f0a3276a8ca186f169b3e0eed361bd79..35a5446572a5ee3bae78e725ae8c39619332b7a0 100644 (file)
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
index 5def5f626478c38cbb2c12bf8f84cff2434e732f..099da514a8fd80daa85d3ba2644e3f2792175b5b 100644 (file)
@@ -19,12 +19,7 @@ distribution when not linked into another program.)
 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
+General Public License for more details. */
 
 /* As a special exception, if you link this library with files
    compiled with GCC to produce an executable, this does not cause
index 54037125ebf8c0e05c82cb937cb2560a7575244d..bb11dceed7ed2e947ea31f9393539c8b2e6f5c41 100644 (file)
@@ -47,9 +47,8 @@ void __init oss_init(void)
        /* Disable all interrupts. Unlike a VIA it looks like we    */
        /* do this by setting the source's interrupt level to zero. */
 
-       for (i = 0; i <= OSS_NUM_SOURCES; i++) {
+       for (i = 0; i < OSS_NUM_SOURCES; i++)
                oss->irq_level[i] = 0;
-       }
 }
 
 /*
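
The loop bound change above is an off-by-one fix: with <= the loop touched OSS_NUM_SOURCES + 1 entries, one more than the number of sources, while the < bound covers exactly indices 0 through OSS_NUM_SOURCES - 1. The braces go because the body is a single statement, per kernel coding style.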
index cdac7b3eeaf7fa6524b8f6ffb92edc7209eef4c5..80386470d3a4414da1a6ff17d14b624ddd417ad7 100644 (file)
        .set push
        SET_HARDFLOAT
        cfc1    \tmp,  fcr31
-       swc1    $f0,  THREAD_FPR0_LS64(\thread)
-       swc1    $f1,  THREAD_FPR1_LS64(\thread)
-       swc1    $f2,  THREAD_FPR2_LS64(\thread)
-       swc1    $f3,  THREAD_FPR3_LS64(\thread)
-       swc1    $f4,  THREAD_FPR4_LS64(\thread)
-       swc1    $f5,  THREAD_FPR5_LS64(\thread)
-       swc1    $f6,  THREAD_FPR6_LS64(\thread)
-       swc1    $f7,  THREAD_FPR7_LS64(\thread)
-       swc1    $f8,  THREAD_FPR8_LS64(\thread)
-       swc1    $f9,  THREAD_FPR9_LS64(\thread)
-       swc1    $f10, THREAD_FPR10_LS64(\thread)
-       swc1    $f11, THREAD_FPR11_LS64(\thread)
-       swc1    $f12, THREAD_FPR12_LS64(\thread)
-       swc1    $f13, THREAD_FPR13_LS64(\thread)
-       swc1    $f14, THREAD_FPR14_LS64(\thread)
-       swc1    $f15, THREAD_FPR15_LS64(\thread)
-       swc1    $f16, THREAD_FPR16_LS64(\thread)
-       swc1    $f17, THREAD_FPR17_LS64(\thread)
-       swc1    $f18, THREAD_FPR18_LS64(\thread)
-       swc1    $f19, THREAD_FPR19_LS64(\thread)
-       swc1    $f20, THREAD_FPR20_LS64(\thread)
-       swc1    $f21, THREAD_FPR21_LS64(\thread)
-       swc1    $f22, THREAD_FPR22_LS64(\thread)
-       swc1    $f23, THREAD_FPR23_LS64(\thread)
-       swc1    $f24, THREAD_FPR24_LS64(\thread)
-       swc1    $f25, THREAD_FPR25_LS64(\thread)
-       swc1    $f26, THREAD_FPR26_LS64(\thread)
-       swc1    $f27, THREAD_FPR27_LS64(\thread)
-       swc1    $f28, THREAD_FPR28_LS64(\thread)
-       swc1    $f29, THREAD_FPR29_LS64(\thread)
-       swc1    $f30, THREAD_FPR30_LS64(\thread)
-       swc1    $f31, THREAD_FPR31_LS64(\thread)
+       swc1    $f0,  THREAD_FPR0(\thread)
+       swc1    $f1,  THREAD_FPR1(\thread)
+       swc1    $f2,  THREAD_FPR2(\thread)
+       swc1    $f3,  THREAD_FPR3(\thread)
+       swc1    $f4,  THREAD_FPR4(\thread)
+       swc1    $f5,  THREAD_FPR5(\thread)
+       swc1    $f6,  THREAD_FPR6(\thread)
+       swc1    $f7,  THREAD_FPR7(\thread)
+       swc1    $f8,  THREAD_FPR8(\thread)
+       swc1    $f9,  THREAD_FPR9(\thread)
+       swc1    $f10, THREAD_FPR10(\thread)
+       swc1    $f11, THREAD_FPR11(\thread)
+       swc1    $f12, THREAD_FPR12(\thread)
+       swc1    $f13, THREAD_FPR13(\thread)
+       swc1    $f14, THREAD_FPR14(\thread)
+       swc1    $f15, THREAD_FPR15(\thread)
+       swc1    $f16, THREAD_FPR16(\thread)
+       swc1    $f17, THREAD_FPR17(\thread)
+       swc1    $f18, THREAD_FPR18(\thread)
+       swc1    $f19, THREAD_FPR19(\thread)
+       swc1    $f20, THREAD_FPR20(\thread)
+       swc1    $f21, THREAD_FPR21(\thread)
+       swc1    $f22, THREAD_FPR22(\thread)
+       swc1    $f23, THREAD_FPR23(\thread)
+       swc1    $f24, THREAD_FPR24(\thread)
+       swc1    $f25, THREAD_FPR25(\thread)
+       swc1    $f26, THREAD_FPR26(\thread)
+       swc1    $f27, THREAD_FPR27(\thread)
+       swc1    $f28, THREAD_FPR28(\thread)
+       swc1    $f29, THREAD_FPR29(\thread)
+       swc1    $f30, THREAD_FPR30(\thread)
+       swc1    $f31, THREAD_FPR31(\thread)
        sw      \tmp, THREAD_FCR31(\thread)
        .set pop
        .endm
        .set push
        SET_HARDFLOAT
        lw      \tmp, THREAD_FCR31(\thread)
-       lwc1    $f0,  THREAD_FPR0_LS64(\thread)
-       lwc1    $f1,  THREAD_FPR1_LS64(\thread)
-       lwc1    $f2,  THREAD_FPR2_LS64(\thread)
-       lwc1    $f3,  THREAD_FPR3_LS64(\thread)
-       lwc1    $f4,  THREAD_FPR4_LS64(\thread)
-       lwc1    $f5,  THREAD_FPR5_LS64(\thread)
-       lwc1    $f6,  THREAD_FPR6_LS64(\thread)
-       lwc1    $f7,  THREAD_FPR7_LS64(\thread)
-       lwc1    $f8,  THREAD_FPR8_LS64(\thread)
-       lwc1    $f9,  THREAD_FPR9_LS64(\thread)
-       lwc1    $f10, THREAD_FPR10_LS64(\thread)
-       lwc1    $f11, THREAD_FPR11_LS64(\thread)
-       lwc1    $f12, THREAD_FPR12_LS64(\thread)
-       lwc1    $f13, THREAD_FPR13_LS64(\thread)
-       lwc1    $f14, THREAD_FPR14_LS64(\thread)
-       lwc1    $f15, THREAD_FPR15_LS64(\thread)
-       lwc1    $f16, THREAD_FPR16_LS64(\thread)
-       lwc1    $f17, THREAD_FPR17_LS64(\thread)
-       lwc1    $f18, THREAD_FPR18_LS64(\thread)
-       lwc1    $f19, THREAD_FPR19_LS64(\thread)
-       lwc1    $f20, THREAD_FPR20_LS64(\thread)
-       lwc1    $f21, THREAD_FPR21_LS64(\thread)
-       lwc1    $f22, THREAD_FPR22_LS64(\thread)
-       lwc1    $f23, THREAD_FPR23_LS64(\thread)
-       lwc1    $f24, THREAD_FPR24_LS64(\thread)
-       lwc1    $f25, THREAD_FPR25_LS64(\thread)
-       lwc1    $f26, THREAD_FPR26_LS64(\thread)
-       lwc1    $f27, THREAD_FPR27_LS64(\thread)
-       lwc1    $f28, THREAD_FPR28_LS64(\thread)
-       lwc1    $f29, THREAD_FPR29_LS64(\thread)
-       lwc1    $f30, THREAD_FPR30_LS64(\thread)
-       lwc1    $f31, THREAD_FPR31_LS64(\thread)
+       lwc1    $f0,  THREAD_FPR0(\thread)
+       lwc1    $f1,  THREAD_FPR1(\thread)
+       lwc1    $f2,  THREAD_FPR2(\thread)
+       lwc1    $f3,  THREAD_FPR3(\thread)
+       lwc1    $f4,  THREAD_FPR4(\thread)
+       lwc1    $f5,  THREAD_FPR5(\thread)
+       lwc1    $f6,  THREAD_FPR6(\thread)
+       lwc1    $f7,  THREAD_FPR7(\thread)
+       lwc1    $f8,  THREAD_FPR8(\thread)
+       lwc1    $f9,  THREAD_FPR9(\thread)
+       lwc1    $f10, THREAD_FPR10(\thread)
+       lwc1    $f11, THREAD_FPR11(\thread)
+       lwc1    $f12, THREAD_FPR12(\thread)
+       lwc1    $f13, THREAD_FPR13(\thread)
+       lwc1    $f14, THREAD_FPR14(\thread)
+       lwc1    $f15, THREAD_FPR15(\thread)
+       lwc1    $f16, THREAD_FPR16(\thread)
+       lwc1    $f17, THREAD_FPR17(\thread)
+       lwc1    $f18, THREAD_FPR18(\thread)
+       lwc1    $f19, THREAD_FPR19(\thread)
+       lwc1    $f20, THREAD_FPR20(\thread)
+       lwc1    $f21, THREAD_FPR21(\thread)
+       lwc1    $f22, THREAD_FPR22(\thread)
+       lwc1    $f23, THREAD_FPR23(\thread)
+       lwc1    $f24, THREAD_FPR24(\thread)
+       lwc1    $f25, THREAD_FPR25(\thread)
+       lwc1    $f26, THREAD_FPR26(\thread)
+       lwc1    $f27, THREAD_FPR27(\thread)
+       lwc1    $f28, THREAD_FPR28(\thread)
+       lwc1    $f29, THREAD_FPR29(\thread)
+       lwc1    $f30, THREAD_FPR30(\thread)
+       lwc1    $f31, THREAD_FPR31(\thread)
        ctc1    \tmp, fcr31
        .set pop
        .endm
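
The _LS64 suffix is dropped from every THREAD_FPRxx asm-offset here and in the 64-bit variant that follows; judging from the diff this is a purely mechanical rename of the generated offset constants, and the actual swc1/lwc1 and sdc1/ldc1 save/restore sequences are unchanged.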
index 0cae4595e985bbc3d8043b3bb85aef66c582615b..6156ac8c4cfb9a854bf3ed3a5546606216161118 100644 (file)
        .set    push
        SET_HARDFLOAT
        cfc1    \tmp, fcr31
-       sdc1    $f0,  THREAD_FPR0_LS64(\thread)
-       sdc1    $f2,  THREAD_FPR2_LS64(\thread)
-       sdc1    $f4,  THREAD_FPR4_LS64(\thread)
-       sdc1    $f6,  THREAD_FPR6_LS64(\thread)
-       sdc1    $f8,  THREAD_FPR8_LS64(\thread)
-       sdc1    $f10, THREAD_FPR10_LS64(\thread)
-       sdc1    $f12, THREAD_FPR12_LS64(\thread)
-       sdc1    $f14, THREAD_FPR14_LS64(\thread)
-       sdc1    $f16, THREAD_FPR16_LS64(\thread)
-       sdc1    $f18, THREAD_FPR18_LS64(\thread)
-       sdc1    $f20, THREAD_FPR20_LS64(\thread)
-       sdc1    $f22, THREAD_FPR22_LS64(\thread)
-       sdc1    $f24, THREAD_FPR24_LS64(\thread)
-       sdc1    $f26, THREAD_FPR26_LS64(\thread)
-       sdc1    $f28, THREAD_FPR28_LS64(\thread)
-       sdc1    $f30, THREAD_FPR30_LS64(\thread)
+       sdc1    $f0,  THREAD_FPR0(\thread)
+       sdc1    $f2,  THREAD_FPR2(\thread)
+       sdc1    $f4,  THREAD_FPR4(\thread)
+       sdc1    $f6,  THREAD_FPR6(\thread)
+       sdc1    $f8,  THREAD_FPR8(\thread)
+       sdc1    $f10, THREAD_FPR10(\thread)
+       sdc1    $f12, THREAD_FPR12(\thread)
+       sdc1    $f14, THREAD_FPR14(\thread)
+       sdc1    $f16, THREAD_FPR16(\thread)
+       sdc1    $f18, THREAD_FPR18(\thread)
+       sdc1    $f20, THREAD_FPR20(\thread)
+       sdc1    $f22, THREAD_FPR22(\thread)
+       sdc1    $f24, THREAD_FPR24(\thread)
+       sdc1    $f26, THREAD_FPR26(\thread)
+       sdc1    $f28, THREAD_FPR28(\thread)
+       sdc1    $f30, THREAD_FPR30(\thread)
        sw      \tmp, THREAD_FCR31(\thread)
        .set    pop
        .endm
        .set    push
        .set    mips64r2
        SET_HARDFLOAT
-       sdc1    $f1,  THREAD_FPR1_LS64(\thread)
-       sdc1    $f3,  THREAD_FPR3_LS64(\thread)
-       sdc1    $f5,  THREAD_FPR5_LS64(\thread)
-       sdc1    $f7,  THREAD_FPR7_LS64(\thread)
-       sdc1    $f9,  THREAD_FPR9_LS64(\thread)
-       sdc1    $f11, THREAD_FPR11_LS64(\thread)
-       sdc1    $f13, THREAD_FPR13_LS64(\thread)
-       sdc1    $f15, THREAD_FPR15_LS64(\thread)
-       sdc1    $f17, THREAD_FPR17_LS64(\thread)
-       sdc1    $f19, THREAD_FPR19_LS64(\thread)
-       sdc1    $f21, THREAD_FPR21_LS64(\thread)
-       sdc1    $f23, THREAD_FPR23_LS64(\thread)
-       sdc1    $f25, THREAD_FPR25_LS64(\thread)
-       sdc1    $f27, THREAD_FPR27_LS64(\thread)
-       sdc1    $f29, THREAD_FPR29_LS64(\thread)
-       sdc1    $f31, THREAD_FPR31_LS64(\thread)
+       sdc1    $f1,  THREAD_FPR1(\thread)
+       sdc1    $f3,  THREAD_FPR3(\thread)
+       sdc1    $f5,  THREAD_FPR5(\thread)
+       sdc1    $f7,  THREAD_FPR7(\thread)
+       sdc1    $f9,  THREAD_FPR9(\thread)
+       sdc1    $f11, THREAD_FPR11(\thread)
+       sdc1    $f13, THREAD_FPR13(\thread)
+       sdc1    $f15, THREAD_FPR15(\thread)
+       sdc1    $f17, THREAD_FPR17(\thread)
+       sdc1    $f19, THREAD_FPR19(\thread)
+       sdc1    $f21, THREAD_FPR21(\thread)
+       sdc1    $f23, THREAD_FPR23(\thread)
+       sdc1    $f25, THREAD_FPR25(\thread)
+       sdc1    $f27, THREAD_FPR27(\thread)
+       sdc1    $f29, THREAD_FPR29(\thread)
+       sdc1    $f31, THREAD_FPR31(\thread)
        .set    pop
        .endm
 
        .set    push
        SET_HARDFLOAT
        lw      \tmp, THREAD_FCR31(\thread)
-       ldc1    $f0,  THREAD_FPR0_LS64(\thread)
-       ldc1    $f2,  THREAD_FPR2_LS64(\thread)
-       ldc1    $f4,  THREAD_FPR4_LS64(\thread)
-       ldc1    $f6,  THREAD_FPR6_LS64(\thread)
-       ldc1    $f8,  THREAD_FPR8_LS64(\thread)
-       ldc1    $f10, THREAD_FPR10_LS64(\thread)
-       ldc1    $f12, THREAD_FPR12_LS64(\thread)
-       ldc1    $f14, THREAD_FPR14_LS64(\thread)
-       ldc1    $f16, THREAD_FPR16_LS64(\thread)
-       ldc1    $f18, THREAD_FPR18_LS64(\thread)
-       ldc1    $f20, THREAD_FPR20_LS64(\thread)
-       ldc1    $f22, THREAD_FPR22_LS64(\thread)
-       ldc1    $f24, THREAD_FPR24_LS64(\thread)
-       ldc1    $f26, THREAD_FPR26_LS64(\thread)
-       ldc1    $f28, THREAD_FPR28_LS64(\thread)
-       ldc1    $f30, THREAD_FPR30_LS64(\thread)
+       ldc1    $f0,  THREAD_FPR0(\thread)
+       ldc1    $f2,  THREAD_FPR2(\thread)
+       ldc1    $f4,  THREAD_FPR4(\thread)
+       ldc1    $f6,  THREAD_FPR6(\thread)
+       ldc1    $f8,  THREAD_FPR8(\thread)
+       ldc1    $f10, THREAD_FPR10(\thread)
+       ldc1    $f12, THREAD_FPR12(\thread)
+       ldc1    $f14, THREAD_FPR14(\thread)
+       ldc1    $f16, THREAD_FPR16(\thread)
+       ldc1    $f18, THREAD_FPR18(\thread)
+       ldc1    $f20, THREAD_FPR20(\thread)
+       ldc1    $f22, THREAD_FPR22(\thread)
+       ldc1    $f24, THREAD_FPR24(\thread)
+       ldc1    $f26, THREAD_FPR26(\thread)
+       ldc1    $f28, THREAD_FPR28(\thread)
+       ldc1    $f30, THREAD_FPR30(\thread)
        ctc1    \tmp, fcr31
        .endm
 
        .set    push
        .set    mips64r2
        SET_HARDFLOAT
-       ldc1    $f1,  THREAD_FPR1_LS64(\thread)
-       ldc1    $f3,  THREAD_FPR3_LS64(\thread)
-       ldc1    $f5,  THREAD_FPR5_LS64(\thread)
-       ldc1    $f7,  THREAD_FPR7_LS64(\thread)
-       ldc1    $f9,  THREAD_FPR9_LS64(\thread)
-       ldc1    $f11, THREAD_FPR11_LS64(\thread)
-       ldc1    $f13, THREAD_FPR13_LS64(\thread)
-       ldc1    $f15, THREAD_FPR15_LS64(\thread)
-       ldc1    $f17, THREAD_FPR17_LS64(\thread)
-       ldc1    $f19, THREAD_FPR19_LS64(\thread)
-       ldc1    $f21, THREAD_FPR21_LS64(\thread)
-       ldc1    $f23, THREAD_FPR23_LS64(\thread)
-       ldc1    $f25, THREAD_FPR25_LS64(\thread)
-       ldc1    $f27, THREAD_FPR27_LS64(\thread)
-       ldc1    $f29, THREAD_FPR29_LS64(\thread)
-       ldc1    $f31, THREAD_FPR31_LS64(\thread)
+       ldc1    $f1,  THREAD_FPR1(\thread)
+       ldc1    $f3,  THREAD_FPR3(\thread)
+       ldc1    $f5,  THREAD_FPR5(\thread)
+       ldc1    $f7,  THREAD_FPR7(\thread)
+       ldc1    $f9,  THREAD_FPR9(\thread)
+       ldc1    $f11, THREAD_FPR11(\thread)
+       ldc1    $f13, THREAD_FPR13(\thread)
+       ldc1    $f15, THREAD_FPR15(\thread)
+       ldc1    $f17, THREAD_FPR17(\thread)
+       ldc1    $f19, THREAD_FPR19(\thread)
+       ldc1    $f21, THREAD_FPR21(\thread)
+       ldc1    $f23, THREAD_FPR23(\thread)
+       ldc1    $f25, THREAD_FPR25(\thread)
+       ldc1    $f27, THREAD_FPR27(\thread)
+       ldc1    $f29, THREAD_FPR29(\thread)
+       ldc1    $f31, THREAD_FPR31(\thread)
        .set    pop
        .endm
 
        .endm
 
 #ifdef TOOLCHAIN_SUPPORTS_MSA
+       .macro  _cfcmsa rd, cs
+       .set    push
+       .set    mips32r2
+       .set    msa
+       cfcmsa  \rd, $\cs
+       .set    pop
+       .endm
+
+       .macro  _ctcmsa cd, rs
+       .set    push
+       .set    mips32r2
+       .set    msa
+       ctcmsa  $\cd, \rs
+       .set    pop
+       .endm
+
        .macro  ld_d    wd, off, base
        .set    push
        .set    mips32r2
        .set    pop
        .endm
 
-       .macro  copy_u_w        rd, ws, n
+       .macro  copy_u_w        ws, n
        .set    push
        .set    mips32r2
        .set    msa
-       copy_u.w \rd, $w\ws[\n]
+       copy_u.w $1, $w\ws[\n]
        .set    pop
        .endm
 
-       .macro  copy_u_d        rd, ws, n
+       .macro  copy_u_d        ws, n
        .set    push
        .set    mips64r2
        .set    msa
-       copy_u.d \rd, $w\ws[\n]
+       copy_u.d $1, $w\ws[\n]
        .set    pop
        .endm
 
-       .macro  insert_w        wd, n, rs
+       .macro  insert_w        wd, n
        .set    push
        .set    mips32r2
        .set    msa
-       insert.w $w\wd[\n], \rs
+       insert.w $w\wd[\n], $1
        .set    pop
        .endm
 
-       .macro  insert_d        wd, n, rs
+       .macro  insert_d        wd, n
        .set    push
        .set    mips64r2
        .set    msa
-       insert.d $w\wd[\n], \rs
+       insert.d $w\wd[\n], $1
        .set    pop
        .endm
 #else
        /*
         * Temporary until all toolchains in use include MSA support.
         */
-       .macro  cfcmsa  rd, cs
+       .macro  _cfcmsa rd, cs
        .set    push
        .set    noat
        SET_HARDFLOAT
        .set    pop
        .endm
 
-       .macro  ctcmsa  cd, rs
+       .macro  _ctcmsa cd, rs
        .set    push
        .set    noat
        SET_HARDFLOAT
        .set    pop
        .endm
 
-       .macro  copy_u_w        rd, ws, n
+       .macro  copy_u_w        ws, n
        .set    push
        .set    noat
        SET_HARDFLOAT
        .insn
        .word   COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
-       /* move triggers an assembler bug... */
-       or      \rd, $1, zero
        .set    pop
        .endm
 
-       .macro  copy_u_d        rd, ws, n
+       .macro  copy_u_d        ws, n
        .set    push
        .set    noat
        SET_HARDFLOAT
        .insn
        .word   COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
-       /* move triggers an assembler bug... */
-       or      \rd, $1, zero
        .set    pop
        .endm
 
-       .macro  insert_w        wd, n, rs
+       .macro  insert_w        wd, n
        .set    push
        .set    noat
        SET_HARDFLOAT
-       /* move triggers an assembler bug... */
-       or      $1, \rs, zero
        .word   INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
        .set    pop
        .endm
 
-       .macro  insert_d        wd, n, rs
+       .macro  insert_d        wd, n
        .set    push
        .set    noat
        SET_HARDFLOAT
-       /* move triggers an assembler bug... */
-       or      $1, \rs, zero
        .word   INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
        .set    pop
        .endm
        .set    push
        .set    noat
        SET_HARDFLOAT
-       cfcmsa  $1, MSA_CSR
+       _cfcmsa $1, MSA_CSR
        sw      $1, THREAD_MSA_CSR(\thread)
        .set    pop
        .endm
        .set    noat
        SET_HARDFLOAT
        lw      $1, THREAD_MSA_CSR(\thread)
-       ctcmsa  MSA_CSR, $1
+       _ctcmsa MSA_CSR, $1
        .set    pop
        ld_d    0, THREAD_FPR0, \thread
        ld_d    1, THREAD_FPR1, \thread
        insert_w \wd, 2
        insert_w \wd, 3
 #endif
-       .if     31-\wd
-       msa_init_upper  (\wd+1)
-       .endif
        .endm
 
        .macro  msa_init_all_upper
        SET_HARDFLOAT
        not     $1, zero
        msa_init_upper  0
+       msa_init_upper  1
+       msa_init_upper  2
+       msa_init_upper  3
+       msa_init_upper  4
+       msa_init_upper  5
+       msa_init_upper  6
+       msa_init_upper  7
+       msa_init_upper  8
+       msa_init_upper  9
+       msa_init_upper  10
+       msa_init_upper  11
+       msa_init_upper  12
+       msa_init_upper  13
+       msa_init_upper  14
+       msa_init_upper  15
+       msa_init_upper  16
+       msa_init_upper  17
+       msa_init_upper  18
+       msa_init_upper  19
+       msa_init_upper  20
+       msa_init_upper  21
+       msa_init_upper  22
+       msa_init_upper  23
+       msa_init_upper  24
+       msa_init_upper  25
+       msa_init_upper  26
+       msa_init_upper  27
+       msa_init_upper  28
+       msa_init_upper  29
+       msa_init_upper  30
+       msa_init_upper  31
        .set    pop
        .endm
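
Two things change in the MSA macro block above: the control-register accessors gain a leading underscore (_cfcmsa/_ctcmsa) so they cannot collide with the real cfcmsa/ctcmsa mnemonics once the toolchain knows MSA, and the copy_u_*/insert_* macros drop their GPR argument, fixing it to $1 ($at) so the toolchain-assembled and hand-encoded (.word) variants share one calling convention. msa_init_all_upper is also unrolled into 32 explicit invocations instead of having msa_init_upper recurse into itself. A rough model of how the fallback path builds an instruction word; the base opcode below is made up, the real INSERT_W_MSA_INSN is defined elsewhere in the kernel:

    #include <stdint.h>
    #include <stdio.h>

    #define INSERT_W_BASE   0x79300819u     /* hypothetical base opcode */

    /* Mirrors ".word INSERT_W_MSA_INSN | (n << 16) | (wd << 6)": the
     * element index lands at bit 16, the destination wd at bit 6. */
    static uint32_t insert_w_word(unsigned int wd, unsigned int n)
    {
            return INSERT_W_BASE | (n << 16) | (wd << 6);
    }

    int main(void)
    {
            printf("insert.w $w2[3] -> %#x\n", insert_w_word(2, 3));
            return 0;
    }
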
 
index dd083e999b08a14ffdbef46d5f5f4a0731e9f18e..b104ad9d655f2da157544fcf783a225377cb996d 100644 (file)
@@ -48,6 +48,12 @@ enum fpu_mode {
 #define FPU_FR_MASK            0x1
 };
 
+#define __disable_fpu()                                                        \
+do {                                                                   \
+       clear_c0_status(ST0_CU1);                                       \
+       disable_fpu_hazard();                                           \
+} while (0)
+
 static inline int __enable_fpu(enum fpu_mode mode)
 {
        int fr;
@@ -86,7 +92,12 @@ fr_common:
                enable_fpu_hazard();
 
                /* check FR has the desired value */
-               return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+               if (!!(read_c0_status() & ST0_FR) == !!fr)
+                       return 0;
+
+               /* unsupported FR value */
+               __disable_fpu();
+               return SIGFPE;
 
        default:
                BUG();
@@ -95,12 +106,6 @@ fr_common:
        return SIGFPE;
 }
 
-#define __disable_fpu()                                                        \
-do {                                                                   \
-       clear_c0_status(ST0_CU1);                                       \
-       disable_fpu_hazard();                                           \
-} while (0)
-
 #define clear_fpu_owner()      clear_thread_flag(TIF_USEDFPU)
 
 static inline int __is_fpu_owner(void)
@@ -170,6 +175,7 @@ static inline void lose_fpu(int save)
                }
                disable_msa();
                clear_thread_flag(TIF_USEDMSA);
+               __disable_fpu();
        } else if (is_fpu_owner()) {
                if (save)
                        _save_fp(current);
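
Moving __disable_fpu() above __enable_fpu() lets the latter back out cleanly: if the requested Status.FR value does not stick, the FPU is switched off again before SIGFPE is returned rather than being left enabled in an unsupported mode, and lose_fpu() now also disables the FPU after discarding MSA state. A toy model of the back-out pattern, with stand-in helpers rather than the real CP0 accessors:

    #include <stdbool.h>
    #include <stdio.h>

    static bool hw_fr;                       /* models Status.FR */
    static bool hw_cu1;                      /* models Status.CU1 */
    static const bool fr1_supported = false; /* assume FR=1 unsupported */

    static void set_fr(bool fr)     { hw_fr = fr && fr1_supported; }
    static void fpu_disable(void)   { hw_cu1 = false; }

    static int enable_fpu_checked(bool fr)
    {
            hw_cu1 = true;
            set_fr(fr);
            if (hw_fr == fr)
                    return 0;       /* requested FR mode took effect */
            fpu_disable();          /* back out: don't leave CU1 set */
            return 1;               /* caller raises SIGFPE */
    }

    int main(void)
    {
            printf("FR=1: ret=%d CU1=%d\n",
                   enable_fpu_checked(true), hw_cu1);
            return 0;
    }
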
index fdbff44e5482cd20acc5efc798091894a3292c4f..608aa57799c8194a88d39d2e9b6d9968e072ac8c 100644 (file)
@@ -8,9 +8,9 @@
 #ifndef _ASM_MIPS_JUMP_LABEL_H
 #define _ASM_MIPS_JUMP_LABEL_H
 
-#include <linux/types.h>
+#ifndef __ASSEMBLY__
 
-#ifdef __KERNEL__
+#include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
@@ -39,8 +39,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_64BIT
 typedef u64 jump_label_t;
 #else
@@ -53,4 +51,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_JUMP_LABEL_H */
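
Swapping the __KERNEL__ guard for __ASSEMBLY__ makes the header includable from assembly sources for its constants while hiding the C declarations from the assembler. The shape of the pattern, as a self-contained sketch (a hypothetical header, not the kernel's):

    /* example.h: safe to include from both C and .S files. */
    #ifndef _EXAMPLE_H
    #define _EXAMPLE_H

    #define EXAMPLE_NOP_SIZE 4              /* visible to C and asm */

    #ifndef __ASSEMBLY__                    /* C-only section */
    #include <stdint.h>

    struct example_entry {
            uint64_t code;
            uint64_t target;
    };
    #endif /* __ASSEMBLY__ */

    #endif /* _EXAMPLE_H */
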
index 6a9af5fcb5d72ef7878dc9581f366b07568a1c0f..cba22ab7ad4d5fd9087aeb8181c6c45ba3398a8c 100644 (file)
@@ -10,7 +10,8 @@ enum die_val {
        DIE_RI,
        DIE_PAGE_FAULT,
        DIE_BREAK,
-       DIE_SSTEPBP
+       DIE_SSTEPBP,
+       DIE_MSAFP
 };
 
 #endif /* _ASM_MIPS_KDEBUG_H */
index ac4fc716062b791003c76f5572d56863bcdcb2cd..4c25823563fe16dfe8f4008351c111eb0dd5c4ad 100644 (file)
 
 /* MIPS KVM register ids */
 #define MIPS_CP0_32(_R, _S)                                    \
-       (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+       (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
 
 #define MIPS_CP0_64(_R, _S)                                    \
-       (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+       (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S)))
 
 #define KVM_REG_MIPS_CP0_INDEX         MIPS_CP0_32(0, 0)
 #define KVM_REG_MIPS_CP0_ENTRYLO0      MIPS_CP0_64(2, 0)
 #define KVM_REG_MIPS_CP0_STATUS                MIPS_CP0_32(12, 0)
 #define KVM_REG_MIPS_CP0_CAUSE         MIPS_CP0_32(13, 0)
 #define KVM_REG_MIPS_CP0_EPC           MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_PRID          MIPS_CP0_32(15, 0)
 #define KVM_REG_MIPS_CP0_EBASE         MIPS_CP0_64(15, 1)
 #define KVM_REG_MIPS_CP0_CONFIG                MIPS_CP0_32(16, 0)
 #define KVM_REG_MIPS_CP0_CONFIG1       MIPS_CP0_32(16, 1)
 #define KVM_REG_MIPS_CP0_CONFIG2       MIPS_CP0_32(16, 2)
 #define KVM_REG_MIPS_CP0_CONFIG3       MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG4       MIPS_CP0_32(16, 4)
+#define KVM_REG_MIPS_CP0_CONFIG5       MIPS_CP0_32(16, 5)
 #define KVM_REG_MIPS_CP0_CONFIG7       MIPS_CP0_32(16, 7)
 #define KVM_REG_MIPS_CP0_XCONTEXT      MIPS_CP0_64(20, 0)
 #define KVM_REG_MIPS_CP0_ERROREPC      MIPS_CP0_64(30, 0)
@@ -119,6 +122,10 @@ struct kvm_vcpu_stat {
        u32 syscall_exits;
        u32 resvd_inst_exits;
        u32 break_inst_exits;
+       u32 trap_inst_exits;
+       u32 msa_fpe_exits;
+       u32 fpe_exits;
+       u32 msa_disabled_exits;
        u32 flush_dcache_exits;
        u32 halt_successful_poll;
        u32 halt_wakeup;
@@ -138,6 +145,10 @@ enum kvm_mips_exit_types {
        SYSCALL_EXITS,
        RESVD_INST_EXITS,
        BREAK_INST_EXITS,
+       TRAP_INST_EXITS,
+       MSA_FPE_EXITS,
+       FPE_EXITS,
+       MSA_DISABLED_EXITS,
        FLUSH_DCACHE_EXITS,
        MAX_KVM_MIPS_EXIT_TYPES
 };
@@ -206,6 +217,8 @@ struct mips_coproc {
 #define MIPS_CP0_CONFIG1_SEL   1
 #define MIPS_CP0_CONFIG2_SEL   2
 #define MIPS_CP0_CONFIG3_SEL   3
+#define MIPS_CP0_CONFIG4_SEL   4
+#define MIPS_CP0_CONFIG5_SEL   5
 
 /* Config0 register bits */
 #define CP0C0_M                        31
@@ -262,31 +275,6 @@ struct mips_coproc {
 #define CP0C3_SM               1
 #define CP0C3_TL               0
 
-/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0                                           \
-  ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
-
-/* Have config2, no coprocessor2 attached, no MDMX support attached,
-   no performance counters, watch registers present,
-   no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1                                           \
-((1 << CP0C1_M) |                                              \
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) |         \
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) |         \
- (0 << CP0C1_FP))
-
-/* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2                                           \
-((1 << CP0C2_M))
-
-/* No config4, no DSP ASE, no large physaddr (PABITS),
-   no external interrupt controller, no vectored interrupts,
-   no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3                                           \
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) |       \
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) |     \
- (0 << CP0C3_SM) | (0 << CP0C3_TL))
-
 /* MMU types, the first four entries have the same layout as the
    CP0C0_MT field.  */
 enum mips_mmu_types {
@@ -321,7 +309,9 @@ enum mips_mmu_types {
  */
 #define T_TRAP                 13      /* Trap instruction */
 #define T_VCEI                 14      /* Virtual coherency exception */
+#define T_MSAFPE               14      /* MSA floating point exception */
 #define T_FPE                  15      /* Floating point exception */
+#define T_MSADIS               21      /* MSA disabled exception */
 #define T_WATCH                        23      /* Watch address reference */
 #define T_VCED                 31      /* Virtual coherency data */
 
@@ -374,6 +364,9 @@ struct kvm_mips_tlb {
        long tlb_lo1;
 };
 
+#define KVM_MIPS_FPU_FPU       0x1
+#define KVM_MIPS_FPU_MSA       0x2
+
 #define KVM_MIPS_GUEST_TLB_SIZE        64
 struct kvm_vcpu_arch {
        void *host_ebase, *guest_ebase;
@@ -395,6 +388,8 @@ struct kvm_vcpu_arch {
 
        /* FPU State */
        struct mips_fpu_struct fpu;
+       /* Which FPU state is loaded (KVM_MIPS_FPU_*) */
+       unsigned int fpu_inuse;
 
        /* COP0 State */
        struct mips_coproc *cop0;
@@ -441,6 +436,9 @@ struct kvm_vcpu_arch {
 
        /* WAIT executed */
        int wait;
+
+       u8 fpu_enabled;
+       u8 msa_enabled;
 };
 
 
@@ -482,11 +480,15 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_config1(cop0)                (cop0->reg[MIPS_CP0_CONFIG][1])
 #define kvm_read_c0_guest_config2(cop0)                (cop0->reg[MIPS_CP0_CONFIG][2])
 #define kvm_read_c0_guest_config3(cop0)                (cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config4(cop0)                (cop0->reg[MIPS_CP0_CONFIG][4])
+#define kvm_read_c0_guest_config5(cop0)                (cop0->reg[MIPS_CP0_CONFIG][5])
 #define kvm_read_c0_guest_config7(cop0)                (cop0->reg[MIPS_CP0_CONFIG][7])
 #define kvm_write_c0_guest_config(cop0, val)   (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
 #define kvm_write_c0_guest_config1(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
 #define kvm_write_c0_guest_config2(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
 #define kvm_write_c0_guest_config3(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config4(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][4] = (val))
+#define kvm_write_c0_guest_config5(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][5] = (val))
 #define kvm_write_c0_guest_config7(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
 #define kvm_read_c0_guest_errorepc(cop0)       (cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
@@ -567,6 +569,31 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
        kvm_set_c0_guest_ebase(cop0, ((val) & (change)));               \
 }
 
+/* Helpers */
+
+static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+{
+       return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+               vcpu->fpu_enabled;
+}
+
+static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+{
+       return kvm_mips_guest_can_have_fpu(vcpu) &&
+               kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
+}
+
+static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+{
+       return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) &&
+               vcpu->msa_enabled;
+}
+
+static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu)
+{
+       return kvm_mips_guest_can_have_msa(vcpu) &&
+               kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA;
+}
 
 struct kvm_mips_callbacks {
        int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
@@ -578,6 +605,10 @@ struct kvm_mips_callbacks {
        int (*handle_syscall)(struct kvm_vcpu *vcpu);
        int (*handle_res_inst)(struct kvm_vcpu *vcpu);
        int (*handle_break)(struct kvm_vcpu *vcpu);
+       int (*handle_trap)(struct kvm_vcpu *vcpu);
+       int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
+       int (*handle_fpe)(struct kvm_vcpu *vcpu);
+       int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
        int (*vm_init)(struct kvm *kvm);
        int (*vcpu_init)(struct kvm_vcpu *vcpu);
        int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -596,6 +627,8 @@ struct kvm_mips_callbacks {
                           const struct kvm_one_reg *reg, s64 *v);
        int (*set_one_reg)(struct kvm_vcpu *vcpu,
                           const struct kvm_one_reg *reg, s64 v);
+       int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
+       int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
 };
 extern struct kvm_mips_callbacks *kvm_mips_callbacks;
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -606,6 +639,19 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
 /* Trampoline ASM routine to start running in "Guest" context */
 extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
 
+/* FPU/MSA context management */
+void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu);
+void __kvm_save_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu);
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_msa(struct kvm_vcpu *vcpu);
+void kvm_drop_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+
 /* TLB handling */
 uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
 
@@ -711,6 +757,26 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
                                                     struct kvm_run *run,
                                                     struct kvm_vcpu *vcpu);
 
+extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+                                                      uint32_t *opc,
+                                                      struct kvm_run *run,
+                                                      struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+                                                        uint32_t *opc,
+                                                        struct kvm_run *run,
+                                                        struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+                                                     uint32_t *opc,
+                                                     struct kvm_run *run,
+                                                     struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+                                                        uint32_t *opc,
+                                                        struct kvm_run *run,
+                                                        struct kvm_vcpu *vcpu);
+
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
                                                         struct kvm_run *run);
 
@@ -749,6 +815,11 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst,
                                            struct kvm_run *run,
                                            struct kvm_vcpu *vcpu);
 
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+
 /* Dynamic binary translation */
 extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
                                      struct kvm_vcpu *vcpu);
index b5dcbee01fd7a52641584cbbf8b80848f7c6f4b9..9b3b48e21c221ffdcfc04ec9cae6165576043225 100644 (file)
@@ -105,7 +105,7 @@ union fpureg {
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 # define FPR_IDX(width, idx)   (idx)
 #else
-# define FPR_IDX(width, idx)   ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+# define FPR_IDX(width, idx)   ((idx) ^ ((64 / (width)) - 1))
 #endif
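
The big-endian swap now happens within each 64-bit doubleword rather than across the whole vector, so a 64-bit FP value always lands in val64[0] of the union, matching how the hardware keeps an FP register in the least-significant bits of its MSA vector register on either endianness. Comparing the two formulas, assuming FPU_REG_WIDTH == 128:

    #include <stdio.h>

    #define OLD_FPR_IDX(width, idx) ((128 / (width)) - 1 - (idx))
    #define NEW_FPR_IDX(width, idx) ((idx) ^ ((64 / (width)) - 1))

    int main(void)
    {
            int idx;

            for (idx = 0; idx < 2; idx++)   /* 64-bit halves */
                    printf("val64[%d]: old=%d new=%d\n", idx,
                           OLD_FPR_IDX(64, idx), NEW_FPR_IDX(64, idx));
            for (idx = 0; idx < 4; idx++)   /* 32-bit words */
                    printf("val32[%d]: old=%d new=%d\n", idx,
                           OLD_FPR_IDX(32, idx), NEW_FPR_IDX(32, idx));
            return 0;
    }
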
 
 #define BUILD_FPR_ACCESS(width) \
index 2c04b6d9ff85380de722745e934944411a5e33d3..6985eb59b08534581f7b4316655367182cb6f64f 100644 (file)
@@ -36,77 +36,85 @@ struct kvm_regs {
 
 /*
  * for KVM_GET_FPU and KVM_SET_FPU
- *
- * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs
- * are zero filled.
  */
 struct kvm_fpu {
-       __u64 fpr[32];
-       __u32 fir;
-       __u32 fccr;
-       __u32 fexr;
-       __u32 fenr;
-       __u32 fcsr;
-       __u32 pad;
 };
 
 
 /*
- * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0
+ * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
  * registers.  The id field is broken down as follows:
  *
- *  bits[2..0]   - Register 'sel' index.
- *  bits[7..3]   - Register 'rd'  index.
- *  bits[15..8]  - Must be zero.
- *  bits[31..16] - 1 -> CP0 registers.
- *  bits[51..32] - Must be zero.
  *  bits[63..52] - As per linux/kvm.h
+ *  bits[51..32] - Must be zero.
+ *  bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CP0 registers.
+ *  bits[15..8]  - Must be zero.
+ *  bits[7..3]   - Register 'rd'  index.
+ *  bits[2..0]   - Register 'sel' index.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / MSA registers (see definitions below).
  *
+ * Other register sets may be added in the future.  Each set would
  * have its own identifier in bits[31..16].
- *
- * The registers defined in struct kvm_regs are also accessible, the
- * id values for these are below.
  */
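
With the register-set field documented above, an id decomposes mechanically. A sketch composing and decoding the id for CP0_Status (rd=12, sel=0); the KVM_REG_MIPS and KVM_REG_SIZE_U32 values are assumed from the generic linux/kvm.h and are not part of this diff:

    #include <stdint.h>
    #include <stdio.h>

    #define KVM_REG_MIPS            0x7000000000000000ULL   /* assumed */
    #define KVM_REG_SIZE_U32        0x0020000000000000ULL   /* assumed */
    #define KVM_REG_MIPS_CP0        (KVM_REG_MIPS | 0x0000000000010000ULL)
    #define MIPS_CP0_32(_R, _S) \
            (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))

    int main(void)
    {
            uint64_t id = MIPS_CP0_32(12, 0);       /* CP0_Status */

            printf("id=%#llx set=%llu rd=%llu sel=%llu\n",
                   (unsigned long long)id,
                   (unsigned long long)((id >> 16) & 0xffff),
                   (unsigned long long)((id >> 3) & 0x1f),
                   (unsigned long long)(id & 0x7));
            return 0;
    }
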
 
-#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0)
-#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1)
-#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2)
-#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3)
-#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4)
-#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5)
-#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6)
-#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7)
-#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8)
-#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9)
-#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10)
-#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11)
-#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12)
-#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13)
-#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14)
-#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15)
-#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16)
-#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17)
-#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18)
-#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19)
-#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20)
-#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21)
-#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22)
-#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23)
-#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24)
-#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25)
-#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26)
-#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27)
-#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28)
-#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29)
-#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30)
-#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31)
-
-#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32)
-#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
-#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
-
-/* KVM specific control registers */
+#define KVM_REG_MIPS_GP                (KVM_REG_MIPS | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_CP0       (KVM_REG_MIPS | 0x0000000000010000ULL)
+#define KVM_REG_MIPS_KVM       (KVM_REG_MIPS | 0x0000000000020000ULL)
+#define KVM_REG_MIPS_FPU       (KVM_REG_MIPS | 0x0000000000030000ULL)
+
+
+/*
+ * KVM_REG_MIPS_GP - General purpose registers from kvm_regs.
+ */
+
+#define KVM_REG_MIPS_R0                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  0)
+#define KVM_REG_MIPS_R1                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  1)
+#define KVM_REG_MIPS_R2                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  2)
+#define KVM_REG_MIPS_R3                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  3)
+#define KVM_REG_MIPS_R4                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  4)
+#define KVM_REG_MIPS_R5                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  5)
+#define KVM_REG_MIPS_R6                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  6)
+#define KVM_REG_MIPS_R7                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  7)
+#define KVM_REG_MIPS_R8                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  8)
+#define KVM_REG_MIPS_R9                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  9)
+#define KVM_REG_MIPS_R10       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10)
+#define KVM_REG_MIPS_R11       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11)
+#define KVM_REG_MIPS_R12       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12)
+#define KVM_REG_MIPS_R13       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13)
+#define KVM_REG_MIPS_R14       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14)
+#define KVM_REG_MIPS_R15       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15)
+#define KVM_REG_MIPS_R16       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16)
+#define KVM_REG_MIPS_R17       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17)
+#define KVM_REG_MIPS_R18       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18)
+#define KVM_REG_MIPS_R19       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19)
+#define KVM_REG_MIPS_R20       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20)
+#define KVM_REG_MIPS_R21       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21)
+#define KVM_REG_MIPS_R22       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22)
+#define KVM_REG_MIPS_R23       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23)
+#define KVM_REG_MIPS_R24       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24)
+#define KVM_REG_MIPS_R25       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25)
+#define KVM_REG_MIPS_R26       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26)
+#define KVM_REG_MIPS_R27       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27)
+#define KVM_REG_MIPS_R28       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28)
+#define KVM_REG_MIPS_R29       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29)
+#define KVM_REG_MIPS_R30       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30)
+#define KVM_REG_MIPS_R31       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31)
+
+#define KVM_REG_MIPS_HI                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32)
+#define KVM_REG_MIPS_LO                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33)
+#define KVM_REG_MIPS_PC                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34)
+
+
+/*
+ * KVM_REG_MIPS_KVM - KVM specific control registers.
+ */
 
 /*
  * CP0_Count control
@@ -118,8 +126,7 @@ struct kvm_fpu {
  *        safely without losing time or guest timer interrupts.
  * Other: Reserved, do not change.
  */
-#define KVM_REG_MIPS_COUNT_CTL         (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-                                        0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL     (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0)
 #define KVM_REG_MIPS_COUNT_CTL_DC      0x00000001
 
 /*
@@ -131,15 +138,46 @@ struct kvm_fpu {
  * emulated.
  * Modifications to times in the future are rejected.
  */
-#define KVM_REG_MIPS_COUNT_RESUME      (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-                                        0x20000 | 1)
+#define KVM_REG_MIPS_COUNT_RESUME   (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1)
 /*
  * CP0_Count rate in Hz
  * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
  * discontinuities in CP0_Count.
  */
-#define KVM_REG_MIPS_COUNT_HZ          (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-                                        0x20000 | 2)
+#define KVM_REG_MIPS_COUNT_HZ      (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2)
+
+
+/*
+ * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers.
+ *
+ *  bits[15..8]  - Register subset (see definitions below).
+ *  bits[7..5]   - Must be zero.
+ *  bits[4..0]   - Register number within register subset.
+ */
+
+#define KVM_REG_MIPS_FPR       (KVM_REG_MIPS_FPU | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_FCR       (KVM_REG_MIPS_FPU | 0x0000000000000100ULL)
+#define KVM_REG_MIPS_MSACR     (KVM_REG_MIPS_FPU | 0x0000000000000200ULL)
+
+/*
+ * KVM_REG_MIPS_FPR - Floating point / Vector registers.
+ */
+#define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32  | (n))
+#define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64  | (n))
+#define KVM_REG_MIPS_VEC_128(n)        (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n))
+
+/*
+ * KVM_REG_MIPS_FCR - Floating point control registers.
+ */
+#define KVM_REG_MIPS_FCR_IR    (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_FCR_CSR   (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31)
+
+/*
+ * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers.
+ */
+#define KVM_REG_MIPS_MSA_IR     (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_MSA_CSR    (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  1)
+
 
 /*
  * KVM MIPS specific structures and definitions
index 750d67ac41e9b19affe066d5be8d1f56f7363041..e59fd7cfac9e35b2eeb90912c58504b9ed435606 100644 (file)
@@ -167,72 +167,6 @@ void output_thread_fpu_defines(void)
        OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
        OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
 
-       /* the least significant 64 bits of each FP register */
-       OFFSET(THREAD_FPR0_LS64, task_struct,
-              thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR1_LS64, task_struct,
-              thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR2_LS64, task_struct,
-              thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR3_LS64, task_struct,
-              thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR4_LS64, task_struct,
-              thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR5_LS64, task_struct,
-              thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR6_LS64, task_struct,
-              thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR7_LS64, task_struct,
-              thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR8_LS64, task_struct,
-              thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR9_LS64, task_struct,
-              thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR10_LS64, task_struct,
-              thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR11_LS64, task_struct,
-              thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR12_LS64, task_struct,
-              thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR13_LS64, task_struct,
-              thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR14_LS64, task_struct,
-              thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR15_LS64, task_struct,
-              thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR16_LS64, task_struct,
-              thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR17_LS64, task_struct,
-              thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR18_LS64, task_struct,
-              thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR19_LS64, task_struct,
-              thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR20_LS64, task_struct,
-              thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR21_LS64, task_struct,
-              thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR22_LS64, task_struct,
-              thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR23_LS64, task_struct,
-              thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR24_LS64, task_struct,
-              thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR25_LS64, task_struct,
-              thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR26_LS64, task_struct,
-              thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR27_LS64, task_struct,
-              thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR28_LS64, task_struct,
-              thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR29_LS64, task_struct,
-              thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR30_LS64, task_struct,
-              thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR31_LS64, task_struct,
-              thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
-
        OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
        OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr);
        BLANK();
@@ -470,6 +404,45 @@ void output_kvm_defines(void)
        OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
        OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
        OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
+       BLANK();
+
+       OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
+       OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
+       OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]);
+       OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]);
+       OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]);
+       OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]);
+       OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]);
+       OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]);
+       OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]);
+       OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]);
+       OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]);
+       OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]);
+       OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]);
+       OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]);
+       OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]);
+       OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]);
+       OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]);
+       OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]);
+       OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]);
+       OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]);
+       OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]);
+       OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]);
+       OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]);
+       OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]);
+       OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]);
+       OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]);
+       OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]);
+       OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]);
+       OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]);
+       OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]);
+       OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]);
+       OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]);
+
+       OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
+       OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
+       BLANK();
+
        OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
        OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
        OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
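
The VCPU_FPR*/VCPU_FCR31/VCPU_MSA_CSR constants exist so the new FPU/MSA context-switch assembly (__kvm_save_fpu and friends) can address struct kvm_vcpu_arch fields by numeric offset. A condensed sketch of the asm-offsets technique itself, with made-up names; the kernel's real DEFINE/OFFSET macros live in include/linux/kbuild.h:

    #include <stddef.h>

    struct vcpu_fpu_example {
            unsigned long long fpr[32];
            unsigned int fcr31;
    };

    /* Emit "->SYM value" markers into the compiler's asm output; a
     * build script scrapes them into #defines usable from .S files. */
    #define DEFINE(sym, val) \
            asm volatile("\n.ascii \"->" #sym " %0\"" : : "i" (val))
    #define OFFSET(sym, str, mem) \
            DEFINE(sym, offsetof(struct str, mem))

    void output_example_defines(void)
    {
            OFFSET(EX_VCPU_FPR0, vcpu_fpu_example, fpr[0]);
            OFFSET(EX_VCPU_FCR31, vcpu_fpu_example, fcr31);
    }
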
index 2ebaabe3af1513269e100d8bcffa9e8e9cb1f2c8..af42e7003f12d025cd31e2a5d167f2f4b158d37a 100644 (file)
@@ -360,12 +360,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
        .set    mips1
        SET_HARDFLOAT
        cfc1    a1, fcr31
-       li      a2, ~(0x3f << 12)
-       and     a2, a1
-       ctc1    a2, fcr31
        .set    pop
-       TRACE_IRQS_ON
-       STI
+       CLI
+       TRACE_IRQS_OFF
+       .endm
+
+       .macro  __build_clear_msa_fpe
+       _cfcmsa a1, MSA_CSR
+       CLI
+       TRACE_IRQS_OFF
        .endm
 
        .macro  __build_clear_ade
@@ -426,7 +429,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
        BUILD_HANDLER cpu cpu sti silent                /* #11 */
        BUILD_HANDLER ov ov sti silent                  /* #12 */
        BUILD_HANDLER tr tr sti silent                  /* #13 */
-       BUILD_HANDLER msa_fpe msa_fpe sti silent        /* #14 */
+       BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent    /* #14 */
        BUILD_HANDLER fpe fpe fpe silent                /* #15 */
        BUILD_HANDLER ftlb ftlb none silent             /* #16 */
        BUILD_HANDLER msa msa sti silent                /* #21 */
index 51045281259403c55fcefac09d510f874a3047bb..7da6e324dd354a77991c4fe2dd998c07b998d956 100644 (file)
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+static void init_fp_ctx(struct task_struct *target)
+{
+       /* If FP has been used then the target already has context */
+       if (tsk_used_math(target))
+               return;
+
+       /* Begin with data registers set to all 1s... */
+       memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+       /* ...and FCSR zeroed */
+       target->thread.fpu.fcr31 = 0;
+
+       /*
+        * Record that the target has "used" math, such that the context
+        * just initialised, and any modifications made by the caller,
+        * aren't discarded.
+        */
+       set_stopped_child_used_math(target);
+}
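
init_fp_ctx() centralises what arch_ptrace() previously open-coded (see the hunk below): registers start as all 1s, a pattern that reads back as a quiet NaN and makes use of never-written FP state conspicuous, while FCSR starts at zero. Note that memset() truncates its fill argument to a byte, so ~0 means 0xff bytes:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
            uint64_t fpr[32];

            memset(fpr, ~0, sizeof(fpr));   /* (unsigned char)~0 == 0xff */
            assert(fpr[0] == ~0ULL && fpr[31] == ~0ULL);
            return 0;
    }
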
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -142,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
        if (!access_ok(VERIFY_READ, data, 33 * 8))
                return -EIO;
 
+       init_fp_ctx(child);
        fregs = get_fpu_regs(child);
 
        for (i = 0; i < 32; i++) {
@@ -439,6 +460,8 @@ static int fpr_set(struct task_struct *target,
 
        /* XXX fcr31  */
 
+       init_fp_ctx(target);
+
        if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
                return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                          &target->thread.fpu,
@@ -660,12 +683,7 @@ long arch_ptrace(struct task_struct *child, long request,
                case FPR_BASE ... FPR_BASE + 31: {
                        union fpureg *fregs = get_fpu_regs(child);
 
-                       if (!tsk_used_math(child)) {
-                               /* FP not yet used  */
-                               memset(&child->thread.fpu, ~0,
-                                      sizeof(child->thread.fpu));
-                               child->thread.fpu.fcr31 = 0;
-                       }
+                       init_fp_ctx(child);
 #ifdef CONFIG_32BIT
                        if (test_thread_flag(TIF_32BIT_FPREGS)) {
                                /*
index 676c5030a953bf9cca5ad038a7526d3b94ce372d..1d88af26ba82a0c3ee58ff8ff3b2b1661dad9455 100644 (file)
@@ -34,7 +34,6 @@
        .endm
 
        .set    noreorder
-       .set    MIPS_ISA_ARCH_LEVEL_RAW
 
 LEAF(_save_fp_context)
        .set    push
@@ -103,6 +102,7 @@ LEAF(_save_fp_context)
        /* Save 32-bit process floating point context */
 LEAF(_save_fp_context32)
        .set push
+       .set MIPS_ISA_ARCH_LEVEL_RAW
        SET_HARDFLOAT
        cfc1    t1, fcr31
 
index 33984c04b60b710516f1b0bfb88aa52aaa04629f..5b4d711f878da251a101526671a80283503736a4 100644 (file)
@@ -701,6 +701,13 @@ asmlinkage void do_ov(struct pt_regs *regs)
 
 int process_fpemu_return(int sig, void __user *fault_addr)
 {
+       /*
+        * We can't allow the emulated instruction to leave any of the cause
+        * bits set in FCSR. If they were then the kernel would take an FP
+        * exception when restoring FP context.
+        */
+       current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+
        if (sig == SIGSEGV || sig == SIGBUS) {
                struct siginfo si = {0};
                si.si_addr = fault_addr;
@@ -781,6 +788,11 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
        if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs),
                       SIGFPE) == NOTIFY_STOP)
                goto out;
+
+       /* Clear FCSR.Cause before enabling interrupts */
+       write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+       local_irq_enable();
+
        die_if_kernel("FP exception in kernel code", regs);
 
        if (fcr31 & FPU_CSR_UNI_X) {
@@ -804,18 +816,12 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
                sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
                                               &fault_addr);
 
-               /*
-                * We can't allow the emulated instruction to leave any of
-                * the cause bit set in $fcr31.
-                */
-               current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+               /* If something went wrong, signal */
+               process_fpemu_return(sig, fault_addr);
 
                /* Restore the hardware register state */
                own_fpu(1);     /* Using the FPU again.  */
 
-               /* If something went wrong, signal */
-               process_fpemu_return(sig, fault_addr);
-
                goto out;
        } else if (fcr31 & FPU_CSR_INV_X)
                info.si_code = FPE_FLTINV;
@@ -1392,13 +1398,22 @@ out:
        exception_exit(prev_state);
 }
 
-asmlinkage void do_msa_fpe(struct pt_regs *regs)
+asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
 {
        enum ctx_state prev_state;
 
        prev_state = exception_enter();
+       if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0,
+                      regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP)
+               goto out;
+
+       /* Clear MSACSR.Cause before enabling interrupts */
+       write_msa_csr(msacsr & ~MSA_CSR_CAUSEF);
+       local_irq_enable();
+
        die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
        force_sig(SIGFPE, current);
+out:
        exception_exit(prev_state);
 }
 
index 401fe027c2612cf774fa53173420f93c072ea3c7..637ebbebd549701c1a0a67e7e3a8fbb9d2cb61f0 100644 (file)
@@ -1,13 +1,15 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
-kvm-objs := $(common-objs) mips.o emulate.o locore.o \
+common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+
+kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
            interrupt.o stats.o commpage.o \
-           dyntrans.o trap_emul.o
+           dyntrans.o trap_emul.o fpu.o
 
 obj-$(CONFIG_KVM)      += kvm.o
 obj-y                  += callback.o tlb.o
index fb3e8dfd1ff647263c0ca93a1d2ba6c5fdbd906b..6230f376a44e7ab6f09041c4b805e54e59468489 100644 (file)
@@ -884,6 +884,84 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu)
        return EMULATE_DONE;
 }
 
+/**
+ * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1
+ * @vcpu:      Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config1 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu)
+{
+       unsigned int mask = 0;
+
+       /* Permit FPU to be present if FPU is supported */
+       if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
+               mask |= MIPS_CONF1_FP;
+
+       return mask;
+}
+
+/**
+ * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3
+ * @vcpu:      Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config3 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
+{
+       /* Config4 is optional */
+       unsigned int mask = MIPS_CONF_M;
+
+       /* Permit MSA to be present if MSA is supported */
+       if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+               mask |= MIPS_CONF3_MSA;
+
+       return mask;
+}
+
+/**
+ * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4
+ * @vcpu:      Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config4 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
+{
+       /* Config5 is optional */
+       return MIPS_CONF_M;
+}
+
+/**
+ * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5
+ * @vcpu:      Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config5 CP0
+ * register, by the guest itself.
+ */
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
+{
+       unsigned int mask = 0;
+
+       /* Permit MSAEn changes if MSA supported and enabled */
+       if (kvm_mips_guest_has_msa(&vcpu->arch))
+               mask |= MIPS_CONF5_MSAEN;
+
+       /*
+        * Permit guest FPU mode changes if FPU is enabled and the relevant
+        * feature exists according to FIR register.
+        */
+       if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+               if (cpu_has_fre)
+                       mask |= MIPS_CONF5_FRE;
+               /* We don't support UFR or UFE */
+       }
+
+       return mask;
+}
+
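
The wrmask helpers above all feed one read-modify-write pattern, used for Config5 in the emulation code below: only bits present in the mask may change, everything else keeps its old value. In isolation:

    #include <stdio.h>

    static unsigned int masked_write(unsigned int old_val,
                                     unsigned int val,
                                     unsigned int wrmask)
    {
            unsigned int change = (val ^ old_val) & wrmask;

            return old_val ^ change;        /* as in the emulator */
    }

    int main(void)
    {
            /* guest tries to flip 0x1f, but only 0x11 is writable */
            printf("%#x\n", masked_write(0x10, 0x0f, 0x11)); /* 0x01 */
            return 0;
    }
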
 enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                                           uint32_t cause, struct kvm_run *run,
                                           struct kvm_vcpu *vcpu)
@@ -1021,18 +1099,114 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                                kvm_mips_write_compare(vcpu,
                                                       vcpu->arch.gprs[rt]);
                        } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
-                               kvm_write_c0_guest_status(cop0,
-                                                         vcpu->arch.gprs[rt]);
+                               unsigned int old_val, val, change;
+
+                               old_val = kvm_read_c0_guest_status(cop0);
+                               val = vcpu->arch.gprs[rt];
+                               change = val ^ old_val;
+
+                               /* Make sure that the NMI bit is never set */
+                               val &= ~ST0_NMI;
+
+                               /*
+                                * Don't allow CU1 or FR to be set unless FPU
+                                * capability enabled and exists in guest
+                                * configuration.
+                                */
+                               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                                       val &= ~(ST0_CU1 | ST0_FR);
+
+                               /*
+                                * Also don't allow FR to be set if host doesn't
+                                * support it.
+                                */
+                               if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+                                       val &= ~ST0_FR;
+
+                               /* Handle changes in FPU mode */
+                               preempt_disable();
+
+                               /*
+                                * FPU and Vector register state is made
+                                * UNPREDICTABLE by a change of FR, so don't
+                                * even bother saving it.
+                                */
+                               if (change & ST0_FR)
+                                       kvm_drop_fpu(vcpu);
+
+                               /*
+                                * If MSA state is already live, it is undefined
+                                * how it interacts with FR=0 FPU state, and we
+                                * don't want to hit reserved instruction
+                                * exceptions trying to save the MSA state later
+                                * when CU=1 && FR=1, so play it safe and save
+                                * it first.
+                                */
+                               if (change & ST0_CU1 && !(val & ST0_FR) &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+                                       kvm_lose_fpu(vcpu);
+
                                /*
-                                * Make sure that CU1 and NMI bits are
-                                * never set
+                                * Propagate CU1 (FPU enable) changes
+                                * immediately if the FPU context is already
+                                * loaded. When disabling we leave the context
+                                * loaded so it can be quickly enabled again in
+                                * the near future.
                                 */
-                               kvm_clear_c0_guest_status(cop0,
-                                                         (ST0_CU1 | ST0_NMI));
+                               if (change & ST0_CU1 &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+                                       change_c0_status(ST0_CU1, val);
+
+                               preempt_enable();
+
+                               kvm_write_c0_guest_status(cop0, val);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
-                               kvm_mips_trans_mtc0(inst, opc, vcpu);
+                               /*
+                                * If FPU present, we need CU1/FR bits to take
+                                * effect fairly soon.
+                                */
+                               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                                       kvm_mips_trans_mtc0(inst, opc, vcpu);
 #endif
+                       } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+                               unsigned int old_val, val, change, wrmask;
+
+                               old_val = kvm_read_c0_guest_config5(cop0);
+                               val = vcpu->arch.gprs[rt];
+
+                               /* Only a few bits are writable in Config5 */
+                               wrmask = kvm_mips_config5_wrmask(vcpu);
+                               change = (val ^ old_val) & wrmask;
+                               val = old_val ^ change;
+
+                               /* Handle changes in FPU/MSA modes */
+                               preempt_disable();
+
+                               /*
+                                * Propagate FRE changes immediately if the FPU
+                                * context is already loaded.
+                                */
+                               if (change & MIPS_CONF5_FRE &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+                                       change_c0_config5(MIPS_CONF5_FRE, val);
+
+                               /*
+                                * Propagate MSAEn changes immediately if the
+                                * MSA context is already loaded. When disabling
+                                * we leave the context loaded so it can be
+                                * quickly enabled again in the near future.
+                                */
+                               if (change & MIPS_CONF5_MSAEN &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+                                       change_c0_config5(MIPS_CONF5_MSAEN,
+                                                         val);
+
+                               preempt_enable();
+
+                               kvm_write_c0_guest_config5(cop0, val);
                        } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
                                uint32_t old_cause, new_cause;
 
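The Config5 handling above applies guest writes through a masked read-modify-write: XOR exposes the requested bit changes, the writable mask filters them, and a second XOR applies only the permitted ones. A minimal standalone sketch of the idiom (function name and values hypothetical):

    /* Masked read-modify-write as used for the Config5 emulation above:
     * only bits covered by wrmask can be flipped by the requested write.
     */
    static unsigned int masked_update(unsigned int old_val, unsigned int req,
                                      unsigned int wrmask)
    {
            unsigned int change = (req ^ old_val) & wrmask;

            return old_val ^ change;  /* flips exactly the permitted bits */
    }

    /* e.g. masked_update(0x0f, 0xf0, 0x3c) == 0x33: bits outside 0x3c keep
     * their old values, bits inside 0x3c take the requested ones.
     */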
@@ -1970,6 +2144,146 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
        return er;
 }
 
+enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+                                               uint32_t *opc,
+                                               struct kvm_run *run,
+                                               struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* save old pc */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_TRAP << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver TRAP when EXL is already set\n");
+               er = EMULATE_FAIL;
+       }
+
+       return er;
+}
+
+enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+                                                 uint32_t *opc,
+                                                 struct kvm_run *run,
+                                                 struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* save old pc */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_MSAFPE << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
+               er = EMULATE_FAIL;
+       }
+
+       return er;
+}
+
+enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+                                              uint32_t *opc,
+                                              struct kvm_run *run,
+                                              struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* save old pc */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_FPE << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver FPE when EXL is already set\n");
+               er = EMULATE_FAIL;
+       }
+
+       return er;
+}
+
+enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+                                                 uint32_t *opc,
+                                                 struct kvm_run *run,
+                                                 struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* save old pc */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_MSADIS << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver MSADIS when EXL is already set\n");
+               er = EMULATE_FAIL;
+       }
+
+       return er;
+}
+
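The four injectors above differ only in the exception code loaded into the guest's Cause register; a hedged sketch of the shared delivery template, factored into a hypothetical helper that is not part of this commit:

    /* Hypothetical refactoring sketch: the injectors above all follow
     * this delivery sequence, differing only in Cause.ExcCode.
     */
    static enum emulation_result kvm_mips_inject_exc(unsigned int exccode,
                                                     unsigned long cause,
                                                     struct kvm_vcpu *vcpu)
    {
            struct mips_coproc *cop0 = vcpu->arch.cop0;
            struct kvm_vcpu_arch *arch = &vcpu->arch;

            if (kvm_read_c0_guest_status(cop0) & ST0_EXL)
                    return EMULATE_FAIL;    /* already in exception mode */

            /* Save the faulting PC and enter guest exception mode */
            kvm_write_c0_guest_epc(cop0, arch->pc);
            kvm_set_c0_guest_status(cop0, ST0_EXL);

            /* Record whether the fault was in a branch delay slot */
            if (cause & CAUSEF_BD)
                    kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
            else
                    kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);

            /* Set the exception code and jump to the guest's vector */
            kvm_change_c0_guest_cause(cop0, 0xff, exccode << CAUSEB_EXCCODE);
            arch->pc = KVM_GUEST_KSEG0 + 0x180;

            return EMULATE_DONE;
    }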
 /* ll/sc, rdhwr, sync emulation */
 
 #define OPCODE 0xfc000000
@@ -2176,6 +2490,10 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                case T_SYSCALL:
                case T_BREAK:
                case T_RES_INST:
+               case T_TRAP:
+               case T_MSAFPE:
+               case T_FPE:
+               case T_MSADIS:
                        break;
 
                case T_COP_UNUSABLE:
diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S
new file mode 100644 (file)
index 0000000..531fbf5
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * FPU context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+       .set    noreorder
+       .set    noat
+
+LEAF(__kvm_save_fpu)
+       .set    push
+       .set    mips64r2
+       SET_HARDFLOAT
+       mfc0    t0, CP0_STATUS
+       sll     t0, t0, 5                       # is Status.FR set?
+       bgez    t0, 1f                          # no: skip odd doubles
+        nop
+       sdc1    $f1,  VCPU_FPR1(a0)
+       sdc1    $f3,  VCPU_FPR3(a0)
+       sdc1    $f5,  VCPU_FPR5(a0)
+       sdc1    $f7,  VCPU_FPR7(a0)
+       sdc1    $f9,  VCPU_FPR9(a0)
+       sdc1    $f11, VCPU_FPR11(a0)
+       sdc1    $f13, VCPU_FPR13(a0)
+       sdc1    $f15, VCPU_FPR15(a0)
+       sdc1    $f17, VCPU_FPR17(a0)
+       sdc1    $f19, VCPU_FPR19(a0)
+       sdc1    $f21, VCPU_FPR21(a0)
+       sdc1    $f23, VCPU_FPR23(a0)
+       sdc1    $f25, VCPU_FPR25(a0)
+       sdc1    $f27, VCPU_FPR27(a0)
+       sdc1    $f29, VCPU_FPR29(a0)
+       sdc1    $f31, VCPU_FPR31(a0)
+1:     sdc1    $f0,  VCPU_FPR0(a0)
+       sdc1    $f2,  VCPU_FPR2(a0)
+       sdc1    $f4,  VCPU_FPR4(a0)
+       sdc1    $f6,  VCPU_FPR6(a0)
+       sdc1    $f8,  VCPU_FPR8(a0)
+       sdc1    $f10, VCPU_FPR10(a0)
+       sdc1    $f12, VCPU_FPR12(a0)
+       sdc1    $f14, VCPU_FPR14(a0)
+       sdc1    $f16, VCPU_FPR16(a0)
+       sdc1    $f18, VCPU_FPR18(a0)
+       sdc1    $f20, VCPU_FPR20(a0)
+       sdc1    $f22, VCPU_FPR22(a0)
+       sdc1    $f24, VCPU_FPR24(a0)
+       sdc1    $f26, VCPU_FPR26(a0)
+       sdc1    $f28, VCPU_FPR28(a0)
+       jr      ra
+        sdc1   $f30, VCPU_FPR30(a0)
+       .set    pop
+       END(__kvm_save_fpu)
+
+LEAF(__kvm_restore_fpu)
+       .set    push
+       .set    mips64r2
+       SET_HARDFLOAT
+       mfc0    t0, CP0_STATUS
+       sll     t0, t0, 5                       # is Status.FR set?
+       bgez    t0, 1f                          # no: skip odd doubles
+        nop
+       ldc1    $f1,  VCPU_FPR1(a0)
+       ldc1    $f3,  VCPU_FPR3(a0)
+       ldc1    $f5,  VCPU_FPR5(a0)
+       ldc1    $f7,  VCPU_FPR7(a0)
+       ldc1    $f9,  VCPU_FPR9(a0)
+       ldc1    $f11, VCPU_FPR11(a0)
+       ldc1    $f13, VCPU_FPR13(a0)
+       ldc1    $f15, VCPU_FPR15(a0)
+       ldc1    $f17, VCPU_FPR17(a0)
+       ldc1    $f19, VCPU_FPR19(a0)
+       ldc1    $f21, VCPU_FPR21(a0)
+       ldc1    $f23, VCPU_FPR23(a0)
+       ldc1    $f25, VCPU_FPR25(a0)
+       ldc1    $f27, VCPU_FPR27(a0)
+       ldc1    $f29, VCPU_FPR29(a0)
+       ldc1    $f31, VCPU_FPR31(a0)
+1:     ldc1    $f0,  VCPU_FPR0(a0)
+       ldc1    $f2,  VCPU_FPR2(a0)
+       ldc1    $f4,  VCPU_FPR4(a0)
+       ldc1    $f6,  VCPU_FPR6(a0)
+       ldc1    $f8,  VCPU_FPR8(a0)
+       ldc1    $f10, VCPU_FPR10(a0)
+       ldc1    $f12, VCPU_FPR12(a0)
+       ldc1    $f14, VCPU_FPR14(a0)
+       ldc1    $f16, VCPU_FPR16(a0)
+       ldc1    $f18, VCPU_FPR18(a0)
+       ldc1    $f20, VCPU_FPR20(a0)
+       ldc1    $f22, VCPU_FPR22(a0)
+       ldc1    $f24, VCPU_FPR24(a0)
+       ldc1    $f26, VCPU_FPR26(a0)
+       ldc1    $f28, VCPU_FPR28(a0)
+       jr      ra
+        ldc1   $f30, VCPU_FPR30(a0)
+       .set    pop
+       END(__kvm_restore_fpu)
+
+LEAF(__kvm_restore_fcsr)
+       .set    push
+       SET_HARDFLOAT
+       lw      t0, VCPU_FCR31(a0)
+       /*
+        * The ctc1 must stay at this offset in __kvm_restore_fcsr.
+        * See kvm_mips_csr_die_notify() which handles t0 containing a value
+        * which triggers an FP Exception, which must be stepped over and
+        * ignored since the set cause bits must remain there for the guest.
+        */
+       ctc1    t0, fcr31
+       jr      ra
+        nop
+       .set    pop
+       END(__kvm_restore_fcsr)
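The Status.FR test at the top of both the save and restore paths works by shifting bit 26 into the sign bit; the same check expressed in C for illustration (constant value per mipsregs.h, program hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    #define ST0_FR (1u << 26)       /* Status.FR selects 64-bit FPU mode */

    int main(void)
    {
            uint32_t status = ST0_FR;

            /* "sll t0, t0, 5" moves bit 26 into bit 31; "bgez" then
             * branches past the odd doubles when the result is >= 0,
             * i.e. when FR was clear and only even doubles exist.
             */
            if ((int32_t)(status << 5) < 0)
                    printf("FR=1: save/restore odd doubles as well\n");
            else
                    printf("FR=0: even doubles only\n");
            return 0;
    }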
index 4a68b176d6e4f8dff9680a2c22a41019be1d574f..c567240386a0f10818b0e06433f4b33b1bf5e6dc 100644 (file)
@@ -36,6 +36,8 @@
 #define PT_HOST_USERLOCAL   PT_EPC
 
 #define CP0_DDATA_LO        $28,3
+#define CP0_CONFIG3         $16,3
+#define CP0_CONFIG5         $16,5
 #define CP0_EBASE           $15,1
 
 #define CP0_INTCTL          $12,1
@@ -353,6 +355,42 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
        LONG_L  k0, VCPU_HOST_EBASE(k1)
        mtc0    k0,CP0_EBASE
 
+       /*
+        * If FPU is enabled, save FCR31 and clear it so that later ctc1s don't
+        * trigger FPE for pending exceptions.
+        */
+       .set    at
+       and     v1, v0, ST0_CU1
+       beqz    v1, 1f
+        nop
+       .set    push
+       SET_HARDFLOAT
+       cfc1    t0, fcr31
+       sw      t0, VCPU_FCR31(k1)
+       ctc1    zero,fcr31
+       .set    pop
+       .set    noat
+1:
+
+#ifdef CONFIG_CPU_HAS_MSA
+       /*
+        * If MSA is enabled, save MSACSR and clear it so that later
+        * instructions don't trigger MSAFPE for pending exceptions.
+        */
+       mfc0    t0, CP0_CONFIG3
+       ext     t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
+       beqz    t0, 1f
+        nop
+       mfc0    t0, CP0_CONFIG5
+       ext     t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
+       beqz    t0, 1f
+        nop
+       _cfcmsa t0, MSA_CSR
+       sw      t0, VCPU_MSA_CSR(k1)
+       _ctcmsa MSA_CSR, zero
+1:
+#endif
+
        /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
        .set    at
        and     v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
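Clearing the live FCR31 here matters because writing FCSR via ctc1 raises an FP exception immediately if an unmasked cause bit lands in the register; a small illustration of that architectural rule (field layout per the MIPS FPU spec, helper hypothetical):

    #include <stdint.h>

    #define FCSR_CAUSE_SHIFT  12    /* Cause  I,U,O,Z,V in FCR31[16:12] */
    #define FCSR_ENABLE_SHIFT  7    /* Enable I,U,O,Z,V in FCR31[11:7]  */
    #define FCSR_CAUSE_E      (1u << 17)  /* unimplemented op: no enable */

    /* A ctc1 to FCSR traps at once if the written value has the E cause
     * bit set, or any cause bit whose matching enable bit is also set.
     * Clearing the hardware FCSR on guest exit avoids tripping this
     * while the host runs; the guest value comes back via
     * __kvm_restore_fcsr, where the die notifier steps over the trap.
     */
    static int ctc1_would_trap(uint32_t fcr31)
    {
            uint32_t cause  = (fcr31 >> FCSR_CAUSE_SHIFT) & 0x1f;
            uint32_t enable = (fcr31 >> FCSR_ENABLE_SHIFT) & 0x1f;

            return (fcr31 & FCSR_CAUSE_E) || (cause & enable);
    }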
index c9eccf5df912037e2b71bbb4a7dddd2a1d2d866e..bb68e8d520e83b5a30b74b22ae3292b1dd1469e1 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
@@ -48,6 +49,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "syscall",      VCPU_STAT(syscall_exits),      KVM_STAT_VCPU },
        { "resvd_inst",   VCPU_STAT(resvd_inst_exits),   KVM_STAT_VCPU },
        { "break_inst",   VCPU_STAT(break_inst_exits),   KVM_STAT_VCPU },
+       { "trap_inst",    VCPU_STAT(trap_inst_exits),    KVM_STAT_VCPU },
+       { "msa_fpe",      VCPU_STAT(msa_fpe_exits),      KVM_STAT_VCPU },
+       { "fpe",          VCPU_STAT(fpe_exits),          KVM_STAT_VCPU },
+       { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
        { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
        { "halt_wakeup",  VCPU_STAT(halt_wakeup),        KVM_STAT_VCPU },
@@ -504,10 +509,13 @@ static u64 kvm_mips_get_one_regs[] = {
        KVM_REG_MIPS_CP0_STATUS,
        KVM_REG_MIPS_CP0_CAUSE,
        KVM_REG_MIPS_CP0_EPC,
+       KVM_REG_MIPS_CP0_PRID,
        KVM_REG_MIPS_CP0_CONFIG,
        KVM_REG_MIPS_CP0_CONFIG1,
        KVM_REG_MIPS_CP0_CONFIG2,
        KVM_REG_MIPS_CP0_CONFIG3,
+       KVM_REG_MIPS_CP0_CONFIG4,
+       KVM_REG_MIPS_CP0_CONFIG5,
        KVM_REG_MIPS_CP0_CONFIG7,
        KVM_REG_MIPS_CP0_ERROREPC,
 
@@ -520,10 +528,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
                            const struct kvm_one_reg *reg)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
        int ret;
        s64 v;
+       s64 vs[2];
+       unsigned int idx;
 
        switch (reg->id) {
+       /* General purpose registers */
        case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
                v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
                break;
@@ -537,6 +549,67 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
                v = (long)vcpu->arch.pc;
                break;
 
+       /* Floating point registers */
+       case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+               /* Odd singles in top of even double when FR=0 */
+               if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+                       v = get_fpr32(&fpu->fpr[idx], 0);
+               else
+                       v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1);
+               break;
+       case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+               /* Can't access odd doubles in FR=0 mode */
+               if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+                       return -EINVAL;
+               v = get_fpr64(&fpu->fpr[idx], 0);
+               break;
+       case KVM_REG_MIPS_FCR_IR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               v = boot_cpu_data.fpu_id;
+               break;
+       case KVM_REG_MIPS_FCR_CSR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               v = fpu->fcr31;
+               break;
+
+       /* MIPS SIMD Architecture (MSA) registers */
+       case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               /* Can't access MSA registers in FR=0 mode */
+               if (!(kvm_read_c0_guest_status(cop0) & ST0_FR))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+               /* least significant byte first */
+               vs[0] = get_fpr64(&fpu->fpr[idx], 0);
+               vs[1] = get_fpr64(&fpu->fpr[idx], 1);
+#else
+               /* most significant byte first */
+               vs[0] = get_fpr64(&fpu->fpr[idx], 1);
+               vs[1] = get_fpr64(&fpu->fpr[idx], 0);
+#endif
+               break;
+       case KVM_REG_MIPS_MSA_IR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               v = boot_cpu_data.msa_id;
+               break;
+       case KVM_REG_MIPS_MSA_CSR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               v = fpu->msacsr;
+               break;
+
+       /* Co-processor 0 registers */
        case KVM_REG_MIPS_CP0_INDEX:
                v = (long)kvm_read_c0_guest_index(cop0);
                break;
@@ -573,8 +646,8 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_EPC:
                v = (long)kvm_read_c0_guest_epc(cop0);
                break;
-       case KVM_REG_MIPS_CP0_ERROREPC:
-               v = (long)kvm_read_c0_guest_errorepc(cop0);
+       case KVM_REG_MIPS_CP0_PRID:
+               v = (long)kvm_read_c0_guest_prid(cop0);
                break;
        case KVM_REG_MIPS_CP0_CONFIG:
                v = (long)kvm_read_c0_guest_config(cop0);
@@ -588,9 +661,18 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_CONFIG3:
                v = (long)kvm_read_c0_guest_config3(cop0);
                break;
+       case KVM_REG_MIPS_CP0_CONFIG4:
+               v = (long)kvm_read_c0_guest_config4(cop0);
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG5:
+               v = (long)kvm_read_c0_guest_config5(cop0);
+               break;
        case KVM_REG_MIPS_CP0_CONFIG7:
                v = (long)kvm_read_c0_guest_config7(cop0);
                break;
+       case KVM_REG_MIPS_CP0_ERROREPC:
+               v = (long)kvm_read_c0_guest_errorepc(cop0);
+               break;
        /* registers to be handled specially */
        case KVM_REG_MIPS_CP0_COUNT:
        case KVM_REG_MIPS_COUNT_CTL:
@@ -612,6 +694,10 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
                u32 v32 = (u32)v;
 
                return put_user(v32, uaddr32);
+       } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+               void __user *uaddr = (void __user *)(long)reg->addr;
+
+               return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0;
        } else {
                return -EINVAL;
        }
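For the new register IDs above, userland moves 32- and 64-bit values through kvm_one_reg as before, and 128-bit vector registers through a 16-byte buffer. An illustrative read of a single-precision FPR (error handling elided; the KVM_REG_MIPS_FPR_32() macro is assumed as added to the MIPS uapi headers by this series):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int get_guest_fpr32(int vcpu_fd, unsigned int n, uint32_t *out)
    {
            struct kvm_one_reg reg = {
                    .id   = KVM_REG_MIPS_FPR_32(n),
                    .addr = (uintptr_t)out,
            };

            /* With FR=0 the odd-numbered singles alias the upper halves
             * of the even-numbered doubles, as kvm_mips_get_reg() above
             * implements; odd doubles are rejected outright in FR=0.
             */
            return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
    }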
@@ -621,7 +707,10 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
                            const struct kvm_one_reg *reg)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
-       u64 v;
+       struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+       s64 v;
+       s64 vs[2];
+       unsigned int idx;
 
        if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
                u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
@@ -635,11 +724,16 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
                if (get_user(v32, uaddr32) != 0)
                        return -EFAULT;
                v = (s64)v32;
+       } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+               void __user *uaddr = (void __user *)(long)reg->addr;
+
+               if (copy_from_user(vs, uaddr, 16))
+                       return -EFAULT;
        } else {
                return -EINVAL;
        }
 
        switch (reg->id) {
+       /* General purpose registers */
        case KVM_REG_MIPS_R0:
                /* Silently ignore requests to set $0 */
                break;
@@ -656,6 +750,64 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
                vcpu->arch.pc = v;
                break;
 
+       /* Floating point registers */
+       case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+               /* Odd singles in top of even double when FR=0 */
+               if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+                       set_fpr32(&fpu->fpr[idx], 0, v);
+               else
+                       set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v);
+               break;
+       case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+               /* Can't access odd doubles in FR=0 mode */
+               if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+                       return -EINVAL;
+               set_fpr64(&fpu->fpr[idx], 0, v);
+               break;
+       case KVM_REG_MIPS_FCR_IR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               /* Read-only */
+               break;
+       case KVM_REG_MIPS_FCR_CSR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               fpu->fcr31 = v;
+               break;
+
+       /* MIPS SIMD Architecture (MSA) registers */
+       case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+               /* least significant byte first */
+               set_fpr64(&fpu->fpr[idx], 0, vs[0]);
+               set_fpr64(&fpu->fpr[idx], 1, vs[1]);
+#else
+               /* most significant byte first */
+               set_fpr64(&fpu->fpr[idx], 1, vs[0]);
+               set_fpr64(&fpu->fpr[idx], 0, vs[1]);
+#endif
+               break;
+       case KVM_REG_MIPS_MSA_IR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               /* Read-only */
+               break;
+       case KVM_REG_MIPS_MSA_CSR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               fpu->msacsr = v;
+               break;
+
+       /* Co-processor 0 registers */
        case KVM_REG_MIPS_CP0_INDEX:
                kvm_write_c0_guest_index(cop0, v);
                break;
@@ -686,6 +838,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_EPC:
                kvm_write_c0_guest_epc(cop0, v);
                break;
+       case KVM_REG_MIPS_CP0_PRID:
+               kvm_write_c0_guest_prid(cop0, v);
+               break;
        case KVM_REG_MIPS_CP0_ERROREPC:
                kvm_write_c0_guest_errorepc(cop0, v);
                break;
@@ -693,6 +848,12 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_COUNT:
        case KVM_REG_MIPS_CP0_COMPARE:
        case KVM_REG_MIPS_CP0_CAUSE:
+       case KVM_REG_MIPS_CP0_CONFIG:
+       case KVM_REG_MIPS_CP0_CONFIG1:
+       case KVM_REG_MIPS_CP0_CONFIG2:
+       case KVM_REG_MIPS_CP0_CONFIG3:
+       case KVM_REG_MIPS_CP0_CONFIG4:
+       case KVM_REG_MIPS_CP0_CONFIG5:
        case KVM_REG_MIPS_COUNT_CTL:
        case KVM_REG_MIPS_COUNT_RESUME:
        case KVM_REG_MIPS_COUNT_HZ:
@@ -703,6 +864,33 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+                                    struct kvm_enable_cap *cap)
+{
+       int r = 0;
+
+       if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap))
+               return -EINVAL;
+       if (cap->flags)
+               return -EINVAL;
+       if (cap->args[0])
+               return -EINVAL;
+
+       switch (cap->cap) {
+       case KVM_CAP_MIPS_FPU:
+               vcpu->arch.fpu_enabled = true;
+               break;
+       case KVM_CAP_MIPS_MSA:
+               vcpu->arch.msa_enabled = true;
+               break;
+       default:
+               r = -EINVAL;
+               break;
+       }
+
+       return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
                         unsigned long arg)
 {
@@ -760,6 +948,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
                        r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
                        break;
                }
+       case KVM_ENABLE_CAP: {
+               struct kvm_enable_cap cap;
+
+               r = -EFAULT;
+               if (copy_from_user(&cap, argp, sizeof(cap)))
+                       goto out;
+               r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+               break;
+       }
        default:
                r = -ENOIOCTLCMD;
        }
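Enabling the new capabilities follows the standard KVM_ENABLE_CAP flow: probe with KVM_CHECK_EXTENSION, then enable on the vcpu with zeroed flags and args, as the checks above require. A hedged userspace sketch (error handling elided, fds assumed already open):

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int enable_guest_fpu(int vm_fd, int vcpu_fd)
    {
            struct kvm_enable_cap cap;

            /* kvm_vm_ioctl_check_extension() reports 1 only when the
             * host actually has an FPU (r = !!cpu_has_fpu above).
             */
            if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MIPS_FPU) <= 0)
                    return -1;

            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_MIPS_FPU;   /* flags/args must stay zero */

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }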
@@ -868,11 +1065,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
        switch (ext) {
        case KVM_CAP_ONE_REG:
+       case KVM_CAP_ENABLE_CAP:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
+       case KVM_CAP_MIPS_FPU:
+               r = !!cpu_has_fpu;
+               break;
+       case KVM_CAP_MIPS_MSA:
+               /*
+                * We don't support MSA vector partitioning yet:
+                * 1) It would require explicit support which can't be tested
+                *    yet due to lack of support in current hardware.
+                * 2) It extends the state that would need to be saved/restored
+                *    by e.g. QEMU for migration.
+                *
+                * When vector partitioning hardware becomes available, support
+                * could be added by requiring a flag when enabling
+                * KVM_CAP_MIPS_MSA capability to indicate that userland knows
+                * to save/restore the appropriate extra state.
+                */
+               r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
+               break;
        default:
                r = 0;
                break;
@@ -1119,6 +1335,30 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_break(vcpu);
                break;
 
+       case T_TRAP:
+               ++vcpu->stat.trap_inst_exits;
+               trace_kvm_exit(vcpu, TRAP_INST_EXITS);
+               ret = kvm_mips_callbacks->handle_trap(vcpu);
+               break;
+
+       case T_MSAFPE:
+               ++vcpu->stat.msa_fpe_exits;
+               trace_kvm_exit(vcpu, MSA_FPE_EXITS);
+               ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
+               break;
+
+       case T_FPE:
+               ++vcpu->stat.fpe_exits;
+               trace_kvm_exit(vcpu, FPE_EXITS);
+               ret = kvm_mips_callbacks->handle_fpe(vcpu);
+               break;
+
+       case T_MSADIS:
+               ++vcpu->stat.msa_disabled_exits;
+               trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
+               ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
+               break;
+
        default:
                kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x  BadVaddr: %#lx Status: %#lx\n",
                        exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
@@ -1146,12 +1386,233 @@ skip_emul:
                }
        }
 
+       if (ret == RESUME_GUEST) {
+               /*
+                * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
+                * is live), restore FCR31 / MSACSR.
+                *
+                * This should be before returning to the guest exception
+                * vector, as it may well cause an [MSA] FP exception if there
+                * are pending exception bits unmasked. (see
+                * kvm_mips_csr_die_notify() for how that is handled).
+                */
+               if (kvm_mips_guest_has_fpu(&vcpu->arch) &&
+                   read_c0_status() & ST0_CU1)
+                       __kvm_restore_fcsr(&vcpu->arch);
+
+               if (kvm_mips_guest_has_msa(&vcpu->arch) &&
+                   read_c0_config5() & MIPS_CONF5_MSAEN)
+                       __kvm_restore_msacsr(&vcpu->arch);
+       }
+
        /* Disable HTW before returning to guest or host */
        htw_stop();
 
        return ret;
 }
 
+/* Enable FPU for guest and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       unsigned int sr, cfg5;
+
+       preempt_disable();
+
+       sr = kvm_read_c0_guest_status(cop0);
+
+       /*
+        * If MSA state is already live, it is undefined how it interacts with
+        * FR=0 FPU state, and we don't want to hit reserved instruction
+        * exceptions trying to save the MSA state later when CU=1 && FR=1, so
+        * play it safe and save it first.
+        *
+        * In theory we shouldn't ever hit this case since kvm_lose_fpu() should
+        * get called when guest CU1 is set; however, we can't trust the guest
+        * not to clobber the status register directly via the commpage.
+        */
+       if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) &&
+           vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+               kvm_lose_fpu(vcpu);
+
+       /*
+        * Enable FPU for guest
+        * We set FR and FRE according to guest context
+        */
+       change_c0_status(ST0_CU1 | ST0_FR, sr);
+       if (cpu_has_fre) {
+               cfg5 = kvm_read_c0_guest_config5(cop0);
+               change_c0_config5(MIPS_CONF5_FRE, cfg5);
+       }
+       enable_fpu_hazard();
+
+       /* If guest FPU state not active, restore it now */
+       if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+               __kvm_restore_fpu(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+       }
+
+       preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_MSA
+/* Enable MSA for guest and restore context */
+void kvm_own_msa(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       unsigned int sr, cfg5;
+
+       preempt_disable();
+
+       /*
+        * Enable FPU if enabled in guest, since we're restoring FPU context
+        * anyway. We set FR and FRE according to guest context.
+        */
+       if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+               sr = kvm_read_c0_guest_status(cop0);
+
+               /*
+                * If FR=0 FPU state is already live, it is undefined how it
+                * interacts with MSA state, so play it safe and save it first.
+                */
+               if (!(sr & ST0_FR) &&
+                   (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU |
+                               KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU)
+                       kvm_lose_fpu(vcpu);
+
+               change_c0_status(ST0_CU1 | ST0_FR, sr);
+               if (sr & ST0_CU1 && cpu_has_fre) {
+                       cfg5 = kvm_read_c0_guest_config5(cop0);
+                       change_c0_config5(MIPS_CONF5_FRE, cfg5);
+               }
+       }
+
+       /* Enable MSA for guest */
+       set_c0_config5(MIPS_CONF5_MSAEN);
+       enable_fpu_hazard();
+
+       switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) {
+       case KVM_MIPS_FPU_FPU:
+               /*
+                * Guest FPU state already loaded, only restore upper MSA state
+                */
+               __kvm_restore_msa_upper(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+               break;
+       case 0:
+               /* Neither FPU nor MSA already active, restore full MSA state */
+               __kvm_restore_msa(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+               if (kvm_mips_guest_has_fpu(&vcpu->arch))
+                       vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+               break;
+       default:
+               break;
+       }
+
+       preempt_enable();
+}
+#endif
+
+/* Drop FPU & MSA without saving it */
+void kvm_drop_fpu(struct kvm_vcpu *vcpu)
+{
+       preempt_disable();
+       if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+               disable_msa();
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA;
+       }
+       if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+               clear_c0_status(ST0_CU1 | ST0_FR);
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+       }
+       preempt_enable();
+}
+
+/* Save and disable FPU & MSA */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+       /*
+        * FPU & MSA get disabled in root context (hardware) when they are
+        * disabled in guest context (software), but the register state in the
+        * hardware may still be in use. This is why we explicitly re-enable the
+        * hardware before saving.
+        */
+
+       preempt_disable();
+       if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+               set_c0_config5(MIPS_CONF5_MSAEN);
+               enable_fpu_hazard();
+
+               __kvm_save_msa(&vcpu->arch);
+
+               /* Disable MSA & FPU */
+               disable_msa();
+               if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+                       clear_c0_status(ST0_CU1 | ST0_FR);
+               vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
+       } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+               set_c0_status(ST0_CU1);
+               enable_fpu_hazard();
+
+               __kvm_save_fpu(&vcpu->arch);
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+
+               /* Disable FPU */
+               clear_c0_status(ST0_CU1 | ST0_FR);
+       }
+       preempt_enable();
+}
+
+/*
+ * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are
+ * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP
+ * exception if cause bits are set in the value being written.
+ */
+static int kvm_mips_csr_die_notify(struct notifier_block *self,
+                                  unsigned long cmd, void *ptr)
+{
+       struct die_args *args = (struct die_args *)ptr;
+       struct pt_regs *regs = args->regs;
+       unsigned long pc;
+
+       /* Only interested in FPE and MSAFPE */
+       if (cmd != DIE_FP && cmd != DIE_MSAFP)
+               return NOTIFY_DONE;
+
+       /* Return immediately if guest context isn't active */
+       if (!(current->flags & PF_VCPU))
+               return NOTIFY_DONE;
+
+       /* Should never get here from user mode */
+       BUG_ON(user_mode(regs));
+
+       pc = instruction_pointer(regs);
+       switch (cmd) {
+       case DIE_FP:
+               /* match 2nd instruction in __kvm_restore_fcsr */
+               if (pc != (unsigned long)&__kvm_restore_fcsr + 4)
+                       return NOTIFY_DONE;
+               break;
+       case DIE_MSAFP:
+               /* match 2nd/3rd instruction in __kvm_restore_msacsr */
+               if (!cpu_has_msa ||
+                   pc < (unsigned long)&__kvm_restore_msacsr + 4 ||
+                   pc > (unsigned long)&__kvm_restore_msacsr + 8)
+                       return NOTIFY_DONE;
+               break;
+       }
+
+       /* Move PC forward a little and continue executing */
+       instruction_pointer(regs) += 4;
+
+       return NOTIFY_STOP;
+}
+
+static struct notifier_block kvm_mips_csr_die_notifier = {
+       .notifier_call = kvm_mips_csr_die_notify,
+};
+
 int __init kvm_mips_init(void)
 {
        int ret;
@@ -1161,6 +1622,8 @@ int __init kvm_mips_init(void)
        if (ret)
                return ret;
 
+       register_die_notifier(&kvm_mips_csr_die_notifier);
+
        /*
         * On MIPS, kernel modules are executed from "mapped space", which
         * requires TLBs. The TLB handling code is statically linked with
@@ -1173,7 +1636,6 @@ int __init kvm_mips_init(void)
        kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
        kvm_mips_is_error_pfn = is_error_pfn;
 
-       pr_info("KVM/MIPS Initialized\n");
        return 0;
 }
 
@@ -1185,7 +1647,7 @@ void __exit kvm_mips_exit(void)
        kvm_mips_release_pfn_clean = NULL;
        kvm_mips_is_error_pfn = NULL;
 
-       pr_info("KVM/MIPS unloaded\n");
+       unregister_die_notifier(&kvm_mips_csr_die_notifier);
 }
 
 module_init(kvm_mips_init);
diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S
new file mode 100644 (file)
index 0000000..d02f0c6
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * MIPS SIMD Architecture (MSA) context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+
+       .set    noreorder
+       .set    noat
+
+LEAF(__kvm_save_msa)
+       st_d    0,  VCPU_FPR0,  a0
+       st_d    1,  VCPU_FPR1,  a0
+       st_d    2,  VCPU_FPR2,  a0
+       st_d    3,  VCPU_FPR3,  a0
+       st_d    4,  VCPU_FPR4,  a0
+       st_d    5,  VCPU_FPR5,  a0
+       st_d    6,  VCPU_FPR6,  a0
+       st_d    7,  VCPU_FPR7,  a0
+       st_d    8,  VCPU_FPR8,  a0
+       st_d    9,  VCPU_FPR9,  a0
+       st_d    10, VCPU_FPR10, a0
+       st_d    11, VCPU_FPR11, a0
+       st_d    12, VCPU_FPR12, a0
+       st_d    13, VCPU_FPR13, a0
+       st_d    14, VCPU_FPR14, a0
+       st_d    15, VCPU_FPR15, a0
+       st_d    16, VCPU_FPR16, a0
+       st_d    17, VCPU_FPR17, a0
+       st_d    18, VCPU_FPR18, a0
+       st_d    19, VCPU_FPR19, a0
+       st_d    20, VCPU_FPR20, a0
+       st_d    21, VCPU_FPR21, a0
+       st_d    22, VCPU_FPR22, a0
+       st_d    23, VCPU_FPR23, a0
+       st_d    24, VCPU_FPR24, a0
+       st_d    25, VCPU_FPR25, a0
+       st_d    26, VCPU_FPR26, a0
+       st_d    27, VCPU_FPR27, a0
+       st_d    28, VCPU_FPR28, a0
+       st_d    29, VCPU_FPR29, a0
+       st_d    30, VCPU_FPR30, a0
+       st_d    31, VCPU_FPR31, a0
+       jr      ra
+        nop
+       END(__kvm_save_msa)
+
+LEAF(__kvm_restore_msa)
+       ld_d    0,  VCPU_FPR0,  a0
+       ld_d    1,  VCPU_FPR1,  a0
+       ld_d    2,  VCPU_FPR2,  a0
+       ld_d    3,  VCPU_FPR3,  a0
+       ld_d    4,  VCPU_FPR4,  a0
+       ld_d    5,  VCPU_FPR5,  a0
+       ld_d    6,  VCPU_FPR6,  a0
+       ld_d    7,  VCPU_FPR7,  a0
+       ld_d    8,  VCPU_FPR8,  a0
+       ld_d    9,  VCPU_FPR9,  a0
+       ld_d    10, VCPU_FPR10, a0
+       ld_d    11, VCPU_FPR11, a0
+       ld_d    12, VCPU_FPR12, a0
+       ld_d    13, VCPU_FPR13, a0
+       ld_d    14, VCPU_FPR14, a0
+       ld_d    15, VCPU_FPR15, a0
+       ld_d    16, VCPU_FPR16, a0
+       ld_d    17, VCPU_FPR17, a0
+       ld_d    18, VCPU_FPR18, a0
+       ld_d    19, VCPU_FPR19, a0
+       ld_d    20, VCPU_FPR20, a0
+       ld_d    21, VCPU_FPR21, a0
+       ld_d    22, VCPU_FPR22, a0
+       ld_d    23, VCPU_FPR23, a0
+       ld_d    24, VCPU_FPR24, a0
+       ld_d    25, VCPU_FPR25, a0
+       ld_d    26, VCPU_FPR26, a0
+       ld_d    27, VCPU_FPR27, a0
+       ld_d    28, VCPU_FPR28, a0
+       ld_d    29, VCPU_FPR29, a0
+       ld_d    30, VCPU_FPR30, a0
+       ld_d    31, VCPU_FPR31, a0
+       jr      ra
+        nop
+       END(__kvm_restore_msa)
+
+       .macro  kvm_restore_msa_upper   wr, off, base
+       .set    push
+       .set    noat
+#ifdef CONFIG_64BIT
+       ld      $1, \off(\base)
+       insert_d \wr, 1
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+       lw      $1, \off(\base)
+       insert_w \wr, 2
+       lw      $1, (\off+4)(\base)
+       insert_w \wr, 3
+#else /* CONFIG_CPU_BIG_ENDIAN */
+       lw      $1, (\off+4)(\base)
+       insert_w \wr, 2
+       lw      $1, \off(\base)
+       insert_w \wr, 3
+#endif
+       .set    pop
+       .endm
+
+LEAF(__kvm_restore_msa_upper)
+       kvm_restore_msa_upper   0,  VCPU_FPR0 +8, a0
+       kvm_restore_msa_upper   1,  VCPU_FPR1 +8, a0
+       kvm_restore_msa_upper   2,  VCPU_FPR2 +8, a0
+       kvm_restore_msa_upper   3,  VCPU_FPR3 +8, a0
+       kvm_restore_msa_upper   4,  VCPU_FPR4 +8, a0
+       kvm_restore_msa_upper   5,  VCPU_FPR5 +8, a0
+       kvm_restore_msa_upper   6,  VCPU_FPR6 +8, a0
+       kvm_restore_msa_upper   7,  VCPU_FPR7 +8, a0
+       kvm_restore_msa_upper   8,  VCPU_FPR8 +8, a0
+       kvm_restore_msa_upper   9,  VCPU_FPR9 +8, a0
+       kvm_restore_msa_upper   10, VCPU_FPR10+8, a0
+       kvm_restore_msa_upper   11, VCPU_FPR11+8, a0
+       kvm_restore_msa_upper   12, VCPU_FPR12+8, a0
+       kvm_restore_msa_upper   13, VCPU_FPR13+8, a0
+       kvm_restore_msa_upper   14, VCPU_FPR14+8, a0
+       kvm_restore_msa_upper   15, VCPU_FPR15+8, a0
+       kvm_restore_msa_upper   16, VCPU_FPR16+8, a0
+       kvm_restore_msa_upper   17, VCPU_FPR17+8, a0
+       kvm_restore_msa_upper   18, VCPU_FPR18+8, a0
+       kvm_restore_msa_upper   19, VCPU_FPR19+8, a0
+       kvm_restore_msa_upper   20, VCPU_FPR20+8, a0
+       kvm_restore_msa_upper   21, VCPU_FPR21+8, a0
+       kvm_restore_msa_upper   22, VCPU_FPR22+8, a0
+       kvm_restore_msa_upper   23, VCPU_FPR23+8, a0
+       kvm_restore_msa_upper   24, VCPU_FPR24+8, a0
+       kvm_restore_msa_upper   25, VCPU_FPR25+8, a0
+       kvm_restore_msa_upper   26, VCPU_FPR26+8, a0
+       kvm_restore_msa_upper   27, VCPU_FPR27+8, a0
+       kvm_restore_msa_upper   28, VCPU_FPR28+8, a0
+       kvm_restore_msa_upper   29, VCPU_FPR29+8, a0
+       kvm_restore_msa_upper   30, VCPU_FPR30+8, a0
+       kvm_restore_msa_upper   31, VCPU_FPR31+8, a0
+       jr      ra
+        nop
+       END(__kvm_restore_msa_upper)
+
+LEAF(__kvm_restore_msacsr)
+       lw      t0, VCPU_MSA_CSR(a0)
+       /*
+        * The ctcmsa must stay at this offset in __kvm_restore_msacsr.
+        * See kvm_mips_csr_die_notify() which handles t0 containing a value
+        * which triggers an MSA FP Exception, which must be stepped over and
+        * ignored since the set cause bits must remain there for the guest.
+        */
+       _ctcmsa MSA_CSR, t0
+       jr      ra
+        nop
+       END(__kvm_restore_msacsr)
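The endian-conditional loads in kvm_restore_msa_upper exist only to fetch the two memory words in the right order; on both endiannesses the net effect is the same: bits 64..95 of the vector land in word lane 2 and bits 96..127 in lane 3. An illustrative C restatement of that invariant (helper hypothetical):

    #include <stdint.h>

    /* The upper 64 bits of each 128-bit vector register are stored at
     * byte offset 8 of the FPR slot (VCPU_FPRn + 8). Independent of
     * byte order, lane 2 receives the low word of that value and lane 3
     * the high word; only the memory access order differs above.
     */
    static void upper_lanes(uint64_t upper64, uint32_t *lane2, uint32_t *lane3)
    {
            *lane2 = (uint32_t)upper64;          /* bits 64..95  */
            *lane3 = (uint32_t)(upper64 >> 32);  /* bits 96..127 */
    }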
index a74d6024c5ad5f5c7e0701a309e246bd42532992..888bb67070ac6d1139a7f06f5958e180680511ef 100644 (file)
@@ -25,6 +25,10 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = {
        "System Call",
        "Reserved Inst",
        "Break Inst",
+       "Trap Inst",
+       "MSA FPE",
+       "FPE",
+       "MSA Disabled",
        "D-Cache Flushes",
 };
 
index b6beb0e07b1b3b535f7625d61100e6c0087de00d..aed0ac2a4972cd1daf0f2992db6c100e9912fb70 100644 (file)
@@ -733,6 +733,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                }
        }
 
+       /* restore guest state to registers */
+       kvm_mips_callbacks->vcpu_set_regs(vcpu);
+
        local_irq_restore(flags);
 
 }
@@ -751,6 +754,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        vcpu->arch.preempt_entryhi = read_c0_entryhi();
        vcpu->arch.last_sched_cpu = cpu;
 
+       /* save guest state in registers */
+       kvm_mips_callbacks->vcpu_get_regs(vcpu);
+
        if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
             ASID_VERSION_MASK)) {
                kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
index fd7257b70e656fcb8c53d72b552f240f5b255ce6..d836ed5b0bc7ea38e36350304a6238a520e0d74d 100644 (file)
@@ -39,16 +39,30 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
 
 static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
 {
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
        struct kvm_run *run = vcpu->run;
        uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
        unsigned long cause = vcpu->arch.host_cp0_cause;
        enum emulation_result er = EMULATE_DONE;
        int ret = RESUME_GUEST;
 
-       if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1)
-               er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
-       else
+       if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+               /* FPU Unusable */
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+                   (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) {
+                       /*
+                        * Unusable/no FPU in guest:
+                        * deliver guest COP1 Unusable Exception
+                        */
+                       er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
+               } else {
+                       /* Restore FPU state */
+                       kvm_own_fpu(vcpu);
+                       er = EMULATE_DONE;
+               }
+       } else {
                er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
+       }
 
        switch (er) {
        case EMULATE_DONE:
@@ -330,6 +344,107 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
        return ret;
 }
 
+static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+       enum emulation_result er = EMULATE_DONE;
+       int ret = RESUME_GUEST;
+
+       er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu);
+       if (er == EMULATE_DONE) {
+               ret = RESUME_GUEST;
+       } else {
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               ret = RESUME_HOST;
+       }
+       return ret;
+}
+
+static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+       enum emulation_result er = EMULATE_DONE;
+       int ret = RESUME_GUEST;
+
+       er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu);
+       if (er == EMULATE_DONE) {
+               ret = RESUME_GUEST;
+       } else {
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               ret = RESUME_HOST;
+       }
+       return ret;
+}
+
+static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+       enum emulation_result er = EMULATE_DONE;
+       int ret = RESUME_GUEST;
+
+       er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu);
+       if (er == EMULATE_DONE) {
+               ret = RESUME_GUEST;
+       } else {
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               ret = RESUME_HOST;
+       }
+       return ret;
+}
+
+/**
+ * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu:      Virtual CPU context.
+ *
+ * Handle the guest attempting to use MSA when it is disabled.
+ */
+static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+       enum emulation_result er = EMULATE_DONE;
+       int ret = RESUME_GUEST;
+
+       if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
+           (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) {
+               /*
+                * No MSA in guest, or FPU enabled but not in FR=1 mode:
+                * deliver a guest reserved instruction exception
+                */
+               er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
+       } else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) {
+               /* MSA disabled by guest, guest MSA disabled exception */
+               er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu);
+       } else {
+               /* Restore MSA/FPU state */
+               kvm_own_msa(vcpu);
+               er = EMULATE_DONE;
+       }
+
+       switch (er) {
+       case EMULATE_DONE:
+               ret = RESUME_GUEST;
+               break;
+
+       case EMULATE_FAIL:
+               run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               ret = RESUME_HOST;
+               break;
+
+       default:
+               BUG();
+       }
+       return ret;
+}
+
 static int kvm_trap_emul_vm_init(struct kvm *kvm)
 {
        return 0;
@@ -351,8 +466,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
         * guest will come up as expected, for now we simulate a MIPS 24kc
         */
        kvm_write_c0_guest_prid(cop0, 0x00019300);
-       kvm_write_c0_guest_config(cop0,
-                                 MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+       /* Have config1, Cacheable, noncoherent, write-back, write allocate */
+       kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) |
+                                 (0x1 << CP0C0_AR) |
                                  (MMU_TYPE_R4000 << CP0C0_MT));
 
        /* Read the cache characteristics from the host Config1 Register */
@@ -368,10 +484,18 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
              (1 << CP0C1_WR) | (1 << CP0C1_CA));
        kvm_write_c0_guest_config1(cop0, config1);
 
-       kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2);
-       /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */
-       kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) |
-                                        (1 << CP0C3_ULRI));
+       /* Have config3, no tertiary/secondary caches implemented */
+       kvm_write_c0_guest_config2(cop0, MIPS_CONF_M);
+       /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */
+
+       /* Have config4, UserLocal */
+       kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI);
+
+       /* Have config5 */
+       kvm_write_c0_guest_config4(cop0, MIPS_CONF_M);
+
+       /* No config6 */
+       kvm_write_c0_guest_config5(cop0, 0);
 
        /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
        kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
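The Config writes above chain presence bits: each register's M bit (bit 31, MIPS_CONF_M) tells the guest the next Config register exists, which is why Config4 gets only MIPS_CONF_M and Config5 is written as zero, terminating the chain with no Config6. A small sketch of how a guest walks that chain (read_config() accessor hypothetical, standing in for mfc0):

    #define MIPS_CONF_M (1u << 31)  /* "next Config register present" */

    /* Walk Config0..Config7 the way a guest probes them: stop at the
     * first register whose M bit is clear.
     */
    static int last_config_present(unsigned int (*read_config)(int idx))
    {
            int idx = 0;

            while (idx < 7 && (read_config(idx) & MIPS_CONF_M))
                    idx++;

            return idx;     /* Config0..Config{idx} are implemented */
    }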
@@ -416,6 +540,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
        int ret = 0;
+       unsigned int cur, change;
 
        switch (reg->id) {
        case KVM_REG_MIPS_CP0_COUNT:
@@ -444,6 +569,44 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
                        kvm_write_c0_guest_cause(cop0, v);
                }
                break;
+       case KVM_REG_MIPS_CP0_CONFIG:
+               /* read-only for now */
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG1:
+               cur = kvm_read_c0_guest_config1(cop0);
+               change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config1(cop0, v);
+               }
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG2:
+               /* read-only for now */
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG3:
+               cur = kvm_read_c0_guest_config3(cop0);
+               change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config3(cop0, v);
+               }
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG4:
+               cur = kvm_read_c0_guest_config4(cop0);
+               change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config4(cop0, v);
+               }
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG5:
+               cur = kvm_read_c0_guest_config5(cop0);
+               change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config5(cop0, v);
+               }
+               break;
        case KVM_REG_MIPS_COUNT_CTL:
                ret = kvm_mips_set_count_ctl(vcpu, v);
                break;
@@ -459,6 +622,18 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
        return ret;
 }
 
+static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
+{
+       kvm_lose_fpu(vcpu);
+
+       return 0;
+}
+
+static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+
 static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        /* exit handlers */
        .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -470,6 +645,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        .handle_syscall = kvm_trap_emul_handle_syscall,
        .handle_res_inst = kvm_trap_emul_handle_res_inst,
        .handle_break = kvm_trap_emul_handle_break,
+       .handle_trap = kvm_trap_emul_handle_trap,
+       .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
+       .handle_fpe = kvm_trap_emul_handle_fpe,
+       .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
 
        .vm_init = kvm_trap_emul_vm_init,
        .vcpu_init = kvm_trap_emul_vcpu_init,
@@ -483,6 +662,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        .irq_clear = kvm_mips_irq_clear_cb,
        .get_one_reg = kvm_trap_emul_get_one_reg,
        .set_one_reg = kvm_trap_emul_set_one_reg,
+       .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
+       .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
 };
 
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
index 3b7f65cc42187e95627bdd14b461be0e60e7342b..cf9b4633257eb86dd14cea3656b94104ce8d909a 100644 (file)
@@ -75,11 +75,11 @@ static int rtctmp;
 int proc_dolasatrtc(struct ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
-       struct timespec ts;
+       struct timespec64 ts;
        int r;
 
        if (!write) {
-               read_persistent_clock(&ts);
+               read_persistent_clock64(&ts);
                rtctmp = ts.tv_sec;
                /* check for time < 0 and set to 0 */
                if (rtctmp < 0)
index 1f266575beb51c459432ac918e96e0b3b810ae85..a16e55cbd8ad99dce9f448d14c3a4ddeb20b8e42 100644 (file)
@@ -47,7 +47,6 @@ struct thread_info {
                                                  0-0x7FFFFFFF for user-thread
                                                  0-0xFFFFFFFF for kernel-thread
                                                */
-       struct restart_block    restart_block;
        struct pt_regs          *regs;
 };
 
@@ -64,9 +63,6 @@ struct thread_info {
        .cpu            = 0,                    \
        .preempt_count  = INIT_PREEMPT_COUNT,   \
        .addr_limit     = KERNEL_DS,            \
-       .restart_block  = {                     \
-               .fn = do_no_restart_syscall,    \
-       },                                      \
 }
 
 #define init_thread_info       (init_thread_union.thread_info)
index 71a330597adff689dcae092d63839aad1fb8b7d3..eff00e67c0a245f5208ece2f662f9055f328f3c6 100644 (file)
 #define PTR_IPENDING   37
 #define PTR_CPUID      38
 #define PTR_CTL6       39
-#define PTR_CTL7       40
+#define PTR_EXCEPTION  40
 #define PTR_PTEADDR    41
 #define PTR_TLBACC     42
 #define PTR_TLBMISC    43
+#define PTR_ECCINJ     44
+#define PTR_BADADDR    45
+#define PTR_CONFIG     46
+#define PTR_MPUBASE    47
+#define PTR_MPUACC     48
 
-#define NUM_PTRACE_REG (PTR_TLBMISC + 1)
+#define NUM_PTRACE_REG (PTR_MPUACC + 1)
 
 /* User structures for general purpose registers.  */
 struct user_pt_regs {
index 7729bd3f2e79d48937e7dbead303640610ca24ae..27b006c52e12e1193fd392033f5804c1c42c56da 100644 (file)
@@ -161,7 +161,7 @@ ENTRY(inthandler)
  ***********************************************************************
  */
 ENTRY(handle_trap)
-       ldw     r24, -4(ea)     /* instruction that caused the exception */
+       ldwio   r24, -4(ea)     /* instruction that caused the exception */
        srli    r24, r24, 4
        andi    r24, r24, 0x7c
        movia   r9,trap_table
index dda41e4fe7070885ee7ab77e4c5e9e18e51dd0a3..20662b0f6c9e30cd52279ce3274bcbc463fa5236 100644 (file)
@@ -43,7 +43,7 @@ static inline int rt_restore_ucontext(struct pt_regs *regs,
        int err;
 
        /* Always make any pending restarted system calls return -EINTR */
-       current_thread_info()->restart_block.fn = do_no_restart_syscall;
+       current->restart_block.fn = do_no_restart_syscall;
 
        err = __get_user(temp, &uc->uc_mcontext.version);
        if (temp != MCONTEXT_VERSION)
index 2ae482b4266931cbcdd28267630b4f7bd2f56074..796642932e2ef446a9e78b72e7fecccc4ed14647 100644 (file)
@@ -23,9 +23,6 @@ static void __flush_dcache(unsigned long start, unsigned long end)
        end += (cpuinfo.dcache_line_size - 1);
        end &= ~(cpuinfo.dcache_line_size - 1);
 
-       if (end > start + cpuinfo.dcache_size)
-               end = start + cpuinfo.dcache_size;
-
        for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
                __asm__ __volatile__ ("   flushda 0(%0)\n"
                                        : /* Outputs */
index 39b3a8f816f28d0ecd61ca8a110d22bb99fb4507..6249cdc834d14977ffe5344a4b15857de01abe79 100644 (file)
@@ -34,7 +34,7 @@
 #include <asm/kvm_para.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_ppc.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #define MAX_CPU     32
 #define MAX_SRC     256
@@ -289,11 +289,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
        clear_bit(n_IRQ, q->queue);
 }
 
-static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
-{
-       return test_bit(n_IRQ, q->queue);
-}
-
 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
 {
        int irq = -1;
@@ -1374,8 +1369,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
        return -ENXIO;
 }
 
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
-                        int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+                        struct kvm_io_device *this,
+                        gpa_t addr, int len, void *ptr)
 {
        struct openpic *opp = container_of(this, struct openpic, mmio);
        int ret;
@@ -1415,8 +1411,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
        return ret;
 }
 
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
-                         int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+                         struct kvm_io_device *this,
+                         gpa_t addr, int len, const void *ptr)
 {
        struct openpic *opp = container_of(this, struct openpic, mmio);
        int ret;
index 27c0face86f45cdac10ac74ba309de04094a8367..24bfe401373e44aad58268c95caf7e2a5e09198e 100644 (file)
@@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-       ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+       ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
                              bytes, &run->mmio.data);
 
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-       ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+       ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
                               bytes, &run->mmio.data);
 
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
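
The vcpu argument threaded through kvm_io_bus_read()/kvm_io_bus_write() (and through the kvm_io_device callbacks such as kvm_mpic_read() above) lets an in-kernel device attribute an MMIO access to the CPU that issued it, not just to the VM. A usage sketch restating the call shape these hunks establish (the wrapper function is illustrative):

    static int mmio_read_example(struct kvm_vcpu *vcpu, struct kvm_run *run,
                                 unsigned int bytes)
    {
            int idx, ret;

            /* srcu protects the bus while the device callback runs,
             * exactly as in the kvmppc hunks above. */
            idx = srcu_read_lock(&vcpu->kvm->srcu);
            ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
                                  bytes, &run->mmio.data);
            srcu_read_unlock(&vcpu->kvm->srcu, idx);
            return ret;
    }
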
index 0509bca5e830b656c8a553c047740b68b172f4b1..fcbe899fe299303d8e96d3faf0ba74eb5dc1ee53 100644 (file)
@@ -9,11 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/jump_label.h>
 #include <asm/ppc_asm.h>
 #include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/opal.h>
-#include <asm/jump_label.h>
 
        .section        ".text"
 
index ccd53f91e8aa8e4f3b30bc550f55a63f509818a8..74b5b8e239c8235ac61b529b8085cc3ceff096ed 100644 (file)
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <linux/jump_label.h>
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/jump_label.h>
 
        .section        ".text"
        
index b5682fd6c9846b2cdb259720e35a2326cf7c45a1..b7a67e3d2201e4d5988e9cb3df651651c12024c8 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/console.h>
 #include <linux/export.h>
-#include <linux/static_key.h>
+#include <linux/jump_label.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
index 58642fd29c878c8d1fb5d384ecf58427b553552e..2b77e235b5fbdf54707551e067da19bd9ffb3a11 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _ASM_S390_JUMP_LABEL_H
 #define _ASM_S390_JUMP_LABEL_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 6
@@ -39,4 +41,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
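
The __ASSEMBLY__ guard added here is the standard pattern for headers shared between C and assembly: numeric constants such as JUMP_LABEL_NOP_SIZE stay visible to .S files, while C-only declarations are fenced off from the assembler. A self-contained illustration with hypothetical names (nothing below is from this header):

    #ifndef _EXAMPLE_H
    #define _EXAMPLE_H

    #define EXAMPLE_NOP_SIZE 6      /* usable from both C and .S files */

    #ifndef __ASSEMBLY__
    struct example {                /* C-only: hidden from the assembler */
            unsigned long code;
    };
    #endif /* __ASSEMBLY__ */

    #endif /* _EXAMPLE_H */
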
index f407bbf5ee94ca5e2f6122951e52ce2d7db1a7ef..d01fc588b5c378fddc46eba49e28b4de4be1f1a9 100644 (file)
@@ -172,7 +172,9 @@ struct kvm_s390_sie_block {
        __u32   fac;                    /* 0x01a0 */
        __u8    reserved1a4[20];        /* 0x01a4 */
        __u64   cbrlo;                  /* 0x01b8 */
-       __u8    reserved1c0[30];        /* 0x01c0 */
+       __u8    reserved1c0[8];         /* 0x01c0 */
+       __u32   ecd;                    /* 0x01c8 */
+       __u8    reserved1cc[18];        /* 0x01cc */
        __u64   pp;                     /* 0x01de */
        __u8    reserved1e6[2];         /* 0x01e6 */
        __u64   itdba;                  /* 0x01e8 */
@@ -183,11 +185,17 @@ struct kvm_s390_itdb {
        __u8    data[256];
 } __packed;
 
+struct kvm_s390_vregs {
+       __vector128 vrs[32];
+       __u8    reserved200[512];       /* for future vector expansion */
+} __packed;
+
 struct sie_page {
        struct kvm_s390_sie_block sie_block;
        __u8 reserved200[1024];         /* 0x0200 */
        struct kvm_s390_itdb itdb;      /* 0x0600 */
-       __u8 reserved700[2304];         /* 0x0700 */
+       __u8 reserved700[1280];         /* 0x0700 */
+       struct kvm_s390_vregs vregs;    /* 0x0c00 */
 } __packed;
 
 struct kvm_vcpu_stat {
@@ -238,6 +246,7 @@ struct kvm_vcpu_stat {
        u32 instruction_sigp_stop;
        u32 instruction_sigp_stop_store_status;
        u32 instruction_sigp_store_status;
+       u32 instruction_sigp_store_adtl_status;
        u32 instruction_sigp_arch;
        u32 instruction_sigp_prefix;
        u32 instruction_sigp_restart;
@@ -270,6 +279,7 @@ struct kvm_vcpu_stat {
 #define PGM_SPECIAL_OPERATION          0x13
 #define PGM_OPERAND                    0x15
 #define PGM_TRACE_TABEL                        0x16
+#define PGM_VECTOR_PROCESSING          0x1b
 #define PGM_SPACE_SWITCH               0x1c
 #define PGM_HFP_SQUARE_ROOT            0x1d
 #define PGM_PC_TRANSLATION_SPEC                0x1f
@@ -334,6 +344,11 @@ enum irq_types {
        IRQ_PEND_COUNT
 };
 
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
 /*
  * Repressible (non-floating) machine check interrupts
  * subclass bits in MCIC
@@ -411,13 +426,32 @@ struct kvm_s390_local_interrupt {
        unsigned long pending_irqs;
 };
 
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT   8
+#define FIRQ_LIST_VIRTIO   9
+#define FIRQ_LIST_COUNT   10
+#define FIRQ_CNTR_IO       0
+#define FIRQ_CNTR_SERVICE  1
+#define FIRQ_CNTR_VIRTIO   2
+#define FIRQ_CNTR_PFAULT   3
+#define FIRQ_MAX_COUNT     4
+
 struct kvm_s390_float_interrupt {
+       unsigned long pending_irqs;
        spinlock_t lock;
-       struct list_head list;
-       atomic_t active;
+       struct list_head lists[FIRQ_LIST_COUNT];
+       int counters[FIRQ_MAX_COUNT];
+       struct kvm_s390_mchk_info mchk;
+       struct kvm_s390_ext_info srv_signal;
        int next_rr_cpu;
        unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
-       unsigned int irq_count;
 };
 
 struct kvm_hw_wp_info_arch {
@@ -465,6 +499,7 @@ struct kvm_vcpu_arch {
        s390_fp_regs      host_fpregs;
        unsigned int      host_acrs[NUM_ACRS];
        s390_fp_regs      guest_fpregs;
+       struct kvm_s390_vregs   *host_vregs;
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct kvm_s390_pgm_info pgm;
@@ -553,6 +588,7 @@ struct kvm_arch{
        int use_cmma;
        int user_cpu_state_ctrl;
        int user_sigp;
+       int user_stsi;
        struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
        wait_queue_head_t ipte_wq;
        int ipte_lock_count;
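
The kvm_host.h rework above replaces the single unsorted floating-interrupt list with a pending_irqs bitmap plus one list per source (eight I/O ISCs, pfault, virtio) and per-type counters, which is what lets the later interrupt.c hunks test and clear individual classes cheaply. The KVM_S390_MAX_VIRTIO_IRQS comment is plain arithmetic; a worked check (illustrative userspace code, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned long budget  = 2048UL * 1024;  /* 2M for descriptor pages */
            unsigned long per_irq = 24;             /* smallest entry: 1 queue */

            /* integer division rounds down: 2097152 / 24 = 87381 */
            printf("%lu\n", budget / per_irq);
            return 0;
    }
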
index 9c77e60b9a269a59bcde0d4fe0a5fb77348f1520..ef1a5fcc6c66bbf5705173b41371378c4b541483 100644 (file)
@@ -150,6 +150,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_CRS    (1UL << 3)
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
        __u64 prefix;   /* prefix register */
@@ -164,6 +165,9 @@ struct kvm_sync_regs {
        __u64 pft;      /* pfault token [PFAULT] */
        __u64 pfs;      /* pfault select [PFAULT] */
        __u64 pfc;      /* pfault compare [PFAULT] */
+       __u64 vrs[32][2];       /* vector registers */
+       __u8  reserved[512];    /* for future vector expansion */
+       __u32 fpc;      /* only valid with vector registers */
 };
 
 #define KVM_REG_S390_TODPR     (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
index d4096fdfc6ab45b02eda2f0a7258da5b98f46930..ee69c0854c8891067b67a0d8920e7f3cf671f308 100644 (file)
  * and returns a key, which can be used to find a mnemonic name
  * of the instruction in the icpt_insn_codes table.
  */
-#define icpt_insn_decoder(insn)                        \
+#define icpt_insn_decoder(insn) (              \
        INSN_DECODE_IPA0(0x01, insn, 48, 0xff)  \
        INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)  \
        INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)  \
        INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)  \
        INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)  \
        INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)  \
-       INSN_DECODE(insn)
+       INSN_DECODE(insn))
 
 #endif /* _UAPI_ASM_S390_SIE_H */
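
Wrapping icpt_insn_decoder() in parentheses is standard macro hygiene: the macro expands to an expression built from the INSN_DECODE* chain, and without outer parentheses an operator at the call site can bind into the expansion. A self-contained illustration with toy macros (not from this header):

    #define BAD_SUM(a, b)  a + b
    #define GOOD_SUM(a, b) ((a) + (b))

    int bad  = 2 * BAD_SUM(1, 3);   /* expands to 2 * 1 + 3  == 5 */
    int good = 2 * GOOD_SUM(1, 3);  /* expands to 2 * ((1) + (3)) == 8 */
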
index e07e91605353003084ff19511c1c520e8bca1dd7..8dc4db10d1608e81eafad41e5f87d1c92af7bbf5 100644 (file)
@@ -171,6 +171,7 @@ int main(void)
 #else /* CONFIG_32BIT */
        DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
        DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+       DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
        DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
        DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
        DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
index 20660dddb2d67f1e4ebdd16a2a89ff99602935f1..170ddd2018b31667df8619b471df42b7fb562705 100644 (file)
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
 {
        u64 nsecps;
 
-       if (tk->tkr.clock != &clocksource_tod)
+       if (tk->tkr_mono.clock != &clocksource_tod)
                return;
 
        /* Make userspace gettimeofday spin until we're done. */
        ++vdso_data->tb_update_count;
        smp_wmb();
-       vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+       vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
        vdso_data->xtime_clock_sec = tk->xtime_sec;
-       vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+       vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
        vdso_data->wtom_clock_sec =
                tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-       vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
-               + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
-       nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+       vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+               + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+       nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
        while (vdso_data->wtom_clock_nsec >= nsecps) {
                vdso_data->wtom_clock_nsec -= nsecps;
                vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
 
        vdso_data->xtime_coarse_sec = tk->xtime_sec;
        vdso_data->xtime_coarse_nsec =
-               (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+               (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
        vdso_data->wtom_coarse_sec =
                vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
        vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
                vdso_data->wtom_coarse_sec++;
        }
 
-       vdso_data->tk_mult = tk->tkr.mult;
-       vdso_data->tk_shift = tk->tkr.shift;
+       vdso_data->tk_mult = tk->tkr_mono.mult;
+       vdso_data->tk_shift = tk->tkr_mono.shift;
        smp_wmb();
        ++vdso_data->tb_update_count;
 }
@@ -283,7 +283,7 @@ void __init time_init(void)
        if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
                panic("Couldn't request external interrupt 0x1406");
 
-       if (clocksource_register(&clocksource_tod) != 0)
+       if (__clocksource_register(&clocksource_tod) != 0)
                panic("Could not register TOD clock source");
 
        /* Enable TOD clock interrupts on the boot cpu. */
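
update_vsyscall() above is the writer half of the vdso retry protocol: tb_update_count is incremented before and after the update, with smp_wmb() ordering the stores, so the count is odd while an update is in flight. A hedged sketch of the matching reader side (field names follow the hunk; the loop shape is the usual seqcount pattern, not copied from this patch):

    u64 sec, nsec;
    u32 count;

    do {
            count = READ_ONCE(vdso_data->tb_update_count);
            smp_rmb();                      /* pairs with writer's smp_wmb() */
            sec  = vdso_data->xtime_clock_sec;
            nsec = vdso_data->xtime_clock_nsec;
            smp_rmb();
    } while ((count & 1) ||
             count != READ_ONCE(vdso_data->tb_update_count));
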
index 9254afff250c968682db79c453d0542868bfdd9c..fc7ec95848c39c527c2a24ee723c9f45624e31a0 100644 (file)
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
        if (vcpu->run->s.regs.gprs[rx] & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+       rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
@@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
         * - gpr 3 contains the virtqueue index (passed as datamatch)
         * - gpr 4 contains the index on the bus (optionally)
         */
-       ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+       ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
                                      vcpu->run->s.regs.gprs[2] & 0xffffffff,
                                      8, &vcpu->run->s.regs.gprs[3],
                                      vcpu->run->s.regs.gprs[4]);
@@ -230,7 +230,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-       int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+       int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
index 267523cac6de7860cda644017e2b52ccfc7ea8d4..a7559f7207df3a0ac62d0fc16b199f3b4c6b6dac 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/pgtable.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include <asm/switch_to.h>
 
 union asce {
        unsigned long val;
@@ -207,6 +208,54 @@ union raddress {
        unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
+union alet {
+       u32 val;
+       struct {
+               u32 reserved : 7;
+               u32 p        : 1;
+               u32 alesn    : 8;
+               u32 alen     : 16;
+       };
+};
+
+union ald {
+       u32 val;
+       struct {
+               u32     : 1;
+               u32 alo : 24;
+               u32 all : 7;
+       };
+};
+
+struct ale {
+       unsigned long i      : 1; /* ALEN-Invalid Bit */
+       unsigned long        : 5;
+       unsigned long fo     : 1; /* Fetch-Only Bit */
+       unsigned long p      : 1; /* Private Bit */
+       unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
+       unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
+       unsigned long        : 32;
+       unsigned long        : 1;
+       unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
+       unsigned long        : 6;
+       unsigned long astesn : 32; /* ASTE Sequence Number */
+} __packed;
+
+struct aste {
+       unsigned long i      : 1; /* ASX-Invalid Bit */
+       unsigned long ato    : 29; /* Authority-Table Origin */
+       unsigned long        : 1;
+       unsigned long b      : 1; /* Base-Space Bit */
+       unsigned long ax     : 16; /* Authorization Index */
+       unsigned long atl    : 12; /* Authority-Table Length */
+       unsigned long        : 2;
+       unsigned long ca     : 1; /* Controlled-ASN Bit */
+       unsigned long ra     : 1; /* Reusable-ASN Bit */
+       unsigned long asce   : 64; /* Address-Space-Control Element */
+       unsigned long ald    : 32;
+       unsigned long astesn : 32;
+       /* ... more fields follow */
+} __packed;
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -307,15 +356,157 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
                ipte_unlock_simple(vcpu);
 }
 
-static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
+                         int write)
+{
+       union alet alet;
+       struct ale ale;
+       struct aste aste;
+       unsigned long ald_addr, authority_table_addr;
+       union ald ald;
+       int eax, rc;
+       u8 authority_table;
+
+       if (ar >= NUM_ACRS)
+               return -EINVAL;
+
+       save_access_regs(vcpu->run->s.regs.acrs);
+       alet.val = vcpu->run->s.regs.acrs[ar];
+
+       if (ar == 0 || alet.val == 0) {
+               asce->val = vcpu->arch.sie_block->gcr[1];
+               return 0;
+       } else if (alet.val == 1) {
+               asce->val = vcpu->arch.sie_block->gcr[7];
+               return 0;
+       }
+
+       if (alet.reserved)
+               return PGM_ALET_SPECIFICATION;
+
+       if (alet.p)
+               ald_addr = vcpu->arch.sie_block->gcr[5];
+       else
+               ald_addr = vcpu->arch.sie_block->gcr[2];
+       ald_addr &= 0x7fffffc0;
+
+       rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+       if (rc)
+               return rc;
+
+       if (alet.alen / 8 > ald.all)
+               return PGM_ALEN_TRANSLATION;
+
+       if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+               return PGM_ADDRESSING;
+
+       rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+                            sizeof(struct ale));
+       if (rc)
+               return rc;
+
+       if (ale.i == 1)
+               return PGM_ALEN_TRANSLATION;
+       if (ale.alesn != alet.alesn)
+               return PGM_ALE_SEQUENCE;
+
+       rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+       if (rc)
+               return rc;
+
+       if (aste.i)
+               return PGM_ASTE_VALIDITY;
+       if (aste.astesn != ale.astesn)
+               return PGM_ASTE_SEQUENCE;
+
+       if (ale.p == 1) {
+               eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+               if (ale.aleax != eax) {
+                       if (eax / 16 > aste.atl)
+                               return PGM_EXTENDED_AUTHORITY;
+
+                       authority_table_addr = aste.ato * 4 + eax / 4;
+
+                       rc = read_guest_real(vcpu, authority_table_addr,
+                                            &authority_table,
+                                            sizeof(u8));
+                       if (rc)
+                               return rc;
+
+                       if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+                               return PGM_EXTENDED_AUTHORITY;
+               }
+       }
+
+       if (ale.fo == 1 && write)
+               return PGM_PROTECTION;
+
+       asce->val = aste.asce;
+       return 0;
+}
+
+struct trans_exc_code_bits {
+       unsigned long addr : 52; /* Translation-exception Address */
+       unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+       unsigned long      : 6;
+       unsigned long b60  : 1;
+       unsigned long b61  : 1;
+       unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+       FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
+       FSI_STORE   = 1, /* Exception was due to store operation */
+       FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+                        ar_t ar, int write)
 {
+       int rc;
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+       struct trans_exc_code_bits *tec_bits;
+
+       memset(pgm, 0, sizeof(*pgm));
+       tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+       tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+       tec_bits->as = psw_bits(*psw).as;
+
+       if (!psw_bits(*psw).t) {
+               asce->val = 0;
+               asce->r = 1;
+               return 0;
+       }
+
        switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
        case PSW_AS_PRIMARY:
-               return vcpu->arch.sie_block->gcr[1];
+               asce->val = vcpu->arch.sie_block->gcr[1];
+               return 0;
        case PSW_AS_SECONDARY:
-               return vcpu->arch.sie_block->gcr[7];
+               asce->val = vcpu->arch.sie_block->gcr[7];
+               return 0;
        case PSW_AS_HOME:
-               return vcpu->arch.sie_block->gcr[13];
+               asce->val = vcpu->arch.sie_block->gcr[13];
+               return 0;
+       case PSW_AS_ACCREG:
+               rc = ar_translation(vcpu, asce, ar, write);
+               switch (rc) {
+               case PGM_ALEN_TRANSLATION:
+               case PGM_ALE_SEQUENCE:
+               case PGM_ASTE_VALIDITY:
+               case PGM_ASTE_SEQUENCE:
+               case PGM_EXTENDED_AUTHORITY:
+                       vcpu->arch.pgm.exc_access_id = ar;
+                       break;
+               case PGM_PROTECTION:
+                       tec_bits->b60 = 1;
+                       tec_bits->b61 = 1;
+                       break;
+               }
+               if (rc > 0)
+                       pgm->code = rc;
+               return rc;
        }
        return 0;
 }
@@ -330,10 +521,11 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  * @vcpu: virtual cpu
  * @gva: guest virtual address
  * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
  * @write: indicates if access is a write access
  *
  * Translate a guest virtual address into a guest absolute address by means
- * of dynamic address translation as specified by the architecuture.
+ * of dynamic address translation as specified by the architecture.
  * If the resulting absolute address is not available in the configuration
  * an addressing exception is indicated and @gpa will not be changed.
  *
@@ -345,7 +537,8 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  *           by the architecture
  */
 static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
-                                    unsigned long *gpa, int write)
+                                    unsigned long *gpa, const union asce asce,
+                                    int write)
 {
        union vaddress vaddr = {.addr = gva};
        union raddress raddr = {.addr = gva};
@@ -354,12 +547,10 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
        union ctlreg0 ctlreg0;
        unsigned long ptr;
        int edat1, edat2;
-       union asce asce;
 
        ctlreg0.val = vcpu->arch.sie_block->gcr[0];
        edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
-       asce.val = get_vcpu_asce(vcpu);
        if (asce.r)
                goto real_address;
        ptr = asce.origin * 4096;
@@ -506,48 +697,30 @@ static inline int is_low_address(unsigned long ga)
        return (ga & ~0x11fful) == 0;
 }
 
-static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+                                         const union asce asce)
 {
        union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
-       union asce asce;
 
        if (!ctlreg0.lap)
                return 0;
-       asce.val = get_vcpu_asce(vcpu);
        if (psw_bits(*psw).t && asce.p)
                return 0;
        return 1;
 }
 
-struct trans_exc_code_bits {
-       unsigned long addr : 52; /* Translation-exception Address */
-       unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
-       unsigned long      : 7;
-       unsigned long b61  : 1;
-       unsigned long as   : 2;  /* ASCE Identifier */
-};
-
-enum {
-       FSI_UNKNOWN = 0, /* Unknown whether fetch or store */
-       FSI_STORE   = 1, /* Exception was due to store operation */
-       FSI_FETCH   = 2  /* Exception was due to fetch operation */
-};
-
 static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
                            unsigned long *pages, unsigned long nr_pages,
-                           int write)
+                           const union asce asce, int write)
 {
        struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
        struct trans_exc_code_bits *tec_bits;
        int lap_enabled, rc;
 
-       memset(pgm, 0, sizeof(*pgm));
        tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-       tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
-       tec_bits->as = psw_bits(*psw).as;
-       lap_enabled = low_address_protection_enabled(vcpu);
+       lap_enabled = low_address_protection_enabled(vcpu, asce);
        while (nr_pages) {
                ga = kvm_s390_logical_to_effective(vcpu, ga);
                tec_bits->addr = ga >> PAGE_SHIFT;
@@ -557,7 +730,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
                }
                ga &= PAGE_MASK;
                if (psw_bits(*psw).t) {
-                       rc = guest_translate(vcpu, ga, pages, write);
+                       rc = guest_translate(vcpu, ga, pages, asce, write);
                        if (rc < 0)
                                return rc;
                        if (rc == PGM_PROTECTION)
@@ -578,7 +751,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
        return 0;
 }
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
                 unsigned long len, int write)
 {
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -591,20 +764,19 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
 
        if (!len)
                return 0;
-       /* Access register mode is not supported yet. */
-       if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-               return -EOPNOTSUPP;
+       rc = get_vcpu_asce(vcpu, &asce, ar, write);
+       if (rc)
+               return rc;
        nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
        pages = pages_array;
        if (nr_pages > ARRAY_SIZE(pages_array))
                pages = vmalloc(nr_pages * sizeof(unsigned long));
        if (!pages)
                return -ENOMEM;
-       asce.val = get_vcpu_asce(vcpu);
        need_ipte_lock = psw_bits(*psw).t && !asce.r;
        if (need_ipte_lock)
                ipte_lock(vcpu);
-       rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+       rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
        for (idx = 0; idx < nr_pages && !rc; idx++) {
                gpa = *(pages + idx) + (ga & ~PAGE_MASK);
                _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -652,7 +824,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * Note: The IPTE lock is not taken during this function, so the caller
  * has to take care of this.
  */
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
                            unsigned long *gpa, int write)
 {
        struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
@@ -661,26 +833,21 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
        union asce asce;
        int rc;
 
-       /* Access register mode is not supported yet. */
-       if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-               return -EOPNOTSUPP;
-
        gva = kvm_s390_logical_to_effective(vcpu, gva);
-       memset(pgm, 0, sizeof(*pgm));
        tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-       tec->as = psw_bits(*psw).as;
-       tec->fsi = write ? FSI_STORE : FSI_FETCH;
+       rc = get_vcpu_asce(vcpu, &asce, ar, write);
        tec->addr = gva >> PAGE_SHIFT;
-       if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+       if (rc)
+               return rc;
+       if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
                if (write) {
                        rc = pgm->code = PGM_PROTECTION;
                        return rc;
                }
        }
 
-       asce.val = get_vcpu_asce(vcpu);
        if (psw_bits(*psw).t && !asce.r) {      /* Use DAT? */
-               rc = guest_translate(vcpu, gva, gpa, write);
+               rc = guest_translate(vcpu, gva, gpa, asce, write);
                if (rc > 0) {
                        if (rc == PGM_PROTECTION)
                                tec->b61 = 1;
@@ -697,28 +864,51 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
 }
 
 /**
- * kvm_s390_check_low_addr_protection - check for low-address protection
- * @ga: Guest address
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+                   unsigned long length, int is_write)
+{
+       unsigned long gpa;
+       unsigned long currlen;
+       int rc = 0;
+
+       ipte_lock(vcpu);
+       while (length > 0 && !rc) {
+               currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+               rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+               gva += currlen;
+               length -= currlen;
+       }
+       ipte_unlock(vcpu);
+
+       return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @gra: Guest real address
  *
 * Checks whether an address is subject to low-address protection and sets
  * up vcpu->arch.pgm accordingly if necessary.
  *
  * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
  */
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
 {
        struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
        struct trans_exc_code_bits *tec_bits;
+       union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 
-       if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+       if (!ctlreg0.lap || !is_low_address(gra))
                return 0;
 
        memset(pgm, 0, sizeof(*pgm));
        tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
        tec_bits->fsi = FSI_STORE;
        tec_bits->as = psw_bits(*psw).as;
-       tec_bits->addr = ga >> PAGE_SHIFT;
+       tec_bits->addr = gra >> PAGE_SHIFT;
        pgm->code = PGM_PROTECTION;
 
        return pgm->code;
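
check_gva_range() splits the range at page boundaries so each guest_translate_address() call covers at most one page. A worked check of the currlen arithmetic, as illustrative userspace code mirroring the loop (PAGE_SIZE assumed 4096):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
            unsigned long gva = 0x1ff0, length = 0x30, room, currlen;

            while (length > 0) {
                    room    = PAGE_SIZE - (gva % PAGE_SIZE);
                    currlen = length < room ? length : room;
                    /* prints [0x1ff0, 0x2000) then [0x2000, 0x2020) */
                    printf("translate [%#lx, %#lx)\n", gva, gva + currlen);
                    gva    += currlen;
                    length -= currlen;
            }
            return 0;
    }
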
index 0149cf15058ab9e8d12918192353884ac4ce8f4f..ef03726cc6611acd1e52fb6970e2e802a1730cbd 100644 (file)
@@ -156,9 +156,11 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 }
 
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
-                           unsigned long *gpa, int write);
+                           ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+                   unsigned long length, int is_write);
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
                 unsigned long len, int write);
 
 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
@@ -168,6 +170,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * write_guest - copy data from kernel space to guest space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: source address in kernel space
  * @len: number of bytes to copy
  *
@@ -176,8 +179,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * If DAT is off data will be copied to guest real or absolute memory.
  * If DAT is on data will be copied to the address space as specified by
  * the address space bits of the PSW:
- * Primary, secondory or home space (access register mode is currently not
- * implemented).
+ * Primary, secondary, home space or access register mode.
  * The addressing mode of the PSW is also inspected, so that address wrap
  * around is taken into account for 24-, 31- and 64-bit addressing mode,
 * if the data to be copied crosses page boundaries in guest address space.
@@ -210,16 +212,17 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  *      if data has been changed in guest space in case of an exception.
  */
 static inline __must_check
-int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
                unsigned long len)
 {
-       return access_guest(vcpu, ga, data, len, 1);
+       return access_guest(vcpu, ga, ar, data, len, 1);
 }
 
 /**
  * read_guest - copy data from guest space to kernel space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: destination address in kernel space
  * @len: number of bytes to copy
  *
@@ -229,10 +232,10 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
  * data will be copied from guest space to kernel space.
  */
 static inline __must_check
-int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
               unsigned long len)
 {
-       return access_guest(vcpu, ga, data, len, 0);
+       return access_guest(vcpu, ga, ar, data, len, 0);
 }
 
 /**
@@ -330,6 +333,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 void ipte_lock(struct kvm_vcpu *vcpu);
 void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
 #endif /* __KVM_S390_GACCESS_H */
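
With the ar parameter added, callers name the access register that qualifies the guest address: it is only consulted when the PSW is in access-register mode, where ar_translation() uses it to pick the ASCE. A usage sketch matching the __diag_page_ref_service() hunk earlier in this series (the parameter-block type is hypothetical here, as it is not shown in the diff):

    struct prs_parm parm;   /* hypothetical parameter block type */
    int rc;

    /* rx holds the gpr number; it doubles as the access register
     * consulted when the guest runs in access-register mode. */
    rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx,
                    &parm, sizeof(parm));
    if (rc)
            return kvm_s390_inject_prog_cond(vcpu, rc);
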
index 3e8d4092ce30f10a6bd4b9e486e926889a937b2e..e97b3455d7e6bfbb7b12c606a5224c5efeba37d4 100644 (file)
@@ -191,8 +191,8 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
        if (!wp_info->old_data)
                return -ENOMEM;
        /* try to backup the original value */
-       ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
-                        wp_info->len);
+       ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+                            wp_info->len);
        if (ret) {
                kfree(wp_info->old_data);
                wp_info->old_data = NULL;
@@ -362,8 +362,8 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
                        continue;
 
                /* refetch the wp data and compare it to the old value */
-               if (!read_guest(vcpu, wp_info->phys_addr, temp,
-                               wp_info->len)) {
+               if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+                                   wp_info->len)) {
                        if (memcmp(temp, wp_info->old_data, wp_info->len)) {
                                kfree(temp);
                                return wp_info;
index bebd2157edd019448ff772c9ac75a778e7e124f4..9e3779e3e496314a4e3f15823c152867c3a3cadd 100644 (file)
@@ -165,6 +165,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
                pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
                pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
                break;
+       case PGM_VECTOR_PROCESSING:
        case PGM_DATA:
                pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
                break;
@@ -319,7 +320,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
        /* Make sure that the source is paged-in */
        rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
-                                    &srcaddr, 0);
+                                    reg2, &srcaddr, 0);
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -328,7 +329,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
        /* Make sure that the destination is paged-in */
        rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
-                                    &dstaddr, 1);
+                                    reg1, &dstaddr, 1);
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
index 073b5f387d1dd3484186dd69dcfc5aae63d90b21..9de47265ef73da07ffd7ef37337bf2e44e59bd46 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * handling kvm guest interrupts
  *
- * Copyright IBM Corp. 2008,2014
+ * Copyright IBM Corp. 2008, 2015
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
+#include <asm/dis.h>
 #include <asm/uaccess.h>
 #include <asm/sclp.h>
+#include <asm/isc.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 
-static int is_ioint(u64 type)
-{
-       return ((type & 0xfffe0000u) != 0xfffe0000u);
-}
-
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -72,70 +70,45 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static u64 int_word_to_isc_bits(u32 int_word)
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->arch.sie_block->ckc <
+             get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+               return 0;
+       return ckc_interrupts_enabled(vcpu);
+}
+
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+       return !psw_extint_disabled(vcpu) &&
+              (vcpu->arch.sie_block->gcr[0] & 0x400ul);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+       return (vcpu->arch.sie_block->cputm >> 63) &&
+              cpu_timer_interrupts_enabled(vcpu);
+}
+
+static inline int is_ioirq(unsigned long irq_type)
 {
-       u8 isc = (int_word & 0x38000000) >> 27;
+       return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
+               (irq_type <= IRQ_PEND_IO_ISC_7));
+}
 
+static uint64_t isc_to_isc_bits(int isc)
+{
        return (0x80 >> isc) << 24;
 }
 
-static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
-                                     struct kvm_s390_interrupt_info *inti)
+static inline u8 int_word_to_isc(u32 int_word)
 {
-       switch (inti->type) {
-       case KVM_S390_INT_EXTERNAL_CALL:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_EMERGENCY:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_CLOCK_COMP:
-               return ckc_interrupts_enabled(vcpu);
-       case KVM_S390_INT_CPU_TIMER:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_SERVICE:
-       case KVM_S390_INT_PFAULT_INIT:
-       case KVM_S390_INT_PFAULT_DONE:
-       case KVM_S390_INT_VIRTIO:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
-                       return 1;
-               return 0;
-       case KVM_S390_PROGRAM_INT:
-       case KVM_S390_SIGP_STOP:
-       case KVM_S390_SIGP_SET_PREFIX:
-       case KVM_S390_RESTART:
-               return 1;
-       case KVM_S390_MCHK:
-               if (psw_mchk_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               if (psw_ioint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[6] &
-                   int_word_to_isc_bits(inti->io.io_int_word))
-                       return 1;
-               return 0;
-       default:
-               printk(KERN_WARNING "illegal interrupt type %llx\n",
-                      inti->type);
-               BUG();
-       }
-       return 0;
+       return (int_word & 0x38000000) >> 27;
+}
+
+static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+{
+       return vcpu->kvm->arch.float_int.pending_irqs;
 }
 
 static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
@@ -143,12 +116,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
        return vcpu->arch.local_int.pending_irqs;
 }
 
-static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+                                  unsigned long active_mask)
+{
+       int i;
+
+       for (i = 0; i <= MAX_ISC; i++)
+               if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+                       active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+
+       return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 {
-       unsigned long active_mask = pending_local_irqs(vcpu);
+       unsigned long active_mask;
+
+       active_mask = pending_local_irqs(vcpu);
+       active_mask |= pending_floating_irqs(vcpu);
 
        if (psw_extint_disabled(vcpu))
                active_mask &= ~IRQ_PEND_EXT_MASK;
+       if (psw_ioint_disabled(vcpu))
+               active_mask &= ~IRQ_PEND_IO_MASK;
+       else
+               active_mask = disable_iscs(vcpu, active_mask);
        if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
                __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
        if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
@@ -157,8 +149,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
                __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
        if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
                __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+               __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
        if (psw_mchk_disabled(vcpu))
                active_mask &= ~IRQ_PEND_MCHK_MASK;
+       if (!(vcpu->arch.sie_block->gcr[14] &
+             vcpu->kvm->arch.float_int.mchk.cr14))
+               __clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
 
        /*
         * STOP irqs will never be actively delivered. They are triggered via
@@ -200,6 +197,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
        atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+       if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+               return;
+       else if (psw_ioint_disabled(vcpu))
+               __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+       else
+               vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
        if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
@@ -226,47 +233,17 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
                __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
 }
 
-/* Set interception request for non-deliverable local interrupts */
-static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
 {
+       set_intercept_indicators_io(vcpu);
        set_intercept_indicators_ext(vcpu);
        set_intercept_indicators_mchk(vcpu);
        set_intercept_indicators_stop(vcpu);
 }
 
-static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
-                                     struct kvm_s390_interrupt_info *inti)
-{
-       switch (inti->type) {
-       case KVM_S390_INT_SERVICE:
-       case KVM_S390_INT_PFAULT_DONE:
-       case KVM_S390_INT_VIRTIO:
-               if (psw_extint_disabled(vcpu))
-                       __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
-               else
-                       vcpu->arch.sie_block->lctl |= LCTL_CR0;
-               break;
-       case KVM_S390_MCHK:
-               if (psw_mchk_disabled(vcpu))
-                       vcpu->arch.sie_block->ictl |= ICTL_LPSW;
-               else
-                       vcpu->arch.sie_block->lctl |= LCTL_CR14;
-               break;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               if (psw_ioint_disabled(vcpu))
-                       __set_cpuflag(vcpu, CPUSTAT_IO_INT);
-               else
-                       vcpu->arch.sie_block->lctl |= LCTL_CR6;
-               break;
-       default:
-               BUG();
-       }
-}
-
 static u16 get_ilc(struct kvm_vcpu *vcpu)
 {
-       const unsigned short table[] = { 2, 4, 4, 6 };
-
        switch (vcpu->arch.sie_block->icptcode) {
        case ICPT_INST:
        case ICPT_INSTPROGI:
@@ -274,7 +251,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
        case ICPT_PARTEXEC:
        case ICPT_IOINST:
                /* last instruction only stored for these icptcodes */
-               return table[vcpu->arch.sie_block->ipa >> 14];
+               return insn_length(vcpu->arch.sie_block->ipa >> 8);
        case ICPT_PROGI:
                return vcpu->arch.sie_block->pgmilc;
        default:
@@ -350,38 +327,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
 
 static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_mchk_info mchk;
-       int rc;
+       struct kvm_s390_mchk_info mchk = {};
+       unsigned long adtl_status_addr;
+       int deliver = 0;
+       int rc = 0;
 
+       spin_lock(&fi->lock);
        spin_lock(&li->lock);
-       mchk = li->irq.mchk;
+       if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+           test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+               /*
+                * If there was an exigent machine check pending, then any
+                * repressible machine checks that might have been pending
+                * are indicated along with it, so always clear bits for
+                * repressible and exigent interrupts
+                */
+               mchk = li->irq.mchk;
+               clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+               clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+               memset(&li->irq.mchk, 0, sizeof(mchk));
+               deliver = 1;
+       }
        /*
-        * If there was an exigent machine check pending, then any repressible
-        * machine checks that might have been pending are indicated along
-        * with it, so always clear both bits
+        * We indicate floating repressible conditions along with
+        * other pending conditions. Channel Report Pending and Channel
+        * Subsystem damage are the only two and are indicated by
+        * bits in mcic and masked in cr14.
         */
-       clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
-       clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
-       memset(&li->irq.mchk, 0, sizeof(mchk));
+       if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+               mchk.mcic |= fi->mchk.mcic;
+               mchk.cr14 |= fi->mchk.cr14;
+               memset(&fi->mchk, 0, sizeof(mchk));
+               deliver = 1;
+       }
        spin_unlock(&li->lock);
+       spin_unlock(&fi->lock);
 
-       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-                  mchk.mcic);
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-                                        mchk.cr14, mchk.mcic);
-
-       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-       rc |= put_guest_lc(vcpu, mchk.mcic,
-                          (u64 __user *) __LC_MCCK_CODE);
-       rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
-                          (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-                            &mchk.fixed_logout, sizeof(mchk.fixed_logout));
-       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       if (deliver) {
+               VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+                          mchk.mcic);
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                                KVM_S390_MCHK,
+                                                mchk.cr14, mchk.mcic);
+
+               rc  = kvm_s390_vcpu_store_status(vcpu,
+                                                KVM_S390_STORE_STATUS_PREFIXED);
+               rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
+                                   &adtl_status_addr,
+                                   sizeof(unsigned long));
+               rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
+                                                     adtl_status_addr);
+               rc |= put_guest_lc(vcpu, mchk.mcic,
+                                  (u64 __user *) __LC_MCCK_CODE);
+               rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+                                  (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+                                    &mchk.fixed_logout,
+                                    sizeof(mchk.fixed_logout));
+               rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                                   &vcpu->arch.sie_block->gpsw,
+                                   sizeof(psw_t));
+       }
        return rc ? -EFAULT : 0;
 }
 
@@ -484,7 +495,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        struct kvm_s390_pgm_info pgm_info;
-       int rc = 0;
+       int rc = 0, nullifying = false;
        u16 ilc = get_ilc(vcpu);
 
        spin_lock(&li->lock);
@@ -509,6 +520,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
        case PGM_LX_TRANSLATION:
        case PGM_PRIMARY_AUTHORITY:
        case PGM_SECONDARY_AUTHORITY:
+               nullifying = true;
+               /* fall through */
        case PGM_SPACE_SWITCH:
                rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                  (u64 *)__LC_TRANS_EXC_CODE);
@@ -521,6 +534,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
        case PGM_EXTENDED_AUTHORITY:
                rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
                                  (u8 *)__LC_EXC_ACCESS_ID);
+               nullifying = true;
                break;
        case PGM_ASCE_TYPE:
        case PGM_PAGE_TRANSLATION:
@@ -534,6 +548,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                                   (u8 *)__LC_EXC_ACCESS_ID);
                rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
                                   (u8 *)__LC_OP_ACCESS_ID);
+               nullifying = true;
                break;
        case PGM_MONITOR:
                rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
@@ -541,6 +556,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                rc |= put_guest_lc(vcpu, pgm_info.mon_code,
                                   (u64 *)__LC_MON_CODE);
                break;
+       case PGM_VECTOR_PROCESSING:
        case PGM_DATA:
                rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
                                  (u32 *)__LC_DATA_EXC_CODE);
@@ -551,6 +567,15 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
                                   (u8 *)__LC_EXC_ACCESS_ID);
                break;
+       case PGM_STACK_FULL:
+       case PGM_STACK_EMPTY:
+       case PGM_STACK_SPECIFICATION:
+       case PGM_STACK_TYPE:
+       case PGM_STACK_OPERATION:
+       case PGM_TRACE_TABEL:
+       case PGM_CRYPTO_OPERATION:
+               nullifying = true;
+               break;
        }
 
        if (pgm_info.code & PGM_PER) {
@@ -564,7 +589,12 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                                   (u8 *) __LC_PER_ACCESS_ID);
        }
 
+       if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
+               kvm_s390_rewind_psw(vcpu, ilc);
+
        rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+       rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+                                (u64 *) __LC_LAST_BREAK);
        rc |= put_guest_lc(vcpu, pgm_info.code,
                           (u16 *)__LC_PGM_INT_CODE);
        rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -574,16 +604,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
        return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
-                                         struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+       struct kvm_s390_ext_info ext;
+       int rc = 0;
+
+       spin_lock(&fi->lock);
+       if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+               spin_unlock(&fi->lock);
+               return 0;
+       }
+       ext = fi->srv_signal;
+       memset(&fi->srv_signal, 0, sizeof(ext));
+       clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
 
        VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-                  inti->ext.ext_params);
+                  ext.ext_params);
        vcpu->stat.deliver_service_signal++;
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                        inti->ext.ext_params, 0);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+                                        ext.ext_params, 0);
 
        rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
        rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
@@ -591,106 +632,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
                             &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+       rc |= put_guest_lc(vcpu, ext.ext_params,
                           (u32 *)__LC_EXT_PARAMS);
+
        return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
-                                          struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+       struct kvm_s390_interrupt_info *inti;
+       int rc = 0;
 
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-                                        KVM_S390_INT_PFAULT_DONE, 0,
-                                        inti->ext.ext_params2);
+       spin_lock(&fi->lock);
+       inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+                                       struct kvm_s390_interrupt_info,
+                                       list);
+       if (inti) {
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                               KVM_S390_INT_PFAULT_DONE, 0,
+                               inti->ext.ext_params2);
+               list_del(&inti->list);
+               fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+       }
+       if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+               clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
 
-       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-       rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                          (u64 *)__LC_EXT_PARAMS2);
+       if (inti) {
+               rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+                               (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, PFAULT_DONE,
+                               (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                               (u64 *)__LC_EXT_PARAMS2);
+               kfree(inti);
+       }
        return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
-                                        struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+       struct kvm_s390_interrupt_info *inti;
+       int rc = 0;
 
-       VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-                  inti->ext.ext_params, inti->ext.ext_params2);
-       vcpu->stat.deliver_virtio_interrupt++;
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                        inti->ext.ext_params,
-                                        inti->ext.ext_params2);
+       spin_lock(&fi->lock);
+       inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+                                       struct kvm_s390_interrupt_info,
+                                       list);
+       if (inti) {
+               VCPU_EVENT(vcpu, 4,
+                          "interrupt: virtio parm:%x,parm64:%llx",
+                          inti->ext.ext_params, inti->ext.ext_params2);
+               vcpu->stat.deliver_virtio_interrupt++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                               inti->type,
+                               inti->ext.ext_params,
+                               inti->ext.ext_params2);
+               list_del(&inti->list);
+               fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+       }
+       if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+               clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
 
-       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-       rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                          (u32 *)__LC_EXT_PARAMS);
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                          (u64 *)__LC_EXT_PARAMS2);
+       if (inti) {
+               rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+                               (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+                               (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                               (u32 *)__LC_EXT_PARAMS);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                               (u64 *)__LC_EXT_PARAMS2);
+               kfree(inti);
+       }
        return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
-                                    struct kvm_s390_interrupt_info *inti)
+                                    unsigned long irq_type)
 {
-       int rc;
+       struct list_head *isc_list;
+       struct kvm_s390_float_interrupt *fi;
+       struct kvm_s390_interrupt_info *inti = NULL;
+       int rc = 0;
 
-       VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-       vcpu->stat.deliver_io_int++;
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                        ((__u32)inti->io.subchannel_id << 16) |
-                                               inti->io.subchannel_nr,
-                                        ((__u64)inti->io.io_int_parm << 32) |
-                                               inti->io.io_int_word);
-
-       rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-                          (u16 *)__LC_SUBCHANNEL_ID);
-       rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-                          (u16 *)__LC_SUBCHANNEL_NR);
-       rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-                          (u32 *)__LC_IO_INT_PARM);
-       rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-                          (u32 *)__LC_IO_INT_WORD);
-       rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       return rc ? -EFAULT : 0;
-}
+       fi = &vcpu->kvm->arch.float_int;
 
-static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
-                                          struct kvm_s390_interrupt_info *inti)
-{
-       struct kvm_s390_mchk_info *mchk = &inti->mchk;
-       int rc;
+       spin_lock(&fi->lock);
+       isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+       inti = list_first_entry_or_null(isc_list,
+                                       struct kvm_s390_interrupt_info,
+                                       list);
+       if (inti) {
+               VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+               vcpu->stat.deliver_io_int++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                               inti->type,
+                               ((__u32)inti->io.subchannel_id << 16) |
+                               inti->io.subchannel_nr,
+                               ((__u64)inti->io.io_int_parm << 32) |
+                               inti->io.io_int_word);
+               list_del(&inti->list);
+               fi->counters[FIRQ_CNTR_IO] -= 1;
+       }
+       if (list_empty(isc_list))
+               clear_bit(irq_type, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+
+       if (inti) {
+               rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+                               (u16 *)__LC_SUBCHANNEL_ID);
+               rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+                               (u16 *)__LC_SUBCHANNEL_NR);
+               rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+                               (u32 *)__LC_IO_INT_PARM);
+               rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+                               (u32 *)__LC_IO_INT_WORD);
+               rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               kfree(inti);
+       }
 
-       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-                  mchk->mcic);
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-                                        mchk->cr14, mchk->mcic);
-
-       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-       rc |= put_guest_lc(vcpu, mchk->mcic,
-                       (u64 __user *) __LC_MCCK_CODE);
-       rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
-                       (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-                            &mchk->fixed_logout, sizeof(mchk->fixed_logout));
-       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        return rc ? -EFAULT : 0;
 }
 
@@ -698,6 +779,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
 
 static const deliver_irq_t deliver_irq_funcs[] = {
        [IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+       [IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
        [IRQ_PEND_PROG]           = __deliver_prog,
        [IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
        [IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
@@ -706,36 +788,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
        [IRQ_PEND_RESTART]        = __deliver_restart,
        [IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
        [IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+       [IRQ_PEND_EXT_SERVICE]    = __deliver_service,
+       [IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
+       [IRQ_PEND_VIRTIO]         = __deliver_virtio,
 };
 
-static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
-                                          struct kvm_s390_interrupt_info *inti)
-{
-       int rc;
-
-       switch (inti->type) {
-       case KVM_S390_INT_SERVICE:
-               rc = __deliver_service(vcpu, inti);
-               break;
-       case KVM_S390_INT_PFAULT_DONE:
-               rc = __deliver_pfault_done(vcpu, inti);
-               break;
-       case KVM_S390_INT_VIRTIO:
-               rc = __deliver_virtio(vcpu, inti);
-               break;
-       case KVM_S390_MCHK:
-               rc = __deliver_mchk_floating(vcpu, inti);
-               break;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               rc = __deliver_io(vcpu, inti);
-               break;
-       default:
-               BUG();
-       }
-
-       return rc;
-}
-
 /* Check whether an external call is pending (deliverable or not) */
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -751,21 +808,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
-       struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-       struct kvm_s390_interrupt_info  *inti;
        int rc;
 
-       rc = !!deliverable_local_irqs(vcpu);
-
-       if ((!rc) && atomic_read(&fi->active)) {
-               spin_lock(&fi->lock);
-               list_for_each_entry(inti, &fi->list, list)
-                       if (__interrupt_is_deliverable(vcpu, inti)) {
-                               rc = 1;
-                               break;
-                       }
-               spin_unlock(&fi->lock);
-       }
+       rc = !!deliverable_irqs(vcpu);
 
        if (!rc && kvm_cpu_has_pending_timer(vcpu))
                rc = 1;
@@ -784,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       if (!(vcpu->arch.sie_block->ckc <
-             get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-               return 0;
-       if (!ckc_interrupts_enabled(vcpu))
-               return 0;
-       return 1;
+       return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
 }
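
The open-coded clock-comparator test is folded into two helpers here; based on the lines removed above, ckc_irq_pending() amounts to the following sketch (cpu_timer_irq_pending() does the analogous check for the CPU timer):

static int ckc_irq_pending_sketch(struct kvm_vcpu *vcpu)
{
        /* clock comparator not yet reached? */
        if (vcpu->arch.sie_block->ckc >=
            get_tod_clock_fast() + vcpu->arch.sie_block->epoch)
                return 0;
        return ckc_interrupts_enabled(vcpu);
}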
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -884,60 +924,45 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-       struct kvm_s390_interrupt_info  *n, *inti = NULL;
        deliver_irq_t func;
-       int deliver;
        int rc = 0;
        unsigned long irq_type;
-       unsigned long deliverable_irqs;
+       unsigned long irqs;
 
        __reset_intercept_indicators(vcpu);
 
        /* pending ckc conditions might have been invalidated */
        clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-       if (kvm_cpu_has_pending_timer(vcpu))
+       if (ckc_irq_pending(vcpu))
                set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 
+       /* pending cpu timer conditions might have been invalidated */
+       clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+       if (cpu_timer_irq_pending(vcpu))
+               set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
        do {
-               deliverable_irqs = deliverable_local_irqs(vcpu);
+               irqs = deliverable_irqs(vcpu);
                /* bits are in the order of interrupt priority */
-               irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+               irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
                if (irq_type == IRQ_PEND_COUNT)
                        break;
-               func = deliver_irq_funcs[irq_type];
-               if (!func) {
-                       WARN_ON_ONCE(func == NULL);
-                       clear_bit(irq_type, &li->pending_irqs);
-                       continue;
+               if (is_ioirq(irq_type)) {
+                       rc = __deliver_io(vcpu, irq_type);
+               } else {
+                       func = deliver_irq_funcs[irq_type];
+                       if (!func) {
+                               WARN_ON_ONCE(func == NULL);
+                               clear_bit(irq_type, &li->pending_irqs);
+                               continue;
+                       }
+                       rc = func(vcpu);
                }
-               rc = func(vcpu);
-       } while (!rc && irq_type != IRQ_PEND_COUNT);
+               if (rc)
+                       break;
+       } while (!rc);
 
-       set_intercept_indicators_local(vcpu);
-
-       if (!rc && atomic_read(&fi->active)) {
-               do {
-                       deliver = 0;
-                       spin_lock(&fi->lock);
-                       list_for_each_entry_safe(inti, n, &fi->list, list) {
-                               if (__interrupt_is_deliverable(vcpu, inti)) {
-                                       list_del(&inti->list);
-                                       fi->irq_count--;
-                                       deliver = 1;
-                                       break;
-                               }
-                               __set_intercept_indicator(vcpu, inti);
-                       }
-                       if (list_empty(&fi->list))
-                               atomic_set(&fi->active, 0);
-                       spin_unlock(&fi->lock);
-                       if (deliver) {
-                               rc = __deliver_floating_interrupt(vcpu, inti);
-                               kfree(inti);
-                       }
-               } while (!rc && deliver);
-       }
+       set_intercept_indicators(vcpu);
 
        return rc;
 }
@@ -1172,80 +1197,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+                                                 int isc, u32 schid)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+       struct kvm_s390_interrupt_info *iter;
+       u16 id = (schid & 0xffff0000U) >> 16;
+       u16 nr = schid & 0x0000ffffU;
 
+       spin_lock(&fi->lock);
+       list_for_each_entry(iter, isc_list, list) {
+               if (schid && (id != iter->io.subchannel_id ||
+                             nr != iter->io.subchannel_nr))
+                       continue;
+               /* found an appropriate entry */
+               list_del_init(&iter->list);
+               fi->counters[FIRQ_CNTR_IO] -= 1;
+               if (list_empty(isc_list))
+                       clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+               spin_unlock(&fi->lock);
+               return iter;
+       }
+       spin_unlock(&fi->lock);
+       return NULL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ */
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-                                                   u64 cr6, u64 schid)
+                                                   u64 isc_mask, u32 schid)
+{
+       struct kvm_s390_interrupt_info *inti = NULL;
+       int isc;
+
+       for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+               if (isc_mask & isc_to_isc_bits(isc))
+                       inti = get_io_int(kvm, isc, schid);
+       }
+       return inti;
+}
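
A hypothetical caller, following the comment above (only the names taken from this patch are real; the rest is illustrative): pass the guest's CR6 as the subclass mask and a schid of 0 to dequeue the next pending I/O interrupt of any enabled subclass. The dequeued entry belongs to the caller afterwards.

static int tpi_like_caller_sketch(struct kvm_vcpu *vcpu)
{
        struct kvm_s390_interrupt_info *inti;

        inti = kvm_s390_get_io_int(vcpu->kvm,
                                   vcpu->arch.sie_block->gcr[6], 0);
        if (!inti)
                return 0;       /* nothing pending for the enabled ISCs */
        /* ... store inti->io.* for the guest, or hand the entry
         * back via kvm_s390_reinject_io_int() ... */
        kfree(inti);            /* once delivered, the caller frees it */
        return 1;
}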
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int __inject_service(struct kvm *kvm,
+                            struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+       /*
+        * Early versions of the QEMU s390 BIOS will inject several
+        * service interrupts one after another without handling a
+        * condition code indicating busy.
+        * We will silently ignore those superfluous SCCB values.
+        * A future version of QEMU will take care of serializing
+        * SERVC requests.
+        */
+       if (fi->srv_signal.ext_params & SCCB_MASK)
+               goto out;
+       fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+       set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+       spin_unlock(&fi->lock);
+       kfree(inti);
+       return 0;
+}
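
To make the masks above concrete: the SCLP parameter word carries an 8-byte-aligned SCCB address in its upper bits, leaving the low bits free for the event-pending indication. A decomposition sketch (helper names are illustrative):

static inline u32 sccb_addr_of(u32 ext_params)
{
        return ext_params & SCCB_MASK;          /* 8-byte-aligned SCCB */
}

static inline u32 sccb_events_of(u32 ext_params)
{
        return ext_params & SCCB_EVENT_PENDING; /* event-pending bits */
}

A second SERVC arriving while an SCCB address is still latched is therefore dropped except for its event-pending bits, which is exactly the early-QEMU workaround implemented in __inject_service() above.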
+
+static int __inject_virtio(struct kvm *kvm,
+                           struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+               spin_unlock(&fi->lock);
+               return -EBUSY;
+       }
+       fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+       list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+       set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+       return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+                                struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       if (fi->counters[FIRQ_CNTR_PFAULT] >=
+               (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+               spin_unlock(&fi->lock);
+               return -EBUSY;
+       }
+       fi->counters[FIRQ_CNTR_PFAULT] += 1;
+       list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+       set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+       return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+                               struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+       fi->mchk.mcic |= inti->mchk.mcic;
+       set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+       kfree(inti);
+       return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
        struct kvm_s390_float_interrupt *fi;
-       struct kvm_s390_interrupt_info *inti, *iter;
+       struct list_head *list;
+       int isc;
 
-       if ((!schid && !cr6) || (schid && cr6))
-               return NULL;
        fi = &kvm->arch.float_int;
        spin_lock(&fi->lock);
-       inti = NULL;
-       list_for_each_entry(iter, &fi->list, list) {
-               if (!is_ioint(iter->type))
-                       continue;
-               if (cr6 &&
-                   ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
-                       continue;
-               if (schid) {
-                       if (((schid & 0x00000000ffff0000) >> 16) !=
-                           iter->io.subchannel_id)
-                               continue;
-                       if ((schid & 0x000000000000ffff) !=
-                           iter->io.subchannel_nr)
-                               continue;
-               }
-               inti = iter;
-               break;
-       }
-       if (inti) {
-               list_del_init(&inti->list);
-               fi->irq_count--;
+       if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+               spin_unlock(&fi->lock);
+               return -EBUSY;
        }
-       if (list_empty(&fi->list))
-               atomic_set(&fi->active, 0);
+       fi->counters[FIRQ_CNTR_IO] += 1;
+
+       isc = int_word_to_isc(inti->io.io_int_word);
+       list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+       list_add_tail(&inti->list, list);
+       set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
        spin_unlock(&fi->lock);
-       return inti;
+       return 0;
 }
 
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
        struct kvm_s390_local_interrupt *li;
        struct kvm_s390_float_interrupt *fi;
-       struct kvm_s390_interrupt_info *iter;
        struct kvm_vcpu *dst_vcpu = NULL;
        int sigcpu;
-       int rc = 0;
+       u64 type = READ_ONCE(inti->type);
+       int rc;
 
        fi = &kvm->arch.float_int;
-       spin_lock(&fi->lock);
-       if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+
+       switch (type) {
+       case KVM_S390_MCHK:
+               rc = __inject_float_mchk(kvm, inti);
+               break;
+       case KVM_S390_INT_VIRTIO:
+               rc = __inject_virtio(kvm, inti);
+               break;
+       case KVM_S390_INT_SERVICE:
+               rc = __inject_service(kvm, inti);
+               break;
+       case KVM_S390_INT_PFAULT_DONE:
+               rc = __inject_pfault_done(kvm, inti);
+               break;
+       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+               rc = __inject_io(kvm, inti);
+               break;
+       default:
                rc = -EINVAL;
-               goto unlock_fi;
        }
-       fi->irq_count++;
-       if (!is_ioint(inti->type)) {
-               list_add_tail(&inti->list, &fi->list);
-       } else {
-               u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+       if (rc)
+               return rc;
 
-               /* Keep I/O interrupts sorted in isc order. */
-               list_for_each_entry(iter, &fi->list, list) {
-                       if (!is_ioint(iter->type))
-                               continue;
-                       if (int_word_to_isc_bits(iter->io.io_int_word)
-                           <= isc_bits)
-                               continue;
-                       break;
-               }
-               list_add_tail(&inti->list, &iter->list);
-       }
-       atomic_set(&fi->active, 1);
-       if (atomic_read(&kvm->online_vcpus) == 0)
-               goto unlock_fi;
        sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
        if (sigcpu == KVM_MAX_VCPUS) {
                do {
@@ -1257,7 +1384,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
        dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
        li = &dst_vcpu->arch.local_int;
        spin_lock(&li->lock);
-       switch (inti->type) {
+       switch (type) {
        case KVM_S390_MCHK:
                atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
                break;
@@ -1270,9 +1397,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
        }
        spin_unlock(&li->lock);
        kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
-unlock_fi:
-       spin_unlock(&fi->lock);
-       return rc;
+       return 0;
 }
 
 int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1332,10 +1458,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
        return rc;
 }
 
-void kvm_s390_reinject_io_int(struct kvm *kvm,
+int kvm_s390_reinject_io_int(struct kvm *kvm,
                              struct kvm_s390_interrupt_info *inti)
 {
-       __inject_vm(kvm, inti);
+       return __inject_vm(kvm, inti);
 }
 
 int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
@@ -1388,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
        spin_unlock(&li->lock);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
-       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        int rc;
 
-       spin_lock(&li->lock);
        switch (irq->type) {
        case KVM_S390_PROGRAM_INT:
                VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
@@ -1433,83 +1557,130 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
        default:
                rc = -EINVAL;
        }
+
+       return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       spin_lock(&li->lock);
+       rc = do_inject_vcpu(vcpu, irq);
        spin_unlock(&li->lock);
        if (!rc)
                kvm_s390_vcpu_wakeup(vcpu);
        return rc;
 }
 
-void kvm_s390_clear_float_irqs(struct kvm *kvm)
+static inline void clear_irq_list(struct list_head *_list)
 {
-       struct kvm_s390_float_interrupt *fi;
-       struct kvm_s390_interrupt_info  *n, *inti = NULL;
+       struct kvm_s390_interrupt_info *inti, *n;
 
-       fi = &kvm->arch.float_int;
-       spin_lock(&fi->lock);
-       list_for_each_entry_safe(inti, n, &fi->list, list) {
+       list_for_each_entry_safe(inti, n, _list, list) {
                list_del(&inti->list);
                kfree(inti);
        }
-       fi->irq_count = 0;
-       atomic_set(&fi->active, 0);
-       spin_unlock(&fi->lock);
 }
 
-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
-                                  u8 *addr)
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+                      struct kvm_s390_irq *irq)
 {
-       struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
-       struct kvm_s390_irq irq = {0};
-
-       irq.type = inti->type;
+       irq->type = inti->type;
        switch (inti->type) {
        case KVM_S390_INT_PFAULT_INIT:
        case KVM_S390_INT_PFAULT_DONE:
        case KVM_S390_INT_VIRTIO:
-       case KVM_S390_INT_SERVICE:
-               irq.u.ext = inti->ext;
+               irq->u.ext = inti->ext;
                break;
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               irq.u.io = inti->io;
+               irq->u.io = inti->io;
                break;
-       case KVM_S390_MCHK:
-               irq.u.mchk = inti->mchk;
-               break;
-       default:
-               return -EINVAL;
        }
+}
 
-       if (copy_to_user(uptr, &irq, sizeof(irq)))
-               return -EFAULT;
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       int i;
 
-       return 0;
-}
+       spin_lock(&fi->lock);
+       for (i = 0; i < FIRQ_LIST_COUNT; i++)
+               clear_irq_list(&fi->lists[i]);
+       for (i = 0; i < FIRQ_MAX_COUNT; i++)
+               fi->counters[i] = 0;
+       spin_unlock(&fi->lock);
+}
 
-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 {
        struct kvm_s390_interrupt_info *inti;
        struct kvm_s390_float_interrupt *fi;
+       struct kvm_s390_irq *buf;
+       struct kvm_s390_irq *irq;
+       int max_irqs;
        int ret = 0;
        int n = 0;
+       int i;
+
+       if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+               return -EINVAL;
+
+       /*
+        * We are already using -ENOMEM to signal
+        * userspace that it may retry with a bigger buffer,
+        * so we need a different return code for an
+        * allocation failure here.
+        */
+       buf = vzalloc(len);
+       if (!buf)
+               return -ENOBUFS;
+
+       max_irqs = len / sizeof(struct kvm_s390_irq);
 
        fi = &kvm->arch.float_int;
        spin_lock(&fi->lock);
-
-       list_for_each_entry(inti, &fi->list, list) {
-               if (len < sizeof(struct kvm_s390_irq)) {
+       for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+               list_for_each_entry(inti, &fi->lists[i], list) {
+                       if (n == max_irqs) {
+                               /* signal userspace to try again */
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       inti_to_irq(inti, &buf[n]);
+                       n++;
+               }
+       }
+       if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+               if (n == max_irqs) {
                        /* signal userspace to try again */
                        ret = -ENOMEM;
-                       break;
+                       goto out;
                }
-               ret = copy_irq_to_user(inti, buf);
-               if (ret)
-                       break;
-               buf += sizeof(struct kvm_s390_irq);
-               len -= sizeof(struct kvm_s390_irq);
+               irq = (struct kvm_s390_irq *) &buf[n];
+               irq->type = KVM_S390_INT_SERVICE;
+               irq->u.ext = fi->srv_signal;
                n++;
        }
+       if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+               if (n == max_irqs) {
+                       /* signal userspace to try again */
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               irq = (struct kvm_s390_irq *) &buf[n];
+               irq->type = KVM_S390_MCHK;
+               irq->u.mchk = fi->mchk;
+               n++;
+       }
 
+out:
        spin_unlock(&fi->lock);
+       if (!ret && n > 0) {
+               if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+                       ret = -EFAULT;
+       }
+       vfree(buf);
 
        return ret < 0 ? ret : n;
 }
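
From userspace the -ENOMEM convention turns into a grow-and-retry loop. A hedged sketch, assuming the usual <linux/kvm.h>, <sys/ioctl.h>, <errno.h> and <stdlib.h> includes and a flic_fd obtained via KVM_CREATE_DEVICE with KVM_DEV_TYPE_FLIC:

struct kvm_device_attr attr = { .group = KVM_DEV_FLIC_GET_ALL_IRQS };
size_t len = 64 * sizeof(struct kvm_s390_irq);
void *buf = NULL;
int n;

do {
        free(buf);
        buf = malloc(len);
        attr.addr = (__u64)(unsigned long)buf;
        attr.attr = len;                /* buffer size in bytes */
        n = ioctl(flic_fd, KVM_GET_DEVICE_ATTR, &attr);
        len *= 2;                       /* grow for a possible retry */
} while (n < 0 && errno == ENOMEM);
/* n >= 0: n entries of struct kvm_s390_irq are now in buf */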
@@ -1520,7 +1691,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
        switch (attr->group) {
        case KVM_DEV_FLIC_GET_ALL_IRQS:
-               r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+               r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
                                          attr->attr);
                break;
        default:
@@ -1952,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
 {
        return -EINVAL;
 }
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_irq *buf;
+       int r = 0;
+       int n;
+
+       buf = vmalloc(len);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user((void *) buf, irqstate, len)) {
+               r = -EFAULT;
+               goto out_free;
+       }
+
+       /*
+        * Don't allow setting the interrupt state
+        * when there are already interrupts pending
+        */
+       spin_lock(&li->lock);
+       if (li->pending_irqs) {
+               r = -EBUSY;
+               goto out_unlock;
+       }
+
+       for (n = 0; n < len / sizeof(*buf); n++) {
+               r = do_inject_vcpu(vcpu, &buf[n]);
+               if (r)
+                       break;
+       }
+
+out_unlock:
+       spin_unlock(&li->lock);
+out_free:
+       vfree(buf);
+
+       return r;
+}
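
Seen from userspace (a hedged sketch; vcpu_fd is an already-created vcpu fd and the usual KVM headers are assumed), restoring interrupt state is one ioctl over an array of struct kvm_s390_irq:

struct kvm_s390_irq irqs[1] = {
        { .type = KVM_S390_RESTART },   /* example: one pending restart */
};
struct kvm_s390_irq_state irq_state = {
        .buf = (__u64)(unsigned long)irqs,
        .len = sizeof(irqs),            /* a multiple of sizeof(*irqs) */
};

if (ioctl(vcpu_fd, KVM_S390_SET_IRQ_STATE, &irq_state) < 0)
        perror("KVM_S390_SET_IRQ_STATE");  /* EBUSY: IRQs already pending */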
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+                           struct kvm_s390_irq *irq,
+                           unsigned long irq_type)
+{
+       switch (irq_type) {
+       case IRQ_PEND_MCHK_EX:
+       case IRQ_PEND_MCHK_REP:
+               irq->type = KVM_S390_MCHK;
+               irq->u.mchk = li->irq.mchk;
+               break;
+       case IRQ_PEND_PROG:
+               irq->type = KVM_S390_PROGRAM_INT;
+               irq->u.pgm = li->irq.pgm;
+               break;
+       case IRQ_PEND_PFAULT_INIT:
+               irq->type = KVM_S390_INT_PFAULT_INIT;
+               irq->u.ext = li->irq.ext;
+               break;
+       case IRQ_PEND_EXT_EXTERNAL:
+               irq->type = KVM_S390_INT_EXTERNAL_CALL;
+               irq->u.extcall = li->irq.extcall;
+               break;
+       case IRQ_PEND_EXT_CLOCK_COMP:
+               irq->type = KVM_S390_INT_CLOCK_COMP;
+               break;
+       case IRQ_PEND_EXT_CPU_TIMER:
+               irq->type = KVM_S390_INT_CPU_TIMER;
+               break;
+       case IRQ_PEND_SIGP_STOP:
+               irq->type = KVM_S390_SIGP_STOP;
+               irq->u.stop = li->irq.stop;
+               break;
+       case IRQ_PEND_RESTART:
+               irq->type = KVM_S390_RESTART;
+               break;
+       case IRQ_PEND_SET_PREFIX:
+               irq->type = KVM_S390_SIGP_SET_PREFIX;
+               irq->u.prefix = li->irq.prefix;
+               break;
+       }
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+       uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+       unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       unsigned long pending_irqs;
+       struct kvm_s390_irq irq;
+       unsigned long irq_type;
+       int cpuaddr;
+       int n = 0;
+
+       spin_lock(&li->lock);
+       pending_irqs = li->pending_irqs;
+       memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+              sizeof(sigp_emerg_pending));
+       spin_unlock(&li->lock);
+
+       for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+               memset(&irq, 0, sizeof(irq));
+               if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+                       continue;
+               if (n + sizeof(irq) > len)
+                       return -ENOBUFS;
+               store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+               if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+                       return -EFAULT;
+               n += sizeof(irq);
+       }
+
+       if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+               for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+                       memset(&irq, 0, sizeof(irq));
+                       if (n + sizeof(irq) > len)
+                               return -ENOBUFS;
+                       irq.type = KVM_S390_INT_EMERGENCY;
+                       irq.u.emerg.code = cpuaddr;
+                       if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+                               return -EFAULT;
+                       n += sizeof(irq);
+               }
+       }
+
+       if ((sigp_ctrl & SIGP_CTRL_C) &&
+           (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+            CPUSTAT_ECALL_PEND)) {
+               if (n + sizeof(irq) > len)
+                       return -ENOBUFS;
+               memset(&irq, 0, sizeof(irq));
+               irq.type = KVM_S390_INT_EXTERNAL_CALL;
+               irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+               if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+                       return -EFAULT;
+               n += sizeof(irq);
+       }
+
+       return n;
+}
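
The matching save side, again as a sketch: a non-negative return is the number of bytes stored, always a multiple of sizeof(struct kvm_s390_irq), while -ENOBUFS signals a too-small buffer:

__u8 buf[32 * sizeof(struct kvm_s390_irq)];
struct kvm_s390_irq_state irq_state = {
        .buf = (__u64)(unsigned long)buf,
        .len = sizeof(buf),
};
int n;

n = ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
/* n >= 0: n bytes, i.e. n / sizeof(struct kvm_s390_irq) entries */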
index 19e17bd7aec09b2662874a3925e3d55f4e4207f4..afa2bd750ffc814d36300bc491a0a6eff516bda4 100644 (file)
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
+#include <asm/isc.h>
 #include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace.h"
 #include "trace-s390.h"
 
+#define MEM_OP_MAX_SIZE 65536  /* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+                          (KVM_MAX_VCPUS + LOCAL_IRQS))
+
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -87,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+       { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
@@ -101,8 +109,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[] = {
-       0xff82fffbf4fc2000UL,
-       0x005c000000000000UL,
+       0xffe6fffbfcfdfc40UL,
+       0x205c800000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
@@ -171,9 +179,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
+       case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
+       case KVM_CAP_S390_USER_STSI:
+       case KVM_CAP_S390_SKEYS:
+       case KVM_CAP_S390_IRQ_STATE:
                r = 1;
                break;
+       case KVM_CAP_S390_MEM_OP:
+               r = MEM_OP_MAX_SIZE;
+               break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
@@ -184,6 +199,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
+       case KVM_CAP_S390_VECTOR_REGISTERS:
+               r = MACHINE_HAS_VX;
+               break;
        default:
                r = 0;
        }
@@ -264,6 +282,18 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
+       case KVM_CAP_S390_VECTOR_REGISTERS:
+               if (MACHINE_HAS_VX) {
+                       set_kvm_facility(kvm->arch.model.fac->mask, 129);
+                       set_kvm_facility(kvm->arch.model.fac->list, 129);
+                       r = 0;
+               } else
+                       r = -EINVAL;
+               break;
+       case KVM_CAP_S390_USER_STSI:
+               kvm->arch.user_stsi = 1;
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
@@ -708,6 +738,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
        return ret;
 }
 
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+       uint8_t *keys;
+       uint64_t hva;
+       unsigned long curkey;
+       int i, r = 0;
+
+       if (args->flags != 0)
+               return -EINVAL;
+
+       /* Is this guest using storage keys? */
+       if (!mm_use_skey(current->mm))
+               return KVM_S390_GET_SKEYS_NONE;
+
+       /* Enforce sane limit on memory allocation */
+       if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+               return -EINVAL;
+
+       keys = kmalloc_array(args->count, sizeof(uint8_t),
+                            GFP_KERNEL | __GFP_NOWARN);
+       if (!keys)
+               keys = vmalloc(sizeof(uint8_t) * args->count);
+       if (!keys)
+               return -ENOMEM;
+
+       for (i = 0; i < args->count; i++) {
+               hva = gfn_to_hva(kvm, args->start_gfn + i);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       goto out;
+               }
+
+               curkey = get_guest_storage_key(current->mm, hva);
+               if (IS_ERR_VALUE(curkey)) {
+                       r = curkey;
+                       goto out;
+               }
+               keys[i] = curkey;
+       }
+
+       r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+                        sizeof(uint8_t) * args->count);
+       if (r)
+               r = -EFAULT;
+out:
+       kvfree(keys);
+       return r;
+}
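
A hedged userspace sketch (vm_fd is a KVM VM fd, usual KVM headers assumed): fetch one storage key per guest page starting at frame 0; a return of KVM_S390_GET_SKEYS_NONE means the guest is not using storage keys at all.

__u8 keys[256];                         /* one key byte per page */
struct kvm_s390_skeys args = {
        .start_gfn = 0,
        .count = sizeof(keys),
        .skeydata_addr = (__u64)(unsigned long)keys,
        .flags = 0,
};
int r;

r = ioctl(vm_fd, KVM_S390_GET_SKEYS, &args);
/* r == KVM_S390_GET_SKEYS_NONE: no guest storage-key usage */

The set direction is symmetric via KVM_S390_SET_SKEYS; note from kvm_s390_set_skeys() below that key values with the 0x01 bit set are rejected as reserved.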
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+       uint8_t *keys;
+       uint64_t hva;
+       int i, r = 0;
+
+       if (args->flags != 0)
+               return -EINVAL;
+
+       /* Enforce sane limit on memory allocation */
+       if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+               return -EINVAL;
+
+       keys = kmalloc_array(args->count, sizeof(uint8_t),
+                            GFP_KERNEL | __GFP_NOWARN);
+       if (!keys)
+               keys = vmalloc(sizeof(uint8_t) * args->count);
+       if (!keys)
+               return -ENOMEM;
+
+       r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+                          sizeof(uint8_t) * args->count);
+       if (r) {
+               r = -EFAULT;
+               goto out;
+       }
+
+       /* Enable storage key handling for the guest */
+       s390_enable_skey();
+
+       for (i = 0; i < args->count; i++) {
+               hva = gfn_to_hva(kvm, args->start_gfn + i);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       goto out;
+               }
+
+               /* Lowest order bit is reserved */
+               if (keys[i] & 0x01) {
+                       r = -EINVAL;
+                       goto out;
+               }
+
+               r = set_guest_storage_key(current->mm, hva,
+                                         (unsigned long)keys[i], 0);
+               if (r)
+                       goto out;
+       }
+out:
+       kvfree(keys);
+       return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
 {
@@ -767,6 +899,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_s390_vm_has_attr(kvm, &attr);
                break;
        }
+       case KVM_S390_GET_SKEYS: {
+               struct kvm_s390_skeys args;
+
+               r = -EFAULT;
+               if (copy_from_user(&args, argp,
+                                  sizeof(struct kvm_s390_skeys)))
+                       break;
+               r = kvm_s390_get_skeys(kvm, &args);
+               break;
+       }
+       case KVM_S390_SET_SKEYS: {
+               struct kvm_s390_skeys args;
+
+               r = -EFAULT;
+               if (copy_from_user(&args, argp,
+                                  sizeof(struct kvm_s390_skeys)))
+                       break;
+               r = kvm_s390_set_skeys(kvm, &args);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@ -887,7 +1039,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
        if (!kvm->arch.dbf)
-               goto out_nodbf;
+               goto out_err;
 
        /*
         * The architectural maximum amount of facilities is 16 kbit. To store
@@ -899,7 +1051,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.model.fac =
                (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.model.fac)
-               goto out_nofac;
+               goto out_err;
 
        /* Populate the facility mask initially. */
        memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
@@ -919,10 +1071,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
 
        if (kvm_s390_crypto_init(kvm) < 0)
-               goto out_crypto;
+               goto out_err;
 
        spin_lock_init(&kvm->arch.float_int.lock);
-       INIT_LIST_HEAD(&kvm->arch.float_int.list);
+       for (i = 0; i < FIRQ_LIST_COUNT; i++)
+               INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);
 
@@ -934,7 +1087,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        } else {
                kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
                if (!kvm->arch.gmap)
-                       goto out_nogmap;
+                       goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }
@@ -946,15 +1099,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        spin_lock_init(&kvm->arch.start_stop_lock);
 
        return 0;
-out_nogmap:
+out_err:
        kfree(kvm->arch.crypto.crycb);
-out_crypto:
        free_page((unsigned long)kvm->arch.model.fac);
-out_nofac:
        debug_unregister(kvm->arch.dbf);
-out_nodbf:
        free_page((unsigned long)(kvm->arch.sca));
-out_err:
        return rc;
 }
 
@@ -1034,6 +1183,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                    KVM_SYNC_CRS |
                                    KVM_SYNC_ARCH0 |
                                    KVM_SYNC_PFAULT;
+       if (test_kvm_facility(vcpu->kvm, 129))
+               vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 
        if (kvm_is_ucontrol(vcpu->kvm))
                return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1044,10 +1195,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-       save_fp_regs(vcpu->arch.host_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129))
+               save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+       else
+               save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
-       restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-       restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129)) {
+               restore_fp_ctl(&vcpu->run->s.regs.fpc);
+               restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+       } else {
+               restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+               restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       }
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1057,11 +1216,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
-       save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-       save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129)) {
+               save_fp_ctl(&vcpu->run->s.regs.fpc);
+               save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+       } else {
+               save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+               save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       }
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-       restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129))
+               restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+       else
+               restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1129,6 +1296,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+       vcpu->arch.cpu_id = model->cpu_id;
+       vcpu->arch.sie_block->ibc = model->ibc;
+       vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
        int rc = 0;
@@ -1137,6 +1313,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
                                                    CPUSTAT_GED);
+       kvm_s390_vcpu_setup_model(vcpu);
+
        vcpu->arch.sie_block->ecb   = 6;
        if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= 0x10;
@@ -1147,8 +1325,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->eca |= 1;
        if (sclp_has_sigpif())
                vcpu->arch.sie_block->eca |= 0x10000000U;
-       vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
-                                     ICTL_TPROT;
+       if (test_kvm_facility(vcpu->kvm, 129)) {
+               vcpu->arch.sie_block->eca |= 0x00020000;
+               vcpu->arch.sie_block->ecd |= 0x20000000;
+       }
+       vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
        if (kvm_s390_cmma_enabled(vcpu->kvm)) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1158,11 +1339,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 
-       mutex_lock(&vcpu->kvm->lock);
-       vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
-       vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
-       mutex_unlock(&vcpu->kvm->lock);
-
        kvm_s390_vcpu_crypto_setup(vcpu);
 
        return rc;
@@ -1190,6 +1366,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
        vcpu->arch.sie_block = &sie_page->sie_block;
        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+       vcpu->arch.host_vregs = &sie_page->vregs;
 
        vcpu->arch.sie_block->icpua = id;
        if (!kvm_is_ucontrol(kvm)) {
@@ -1205,7 +1382,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
                set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
        }
-       vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->list;
 
        spin_lock_init(&vcpu->arch.local_int.lock);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -1725,6 +1901,31 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       u8 opcode;
+       int rc;
+
+       VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+       trace_kvm_s390_sie_fault(vcpu);
+
+       /*
+        * We want to inject an addressing exception, which is defined as a
+        * suppressing or terminating exception. However, since we came here
+        * by a DAT access exception, the PSW still points to the faulting
+        * instruction since DAT exceptions are nullifying. So we've got
+        * to look up the current opcode to get the length of the instruction
+        * to be able to forward the PSW.
+        */
+       rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
+       psw->addr = __rewind_psw(*psw, -insn_length(opcode));
+
+       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+}
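
The length lookup relies on the s390 rule that the two most significant bits of the first opcode byte encode the instruction length (00 -> 2, 01/10 -> 4, 11 -> 6 bytes). A sketch of what insn_length() is assumed to compute:

static inline int insn_length_sketch(unsigned char code)
{
        /* maps 0x00-0x3f -> 2, 0x40-0xbf -> 4, 0xc0-0xff -> 6 */
        return ((((int) code + 64) >> 7) + 1) << 1;
}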
+
 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 {
        int rc = -1;
@@ -1756,11 +1957,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
                }
        }
 
-       if (rc == -1) {
-               VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-               trace_kvm_s390_sie_fault(vcpu);
-               rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-       }
+       if (rc == -1)
+               rc = vcpu_post_run_fault_in_sie(vcpu);
 
        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 
@@ -1976,6 +2174,35 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
        return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
+/*
+ * store additional status at address
+ */
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+                                       unsigned long gpa)
+{
+       /* Only bits 0-53 are used for address formation */
+       if (!(gpa & ~0x3ff))
+               return 0;
+
+       return write_guest_abs(vcpu, gpa & ~0x3ff,
+                              (void *)&vcpu->run->s.regs.vrs, 512);
+}
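
The mask arithmetic deserves a worked example: the additional-status area is 1024-byte aligned, so the low ten bits of gpa take no part in address formation.

/* e.g.  gpa = 0x12345: the 512-byte VRS block is stored at 0x12000
 *       gpa = 0x003ff: gpa & ~0x3ff == 0, nothing is stored        */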
+
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+       if (!test_kvm_facility(vcpu->kvm, 129))
+               return 0;
+
+       /*
+        * The guest VXRS are in the host VXRS due to the lazy
+        * copying in vcpu load/put. Let's update our copies before we save
+        * them into the save area.
+        */
+       save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+
+       return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
+}
+
 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 {
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -2100,6 +2327,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
        return r;
 }
 
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+                                 struct kvm_s390_mem_op *mop)
+{
+       void __user *uaddr = (void __user *)mop->buf;
+       void *tmpbuf = NULL;
+       int r, srcu_idx;
+       const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+                                   | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+       if (mop->flags & ~supported_flags)
+               return -EINVAL;
+
+       if (mop->size > MEM_OP_MAX_SIZE)
+               return -E2BIG;
+
+       if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+               tmpbuf = vmalloc(mop->size);
+               if (!tmpbuf)
+                       return -ENOMEM;
+       }
+
+       srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+       switch (mop->op) {
+       case KVM_S390_MEMOP_LOGICAL_READ:
+               if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+                       r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+                       break;
+               }
+               r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+               if (r == 0) {
+                       if (copy_to_user(uaddr, tmpbuf, mop->size))
+                               r = -EFAULT;
+               }
+               break;
+       case KVM_S390_MEMOP_LOGICAL_WRITE:
+               if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+                       r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+                       break;
+               }
+               if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+                       r = -EFAULT;
+                       break;
+               }
+               r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+               break;
+       default:
+               r = -EINVAL;
+       }
+
+       srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+       if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+               kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+       vfree(tmpbuf);
+       return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
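A hedged sketch of driving kvm_s390_guest_mem_op() from userspace (vcpu_fd is an assumed open vcpu file descriptor; per the code above, a return value > 0 is the program interruption code of an access exception):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: read 256 bytes from a guest logical address. Passing
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION in flags would instead make
 * the kernel inject the access exception into the guest. */
static int read_guest_mem(int vcpu_fd, __u64 gaddr, void *out)
{
        struct kvm_s390_mem_op mop;

        memset(&mop, 0, sizeof(mop));
        mop.op    = KVM_S390_MEMOP_LOGICAL_READ;
        mop.gaddr = gaddr;
        mop.size  = 256;
        mop.buf   = (__u64)(unsigned long)out;
        mop.ar    = 0;                  /* access register 0 */
        return ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop);
}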
@@ -2109,6 +2395,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        long r;
 
        switch (ioctl) {
+       case KVM_S390_IRQ: {
+               struct kvm_s390_irq s390irq;
+
+               r = -EFAULT;
+               if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+                       break;
+               r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
+       }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
                struct kvm_s390_irq s390irq;
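A hedged sketch of the new KVM_S390_IRQ ioctl from the userspace side; unlike the legacy KVM_S390_INTERRUPT it takes a struct kvm_s390_irq directly, so no translation step is needed (vcpu_fd is an assumed open vcpu descriptor):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: inject an emergency signal from CPU address src_cpu. */
static int inject_emergency(int vcpu_fd, unsigned short src_cpu)
{
        struct kvm_s390_irq irq;

        memset(&irq, 0, sizeof(irq));
        irq.type = KVM_S390_INT_EMERGENCY;
        irq.u.emerg.code = src_cpu;
        return ioctl(vcpu_fd, KVM_S390_IRQ, &irq);
}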
@@ -2199,6 +2494,47 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                break;
        }
+       case KVM_S390_MEM_OP: {
+               struct kvm_s390_mem_op mem_op;
+
+               if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+                       r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+               else
+                       r = -EFAULT;
+               break;
+       }
+       case KVM_S390_SET_IRQ_STATE: {
+               struct kvm_s390_irq_state irq_state;
+
+               r = -EFAULT;
+               if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+                       break;
+               if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+                   irq_state.len == 0 ||
+                   irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+                       r = -EINVAL;
+                       break;
+               }
+               r = kvm_s390_set_irq_state(vcpu,
+                                          (void __user *) irq_state.buf,
+                                          irq_state.len);
+               break;
+       }
+       case KVM_S390_GET_IRQ_STATE: {
+               struct kvm_s390_irq_state irq_state;
+
+               r = -EFAULT;
+               if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+                       break;
+               if (irq_state.len == 0) {
+                       r = -EINVAL;
+                       break;
+               }
+               r = kvm_s390_get_irq_state(vcpu,
+                                          (__u8 __user *)  irq_state.buf,
+                                          irq_state.len);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
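For the irq-state pair above, a hedged userspace sketch (the byte-count return convention on the get side is an assumption based on this series; the set side requires a non-zero multiple of sizeof(struct kvm_s390_irq)):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: snapshot pending vcpu interrupts, e.g. for migration. */
static int save_irq_state(int vcpu_fd, struct kvm_s390_irq *irqs, int n)
{
        struct kvm_s390_irq_state irq_state = {
                .buf = (__u64)(unsigned long)irqs,
                .len = n * sizeof(*irqs),
        };

        return ioctl(vcpu_fd, KVM_S390_GET_IRQ_STATE, &irq_state);
}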
index c34109aa552d9b1a6e5ea66f172b5c3e30ad001b..ca108b90ae5613a15e0d82a68ef0288a93d62792 100644 (file)
@@ -70,16 +70,22 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
-static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+typedef u8 __bitwise ar_t;
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
 {
        u32 base2 = vcpu->arch.sie_block->ipb >> 28;
        u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+       if (ar)
+               *ar = base2;
+
        return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
-                                             u64 *address1, u64 *address2)
+                                             u64 *address1, u64 *address2,
+                                             ar_t *ar_b1, ar_t *ar_b2)
 {
        u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
        u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
@@ -88,6 +94,11 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 
        *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
        *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+       if (ar_b1)
+               *ar_b1 = base1;
+       if (ar_b2)
+               *ar_b2 = base2;
 }
 
 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
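A worked example of the base/displacement decoding above, using a hypothetical SSE-format ipb value; note how the base register nibble doubles as the access register number, which is all the new ar_t out-parameters report:

#include <stdio.h>

int main(void)
{
        unsigned int ipb = 0x2c003004;                  /* hypothetical */
        unsigned long gprs[16] = { [2] = 0x1000, [3] = 0x2000 };
        unsigned int base1 = (ipb & 0xf0000000) >> 28;  /* 0x2 */
        unsigned int disp1 = (ipb & 0x0fff0000) >> 16;  /* 0xc00 */
        unsigned int base2 = (ipb & 0x0000f000) >> 12;  /* 0x3 */
        unsigned int disp2 = ipb & 0x0fff;              /* 0x004 */

        printf("addr1=%#lx ar1=%u addr2=%#lx ar2=%u\n",
               (base1 ? gprs[base1] : 0) + disp1, base1,
               (base2 ? gprs[base2] : 0) + disp2, base2);
        return 0;       /* addr1=0x1c00 ar1=2 addr2=0x2004 ar2=3 */
}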
@@ -98,7 +109,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2
                *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
 }
 
-static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
 {
        u32 base2 = vcpu->arch.sie_block->ipb >> 28;
        u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
@@ -107,14 +118,20 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
        if (disp2 & 0x80000)
                disp2+=0xfff00000;
 
+       if (ar)
+               *ar = base2;
+
        return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
 }
 
-static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
 {
        u32 base2 = vcpu->arch.sie_block->ipb >> 28;
        u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+       if (ar)
+               *ar = base2;
+
        return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
@@ -125,13 +142,24 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
        vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 }
 
-/* test availability of facility in a kvm intance */
+/* test availability of facility in a kvm instance */
 static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
 {
        return __test_facility(nr, kvm->arch.model.fac->mask) &&
                __test_facility(nr, kvm->arch.model.fac->list);
 }
 
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+       unsigned char *ptr;
+
+       if (nr >= MAX_FACILITY_BIT)
+               return -EINVAL;
+       ptr = (unsigned char *) fac_list + (nr >> 3);
+       *ptr |= (0x80UL >> (nr & 7));
+       return 0;
+}
+
 /* are cpu states controlled by user space */
 static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 {
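The 0x80UL >> (nr & 7) in set_kvm_facility() reflects the big-endian bit numbering of the facility list: facility nr sits in byte nr/8 under mask 0x80 >> (nr%8), so facility 129 (the vector facility tested elsewhere in this series) is byte 16, mask 0x40. A minimal read-side counterpart, as a hypothetical sketch:

/* Sketch: test a facility bit the same way set_kvm_facility() sets it. */
static int facility_bit_set(const unsigned char *fac_list, unsigned long nr)
{
        return fac_list[nr >> 3] & (0x80 >> (nr & 7));
}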
@@ -150,9 +178,9 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                                      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-                                                   u64 cr6, u64 schid);
-void kvm_s390_reinject_io_int(struct kvm *kvm,
-                             struct kvm_s390_interrupt_info *inti);
+                                                   u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+                            struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in intercept.c */
@@ -177,7 +205,10 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 /* implemented in kvm-s390.c */
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+                                       unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
@@ -241,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
 extern struct kvm_device_ops kvm_flic_ops;
 int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+                          void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+                          __u8 __user *buf, int len);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
index 351116939ea27f2fcf6eb4de60447d86335d0b6e..d22d8ee1ff9d9c6404d653f5c4f8a04b8ddc70da 100644 (file)
@@ -36,15 +36,16 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
        struct kvm_vcpu *cpup;
        s64 hostclk, val;
        int i, rc;
+       ar_t ar;
        u64 op2;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       op2 = kvm_s390_get_base_disp_s(vcpu);
+       op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (op2 & 7)    /* Operand must be on a doubleword boundary */
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, op2, &val, sizeof(val));
+       rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -68,20 +69,21 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
        u64 operand2;
        u32 address;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_spx++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        /* must be word boundary */
        if (operand2 & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
        /* get the value */
-       rc = read_guest(vcpu, operand2, &address, sizeof(address));
+       rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -107,13 +109,14 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
        u64 operand2;
        u32 address;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_stpx++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        /* must be word boundary */
        if (operand2 & 3)
@@ -122,7 +125,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
        address = kvm_s390_get_prefix(vcpu);
 
        /* get the value */
-       rc = write_guest(vcpu, operand2, &address, sizeof(address));
+       rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -136,18 +139,19 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
        u16 vcpu_id = vcpu->vcpu_id;
        u64 ga;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_stap++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_s(vcpu);
+       ga = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        if (ga & 1)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+       rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -207,7 +211,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
        kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
        addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
        addr = kvm_s390_logical_to_effective(vcpu, addr);
-       if (kvm_s390_check_low_addr_protection(vcpu, addr))
+       if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
                return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
        addr = kvm_s390_real_to_abs(vcpu, addr);
 
@@ -229,18 +233,20 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
        struct kvm_s390_interrupt_info *inti;
        unsigned long len;
        u32 tpi_data[3];
-       int cc, rc;
+       int rc;
        u64 addr;
+       ar_t ar;
 
-       rc = 0;
-       addr = kvm_s390_get_base_disp_s(vcpu);
+       addr = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (addr & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       cc = 0;
+
        inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
-       if (!inti)
-               goto no_interrupt;
-       cc = 1;
+       if (!inti) {
+               kvm_s390_set_psw_cc(vcpu, 0);
+               return 0;
+       }
+
        tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
        tpi_data[1] = inti->io.io_int_parm;
        tpi_data[2] = inti->io.io_int_word;
@@ -250,40 +256,51 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
                 * provided area.
                 */
                len = sizeof(tpi_data) - 4;
-               rc = write_guest(vcpu, addr, &tpi_data, len);
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
+               rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+               if (rc) {
+                       rc = kvm_s390_inject_prog_cond(vcpu, rc);
+                       goto reinject_interrupt;
+               }
        } else {
                /*
                 * Store the three-word I/O interruption code into
                 * the appropriate lowcore area.
                 */
                len = sizeof(tpi_data);
-               if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+               if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+                       /* failed writes to the lowcore are not recoverable */
                        rc = -EFAULT;
+                       goto reinject_interrupt;
+               }
        }
+
+       /* irq was successfully handed to the guest */
+       kfree(inti);
+       kvm_s390_set_psw_cc(vcpu, 1);
+       return 0;
+reinject_interrupt:
        /*
         * If we encounter a problem storing the interruption code, the
         * instruction is suppressed from the guest's view: reinject the
         * interrupt.
         */
-       if (!rc)
+       if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
                kfree(inti);
-       else
-               kvm_s390_reinject_io_int(vcpu->kvm, inti);
-no_interrupt:
-       /* Set condition code and we're done. */
-       if (!rc)
-               kvm_s390_set_psw_cc(vcpu, cc);
+               rc = -EFAULT;
+       }
+       /* don't set the cc, a pgm irq was injected or we drop to user space */
        return rc ? -EFAULT : 0;
 }
 
 static int handle_tsch(struct kvm_vcpu *vcpu)
 {
-       struct kvm_s390_interrupt_info *inti;
+       struct kvm_s390_interrupt_info *inti = NULL;
+       const u64 isc_mask = 0xffUL << 24; /* all iscs set */
 
-       inti = kvm_s390_get_io_int(vcpu->kvm, 0,
-                                  vcpu->run->s.regs.gprs[1]);
+       /* a valid schid has at least one bit set */
+       if (vcpu->run->s.regs.gprs[1])
+               inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+                                          vcpu->run->s.regs.gprs[1]);
 
        /*
         * Prepare exit to userspace.
@@ -386,15 +403,16 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
        psw_compat_t new_psw;
        u64 addr;
        int rc;
+       ar_t ar;
 
        if (gpsw->mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       addr = kvm_s390_get_base_disp_s(vcpu);
+       addr = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (addr & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+       rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        if (!(new_psw.mask & PSW32_MASK_BASE))
@@ -412,14 +430,15 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
        psw_t new_psw;
        u64 addr;
        int rc;
+       ar_t ar;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       addr = kvm_s390_get_base_disp_s(vcpu);
+       addr = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (addr & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+       rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        vcpu->arch.sie_block->gpsw = new_psw;
@@ -433,18 +452,19 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
        u64 stidp_data = vcpu->arch.stidp_data;
        u64 operand2;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_stidp++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        if (operand2 & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+       rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -467,6 +487,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
        for (n = mem->count - 1; n > 0 ; n--)
                memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
 
+       memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
        mem->vm[0].cpus_total = cpus;
        mem->vm[0].cpus_configured = cpus;
        mem->vm[0].cpus_standby = 0;
@@ -478,6 +499,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
        ASCEBC(mem->vm[0].cpi, 16);
 }
 
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+                                u8 fc, u8 sel1, u16 sel2)
+{
+       vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+       vcpu->run->s390_stsi.addr = addr;
+       vcpu->run->s390_stsi.ar = ar;
+       vcpu->run->s390_stsi.fc = fc;
+       vcpu->run->s390_stsi.sel1 = sel1;
+       vcpu->run->s390_stsi.sel2 = sel2;
+}
+
 static int handle_stsi(struct kvm_vcpu *vcpu)
 {
        int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
@@ -486,6 +518,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        unsigned long mem = 0;
        u64 operand2;
        int rc = 0;
+       ar_t ar;
 
        vcpu->stat.instruction_stsi++;
        VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -508,7 +541,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        if (operand2 & 0xfff)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -532,16 +565,20 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
                break;
        }
 
-       rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+       rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
        if (rc) {
                rc = kvm_s390_inject_prog_cond(vcpu, rc);
                goto out;
        }
+       if (vcpu->kvm->arch.user_stsi) {
+               insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+               rc = -EREMOTE;
+       }
        trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
        free_page(mem);
        kvm_s390_set_psw_cc(vcpu, 0);
        vcpu->run->s.regs.gprs[0] = 0;
-       return 0;
+       return rc;
 out_no_data:
        kvm_s390_set_psw_cc(vcpu, 3);
 out:
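When user_stsi is set, handle_stsi() above still writes its data to the guest, then fills vcpu->run via insert_stsi_usr_data() and returns -EREMOTE, so the vcpu thread exits to userspace with KVM_EXIT_S390_STSI. A hedged sketch of the userspace side (field names as added to the kvm_run union by this series):

#include <stdio.h>
#include <linux/kvm.h>

/* Sketch: the kernel-provided STSI data already sits in guest memory
 * at run->s390_stsi.addr; userspace may patch it there before
 * resuming the vcpu. */
static void handle_stsi_exit(struct kvm_run *run)
{
        if (run->exit_reason != KVM_EXIT_S390_STSI)
                return;
        fprintf(stderr, "stsi fc=%u sel1=%u sel2=%u addr=%#llx ar=%u\n",
                (unsigned)run->s390_stsi.fc, (unsigned)run->s390_stsi.sel1,
                (unsigned)run->s390_stsi.sel2,
                (unsigned long long)run->s390_stsi.addr,
                (unsigned)run->s390_stsi.ar);
}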
@@ -670,7 +707,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
        }
 
        if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-               if (kvm_s390_check_low_addr_protection(vcpu, start))
+               if (kvm_s390_check_low_addr_prot_real(vcpu, start))
                        return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
        }
 
@@ -776,13 +813,14 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u32 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_lctl++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rs(vcpu);
+       ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
        if (ga & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -791,7 +829,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
        trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
        nr_regs = ((reg3 - reg1) & 0xf) + 1;
-       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        reg = reg1;
@@ -814,13 +852,14 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u32 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_stctl++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rs(vcpu);
+       ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
        if (ga & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -836,7 +875,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
        return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -847,13 +886,14 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u64 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_lctlg++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rsy(vcpu);
+       ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
        if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -862,7 +902,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
        trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
        nr_regs = ((reg3 - reg1) & 0xf) + 1;
-       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        reg = reg1;
@@ -884,13 +924,14 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u64 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_stctg++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rsy(vcpu);
+       ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
        if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -906,7 +947,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
        return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -931,13 +972,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
        unsigned long hva, gpa;
        int ret = 0, cc = 0;
        bool writable;
+       ar_t ar;
 
        vcpu->stat.instruction_tprot++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+       kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
 
        /* we only handle the Linux memory detection case:
         * access key == 0
@@ -946,11 +988,11 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP;
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
                ipte_lock(vcpu);
-       ret = guest_translate_address(vcpu, address1, &gpa, 1);
+       ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
        if (ret == PGM_PROTECTION) {
                /* Write protected? Try again with read-only... */
                cc = 1;
-               ret = guest_translate_address(vcpu, address1, &gpa, 0);
+               ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
        }
        if (ret) {
                if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
index 23b1e86b212245dcf7a95fb4ab82ab99f8749735..72e58bd2bee78162e963dc2bd531e40e03e29d97 100644 (file)
@@ -393,6 +393,9 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
        case SIGP_STORE_STATUS_AT_ADDRESS:
                vcpu->stat.instruction_sigp_store_status++;
                break;
+       case SIGP_STORE_ADDITIONAL_STATUS:
+               vcpu->stat.instruction_sigp_store_adtl_status++;
+               break;
        case SIGP_SET_PREFIX:
                vcpu->stat.instruction_sigp_prefix++;
                break;
@@ -431,7 +434,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       order_code = kvm_s390_get_base_disp_rs(vcpu);
+       order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
        if (handle_sigp_order_in_user_space(vcpu, order_code))
                return -EOPNOTSUPP;
 
@@ -473,7 +476,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
        int r3 = vcpu->arch.sie_block->ipa & 0x000f;
        u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
        struct kvm_vcpu *dest_vcpu;
-       u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+       u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 
        trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
 
index ec2e2e2aba7d8f15419aa12bc6f790993f8c54d4..cc9b04a2b11b6394cfcad8b485df1fac6f598583 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_SPARC_JUMP_LABEL_H
 #define _ASM_SPARC_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -22,8 +22,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -32,4 +30,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index 9ce5afe167ff509288b21605a2f9a35f96ff36dc..b36365f49478c573d715816d53582e2dfb153227 100644 (file)
@@ -639,10 +639,7 @@ static void pci_claim_bus_resources(struct pci_bus *bus)
                                       (unsigned long long)r->end,
                                       (unsigned int)r->flags);
 
-                       if (pci_claim_resource(dev, i) == 0)
-                               continue;
-
-                       pci_claim_bridge_resource(dev, i);
+                       pci_claim_resource(dev, i);
                }
        }
 
index 2f80d23a0a44964ebff804a2ab4428b9f30cb162..18147a5523d947736a3e7c0815a7eeb3526ec271 100644 (file)
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
        .rating = 100,
        .read   = timer_cs_read,
        .mask   = CLOCKSOURCE_MASK(64),
-       .shift  = 2,
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static __init int setup_timer_cs(void)
 {
        timer_cs_enabled = 1;
-       timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
-                                           timer_cs.shift);
-
-       return clocksource_register(&timer_cs);
+       return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
 }
 
 #ifdef CONFIG_SMP
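clocksource_register_hz() lets the timekeeping core derive an optimal mult/shift pair from the clock rate, replacing the hand-rolled computation deleted above. What clocksource_hz2mult() did, as a self-contained sketch:

/* Sketch: pick mult so that (cycles * mult) >> shift == nanoseconds,
 * i.e. mult = (NSEC_PER_SEC << shift) / hz, rounded to nearest. */
static unsigned int hz2mult_sketch(unsigned int hz, unsigned int shift)
{
        unsigned long long tmp = 1000000000ULL << shift;

        tmp += hz / 2;          /* round to nearest */
        return (unsigned int)(tmp / hz);
}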
index d412b0856c0a2622b13b9bc84dc39f65c1b5b08c..00178ecf9aeab4731ffd24d86ff7bc28252e23b6 100644 (file)
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
 
 void update_vsyscall(struct timekeeper *tk)
 {
-       if (tk->tkr.clock != &cycle_counter_cs)
+       if (tk->tkr_mono.clock != &cycle_counter_cs)
                return;
 
        write_seqcount_begin(&vdso_data->tb_seq);
 
-       vdso_data->cycle_last           = tk->tkr.cycle_last;
-       vdso_data->mask                 = tk->tkr.mask;
-       vdso_data->mult                 = tk->tkr.mult;
-       vdso_data->shift                = tk->tkr.shift;
+       vdso_data->cycle_last           = tk->tkr_mono.cycle_last;
+       vdso_data->mask                 = tk->tkr_mono.mask;
+       vdso_data->mult                 = tk->tkr_mono.mult;
+       vdso_data->shift                = tk->tkr_mono.shift;
 
        vdso_data->wall_time_sec        = tk->xtime_sec;
-       vdso_data->wall_time_snsec      = tk->tkr.xtime_nsec;
+       vdso_data->wall_time_snsec      = tk->tkr_mono.xtime_nsec;
 
        vdso_data->monotonic_time_sec   = tk->xtime_sec
                                        + tk->wall_to_monotonic.tv_sec;
-       vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+       vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
                                        + ((u64)tk->wall_to_monotonic.tv_nsec
-                                               << tk->tkr.shift);
+                                               << tk->tkr_mono.shift);
        while (vdso_data->monotonic_time_snsec >=
-                                       (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+                                       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                vdso_data->monotonic_time_snsec -=
-                                       ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+                                       ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
                vdso_data->monotonic_time_sec++;
        }
 
        vdso_data->wall_time_coarse_sec = tk->xtime_sec;
-       vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
-                                                tk->tkr.shift);
+       vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+                                                tk->tkr_mono.shift);
 
        vdso_data->monotonic_time_coarse_sec =
                vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
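Beyond the tkr -> tkr_mono rename, the loop above works in "shifted nanoseconds": xtime_nsec is kept left-shifted by tkr_mono.shift, so one second equals NSEC_PER_SEC << shift in those units. A minimal restatement of the normalization invariant:

/* Sketch: carry whole seconds out of a shifted-nanoseconds field. */
static void normalize_snsec(unsigned long long *sec,
                            unsigned long long *snsec, unsigned int shift)
{
        const unsigned long long one_sec = 1000000000ULL << shift;

        while (*snsec >= one_sec) {
                *snsec -= one_sec;
                (*sec)++;
        }
}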
index b7d31ca5518744983c77bc8339f30756621dfea0..faff6934c05a21f874ce5839f4b9b8542f9c02e5 100644 (file)
@@ -235,12 +235,10 @@ config ARCH_WANT_GENERAL_HUGETLB
        def_bool y
 
 config ZONE_DMA32
-       bool
-       default X86_64
+       def_bool y if X86_64
 
 config AUDIT_ARCH
-       bool
-       default X86_64
+       def_bool y if X86_64
 
 config ARCH_SUPPORTS_OPTIMIZED_INLINING
        def_bool y
@@ -891,7 +889,8 @@ config UP_LATE_INIT
        depends on !SMP && X86_LOCAL_APIC
 
 config X86_UP_APIC
-       bool "Local APIC support on uniprocessors"
+       bool "Local APIC support on uniprocessors" if !PCI_MSI
+       default PCI_MSI
        depends on X86_32 && !SMP && !X86_32_NON_STANDARD
        ---help---
          A local APIC (Advanced Programmable Interrupt Controller) is an
@@ -903,10 +902,6 @@ config X86_UP_APIC
          performance counters), and the NMI watchdog which detects hard
          lockups.
 
-config X86_UP_APIC_MSI
-       def_bool y
-       select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
-
 config X86_UP_IOAPIC
        bool "IO-APIC support on uniprocessors"
        depends on X86_UP_APIC
@@ -925,8 +920,8 @@ config X86_LOCAL_APIC
        select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-       def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
-       depends on X86_LOCAL_APIC
+       def_bool y
+       depends on X86_LOCAL_APIC || X86_UP_IOAPIC
        select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1145,10 +1140,10 @@ config MICROCODE_OLD_INTERFACE
        depends on MICROCODE
 
 config MICROCODE_INTEL_EARLY
-       def_bool n
+       bool
 
 config MICROCODE_AMD_EARLY
-       def_bool n
+       bool
 
 config MICROCODE_EARLY
        bool "Early load microcode"
@@ -1300,14 +1295,14 @@ config ARCH_DMA_ADDR_T_64BIT
        def_bool y
        depends on X86_64 || HIGHMEM64G
 
-config DIRECT_GBPAGES
-       bool "Enable 1GB pages for kernel pagetables" if EXPERT
-       default y
-       depends on X86_64
+config X86_DIRECT_GBPAGES
+       def_bool y
+       depends on X86_64 && !DEBUG_PAGEALLOC && !KMEMCHECK
        ---help---
-         Allow the kernel linear mapping to use 1GB pages on CPUs that
-         support it. This can improve the kernel's performance a tiny bit by
-         reducing TLB pressure. If in doubt, say "Y".
+         Certain kernel features effectively disable kernel
+         linear 1 GB mappings (even if the CPU otherwise
+         supports them), so don't confuse the user by printing
+         that we have them enabled.
 
 # Common NUMA Features
 config NUMA
@@ -1747,14 +1742,11 @@ config KEXEC_VERIFY_SIG
        depends on KEXEC_FILE
        ---help---
          This option makes kernel signature verification mandatory for
-         kexec_file_load() syscall. If kernel is signature can not be
-         verified, kexec_file_load() will fail.
-
-         This option enforces signature verification at generic level.
-         One needs to enable signature verification for type of kernel
-         image being loaded to make sure it works. For example, enable
-         bzImage signature verification option to be able to load and
-         verify signatures of bzImage. Otherwise kernel loading will fail.
+         the kexec_file_load() syscall.
+
+         In addition to that option, you need to enable signature
+         verification for the corresponding kernel image type being
+         loaded in order for this to work.
 
 config KEXEC_BZIMAGE_VERIFY_SIG
        bool "Enable bzImage signature verification support"
index bb1376381985edb9f96e49c0a1b0269e56bd0f9e..d7b1f655b3ef4c2e14a4976fa565fe67d0159607 100644 (file)
@@ -295,7 +295,8 @@ static unsigned long find_random_addr(unsigned long minimum,
        return slots_fetch_random();
 }
 
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+                                     unsigned char *input,
                                      unsigned long input_size,
                                      unsigned char *output,
                                      unsigned long output_size)
@@ -315,6 +316,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
        }
 #endif
 
+       boot_params->hdr.loadflags |= KASLR_FLAG;
+
        /* Record the various known unsafe memory ranges. */
        mem_avoid_init((unsigned long)input, input_size,
                       (unsigned long)output, output_size);
index 1d7fbbcc196d6f8b661545130972453f109e27d7..8ef964ddc18ec656b1e3b0f038adf134484dbdd2 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
        __HEAD
 ENTRY(startup_32)
@@ -102,7 +103,7 @@ preferred_addr:
         * Test KEEP_SEGMENTS flag to see if the bootloader is asking
         * us to not reload segments
         */
-       testb   $(1<<6), BP_loadflags(%esi)
+       testb   $KEEP_SEGMENTS, BP_loadflags(%esi)
        jnz     1f
 
        cli
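The magic (1<<6) is replaced by the named loadflags bit pulled in via asm/bootparam.h; for reference, the existing constant is:

#define KEEP_SEGMENTS   (1 << 6)  /* loadflags: bootloader asks us not to reload segments */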
index 6b1766c6c08205f3bda8a527dff88097b48e77e3..b0c0d16ef58d1099342c97aff83767dd35c73691 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
        __HEAD
        .code32
@@ -46,7 +47,7 @@ ENTRY(startup_32)
         * Test KEEP_SEGMENTS flag to see if the bootloader is asking
         * us to not reload segments
         */
-       testb $(1<<6), BP_loadflags(%esi)
+       testb $KEEP_SEGMENTS, BP_loadflags(%esi)
        jnz 1f
 
        cli
@@ -164,7 +165,7 @@ ENTRY(startup_32)
        /* After gdt is loaded */
        xorl    %eax, %eax
        lldt    %ax
-       movl    $0x20, %eax
+       movl    $__BOOT_TSS, %eax
        ltr     %ax
 
        /*
index a950864a64dab3d558197c77bef3c56a07961494..a107b935e22fbc9a749ddf975cf72ea187cb1fa2 100644 (file)
@@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 
        real_mode = rmode;
 
+       /* Clear the flag, as it is meant solely for in-kernel use */
+       real_mode->hdr.loadflags &= ~KASLR_FLAG;
+
        sanitize_boot_params(real_mode);
 
        if (real_mode->screen_info.orig_video_mode == 7) {
@@ -401,7 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
         * the entire decompressed kernel plus relocation table, or the
         * entire decompressed kernel plus .bss and .brk sections.
         */
-       output = choose_kernel_location(input_data, input_len, output,
+       output = choose_kernel_location(real_mode, input_data, input_len, output,
                                        output_len > run_size ? output_len
                                                              : run_size);
 
index 04477d68403f1fe6197d82276033ce27338c1bac..89dd0d78013aaff6c889340e0e3caceb4c8f8c88 100644 (file)
@@ -57,7 +57,8 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+                                     unsigned char *input,
                                      unsigned long input_size,
                                      unsigned char *output,
                                      unsigned long output_size);
@@ -65,7 +66,8 @@ unsigned char *choose_kernel_location(unsigned char *input,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(unsigned char *input,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
+                                     unsigned char *input,
                                      unsigned long input_size,
                                      unsigned char *output,
                                      unsigned long output_size)
index 493f3fd9f1391815c29e06c2163f3225b4e52063..318b8465d30204cad7006bf0889672e0da093aed 100644 (file)
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
        int delta = 0;
 
        while (*s1 || *s2) {
-               delta = *s2 - *s1;
+               delta = *s1 - *s2;
                if (delta)
                        return delta;
                s1++;
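The one-line change above fixes an inverted sign: the old code computed *s2 - *s1, so every ordered comparison made by boot-code callers saw the opposite result. A self-contained demonstration of the old behavior:

#include <stdio.h>

/* Sketch: the pre-fix strcmp. strcmp_old("a", "b") returns +1,
 * where a correct strcmp must return a negative value. */
static int strcmp_old(const char *s1, const char *s2)
{
        while (*s1 || *s2) {
                int delta = *s2 - *s1;  /* wrong operand order */
                if (delta)
                        return delta;
                s1++;
                s2++;
        }
        return 0;
}

int main(void)
{
        printf("%d\n", strcmp_old("a", "b"));   /* prints 1 */
        return 0;
}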
index 748e8d06290a668ff5c417197cf5c2bc5c7a16e3..aa8a96b052e30263d60afc6b81a60e387fc55773 100644 (file)
 /*
  * Common variables
  */
-int adapter;                   /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
+int adapter;           /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
 int force_x, force_y;  /* Don't query the BIOS for cols/rows */
-
 int do_restore;                /* Screen contents changed during mode flip */
 int graphic_mode;      /* Graphic mode with linear frame buffer */
 
index 43eda284d27fe96c2a4d407273f4b8d61bae87e2..05111bb8d018e38c71ae278ee0a78ccff76e34e2 100644 (file)
@@ -17,6 +17,8 @@
 #include "video.h"
 #include "vesa.h"
 
+static u16 video_segment;
+
 static void store_cursor_position(void)
 {
        struct biosregs ireg, oreg;
index 0bb25491262d00f941001d3f7f7b2fc800212250..b54e0328c449e981013dc1edc46d331827e290c4 100644 (file)
@@ -91,7 +91,6 @@ int mode_defined(u16 mode);   /* video.c */
 #define ADAPTER_VGA    2
 
 extern int adapter;
-extern u16 video_segment;
 extern int force_x, force_y;   /* Don't query the BIOS for cols/rows */
 extern int do_restore;         /* Restore screen contents */
 extern int graphic_mode;       /* Graphics mode with linear frame buffer */
index 419819d6dab3b7573196dc685c9bc9ea236a147f..aaa1118bf01e864f50dad66a0e1099bb50de08d7 100644 (file)
@@ -248,7 +248,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
index 4c311ddd973bb60686079a5c4d2831cd0bf224c7..315b861065725a4cd154744ee45d5331de6fb828 100644 (file)
@@ -243,7 +243,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
index 26d49ebae0404ee74fea7a5e7f6bd7d22b9bad80..225be06edc80982f9509e25af09e1ac0a0232237 100644 (file)
@@ -178,7 +178,7 @@ continue_block:
        ## 2a) PROCESS FULL BLOCKS:
        ################################################################
 full_block:
-       movq    $128,%rax
+       movl    $128,%eax
        lea     128*8*2(block_0), block_1
        lea     128*8*3(block_0), block_2
        add     $128*8*1, block_0
index a039d21986a21c87e9bdf9a41117a726ea3bf77f..a350c990dc86c86bdf5ec4396a9db17dd8dc7bfa 100644 (file)
@@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
        movq    R1,     8(%rsi)
 
        popq    R1
-       movq    $1,%rax
+       movl    $1,%eax
        ret
 ENDPROC(twofish_enc_blk)
 
@@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
        movq    R1,     8(%rsi)
 
        popq    R1
-       movq    $1,%rax
+       movl    $1,%eax
        ret
 ENDPROC(twofish_dec_blk)
index e785b422b76686b3bf79d95be5aebd20b961cef8..bb635c6418692f4fea1d02fce3b1c170f1a38b0f 100644 (file)
@@ -3,7 +3,6 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
-obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
 
 obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
 
index d0165c9a293241559c48742d6c0fb8df3e4fcf82..c81d35e6c7f1d91c22734793c006c0f5f33c0c10 100644 (file)
@@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
-                                  struct sigcontext_ia32 __user *sc,
-                                  unsigned int *pax)
+                                  struct sigcontext_ia32 __user *sc)
 {
        unsigned int tmpflags, err = 0;
        void __user *buf;
@@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
                RELOAD_SEG(es);
 
                COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-               COPY(dx); COPY(cx); COPY(ip);
+               COPY(dx); COPY(cx); COPY(ip); COPY(ax);
                /* Don't touch extended registers */
 
                COPY_SEG_CPL3(cs);
@@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
                get_user_ex(tmp, &sc->fpstate);
                buf = compat_ptr(tmp);
-
-               get_user_ex(*pax, &sc->ax);
        } get_user_catch(err);
 
        err |= restore_xstate_sig(buf, 1);
 
+       force_iret();
+
        return err;
 }
 
@@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
        sigset_t set;
-       unsigned int ax;
 
        if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
@@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
+       if (ia32_restore_sigcontext(regs, &frame->sc))
                goto badframe;
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "32bit sigreturn");
@@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe_ia32 __user *frame;
        sigset_t set;
-       unsigned int ax;
 
        frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+       if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
 
        if (compat_restore_altstack(&frame->uc.uc_stack))
                goto badframe;
 
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "32bit rt sigreturn");
index 156ebcab4ada6d54cf8992fcd95398af390f41cc..a821b1cd4fa7a2748cce60ba986ef63d27efb131 100644 (file)
 
        .section .entry.text, "ax"
 
-       .macro IA32_ARG_FIXUP noebp=0
-       movl    %edi,%r8d
-       .if \noebp
-       .else
-       movl    %ebp,%r9d
-       .endif
-       xchg    %ecx,%esi
-       movl    %ebx,%edi
-       movl    %edx,%edx       /* zero extension */
-       .endm 
-
-       /* clobbers %eax */     
-       .macro  CLEAR_RREGS offset=0, _r9=rax
+       /* clobbers %rax */
+       .macro  CLEAR_RREGS _r9=rax
        xorl    %eax,%eax
-       movq    %rax,\offset+R11(%rsp)
-       movq    %rax,\offset+R10(%rsp)
-       movq    %\_r9,\offset+R9(%rsp)
-       movq    %rax,\offset+R8(%rsp)
+       movq    %rax,R11(%rsp)
+       movq    %rax,R10(%rsp)
+       movq    %\_r9,R9(%rsp)
+       movq    %rax,R8(%rsp)
        .endm
 
        /*
         * If it's -1 to make us punt the syscall, then (u32)-1 is still
         * an appropriately invalid value.
         */
-       .macro LOAD_ARGS32 offset, _r9=0
+       .macro LOAD_ARGS32 _r9=0
        .if \_r9
-       movl \offset+16(%rsp),%r9d
+       movl R9(%rsp),%r9d
        .endif
-       movl \offset+40(%rsp),%ecx
-       movl \offset+48(%rsp),%edx
-       movl \offset+56(%rsp),%esi
-       movl \offset+64(%rsp),%edi
+       movl RCX(%rsp),%ecx
+       movl RDX(%rsp),%edx
+       movl RSI(%rsp),%esi
+       movl RDI(%rsp),%edi
        movl %eax,%eax                  /* zero extension */
        .endm
        
@@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
 /*
  * 32bit SYSENTER instruction entry.
  *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save the old rip (!!!) or rflags.
+ *
  * Arguments:
- * %eax        System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6        
- *     
- * Interrupts off.
- *     
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
  * with the int 0x80 path.
- */    
+ */
 ENTRY(ia32_sysenter_target)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
        CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rsp,rbp
-       SWAPGS_UNSAFE_STACK
-       movq    PER_CPU_VAR(kernel_stack), %rsp
-       addq    $(KERNEL_STACK_OFFSET),%rsp
+
        /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs, here we enable it straight after entry:
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON;
+        * it is too small to ever cause noticeable irq latency.
         */
+       SWAPGS_UNSAFE_STACK
+       movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
        ENABLE_INTERRUPTS(CLBR_NONE)
-       movl    %ebp,%ebp               /* zero extension */
-       pushq_cfi $__USER32_DS
-       /*CFI_REL_OFFSET ss,0*/
-       pushq_cfi %rbp
-       CFI_REL_OFFSET rsp,0
-       pushfq_cfi
-       /*CFI_REL_OFFSET rflags,0*/
-       movl    TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
-       CFI_REGISTER rip,r10
-       pushq_cfi $__USER32_CS
-       /*CFI_REL_OFFSET cs,0*/
+
+       /* Zero-extending 32-bit regs, do not remove */
+       movl    %ebp, %ebp
        movl    %eax, %eax
-       pushq_cfi %r10
-       CFI_REL_OFFSET rip,0
-       pushq_cfi %rax
+
+       movl    ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
+       CFI_REGISTER rip,r10
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi       $__USER32_DS            /* pt_regs->ss */
+       pushq_cfi       %rbp                    /* pt_regs->sp */
+       CFI_REL_OFFSET  rsp,0
+       pushfq_cfi                              /* pt_regs->flags */
+       pushq_cfi       $__USER32_CS            /* pt_regs->cs */
+       pushq_cfi       %r10 /* pt_regs->ip = thread_info->sysenter_return */
+       CFI_REL_OFFSET  rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi_reg   rax                     /* pt_regs->ax */
        cld
-       SAVE_ARGS 0,1,0
-       /* no need to do an access_ok check here because rbp has been
-          32bit zero extended */ 
+       sub     $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 10*8
+
+       /*
+        * no need to do an access_ok check here because rbp has been
+        * 32bit zero extended
+        */
        ASM_STAC
 1:     movl    (%rbp),%ebp
        _ASM_EXTABLE(1b,ia32_badarg)
@@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
         * ourselves.  To save a few cycles, we can check whether
         * NT was set instead of doing an unconditional popfq.
         */
-       testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+       testl $X86_EFLAGS_NT,EFLAGS(%rsp)
        jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
        cmpq    $(IA32_NR_syscalls-1),%rax
        ja      ia32_badsys
 sysenter_do_call:
-       IA32_ARG_FIXUP
+       /* 32bit syscall -> 64bit C ABI argument conversion */
+       movl    %edi,%r8d       /* arg5 */
+       movl    %ebp,%r9d       /* arg6 */
+       xchg    %ecx,%esi       /* rsi:arg2, rcx:arg4 */
+       movl    %ebx,%edi       /* arg1 */
+       movl    %edx,%edx       /* arg3 (zero extension) */
 sysenter_dispatch:
        call    *ia32_sys_call_table(,%rax,8)
-       movq    %rax,RAX-ARGOFFSET(%rsp)
+       movq    %rax,RAX(%rsp)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl   $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz     sysexit_audit
 sysexit_from_sys_call:
-       andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       /* clear IF, that popfq doesn't enable interrupts early */
-       andl    $~0x200,EFLAGS-ARGOFFSET(%rsp)
-       movl    RIP-ARGOFFSET(%rsp),%edx                /* User %eip */
-       CFI_REGISTER rip,rdx
-       RESTORE_ARGS 0,24,0,0,0,0
+       /*
+        * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
+        * NMI between STI and SYSEXIT has poorly specified behavior,
+        * and an NMI followed by an IRQ with usergs is fatal.  So
+        * we just pretend we're using SYSEXIT but we really use
+        * SYSRETL instead.
+        *
+        * This code path is still called 'sysexit' because it pairs
+        * with 'sysenter' and it uses the SYSENTER calling convention.
+        */
+       andl    $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       movl    RIP(%rsp),%ecx          /* User %eip */
+       CFI_REGISTER rip,rcx
+       RESTORE_RSI_RDI
+       xorl    %edx,%edx               /* avoid info leaks */
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
-       xorq    %r11,%r11
-       popfq_cfi
+       movl    EFLAGS(%rsp),%r11d      /* User eflags */
        /*CFI_RESTORE rflags*/
-       popq_cfi %rcx                           /* User %esp */
-       CFI_REGISTER rsp,rcx
        TRACE_IRQS_ON
-       ENABLE_INTERRUPTS_SYSEXIT32
+
+       /*
+        * SYSRETL works even on Intel CPUs.  Use it in preference to SYSEXIT,
+        * since it avoids a dicey window with interrupts enabled.
+        */
+       movl    RSP(%rsp),%esp
+
+       /*
+        * USERGS_SYSRET32 does:
+        *  gsbase = user's gs base
+        *  eip = ecx
+        *  rflags = r11
+        *  cs = __USER32_CS
+        *  ss = __USER_DS
+        *
+        * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
+        *
+        *  pop %ebp
+        *  pop %edx
+        *  pop %ecx
+        *
+        * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
+        * avoid info leaks.  R11 ends up with VDSO32_SYSENTER_RETURN's
+        * address (already known to user code), and R12-R15 are
+        * callee-saved and therefore don't contain any interesting
+        * kernel data.
+        */
+       USERGS_SYSRET32
 
        CFI_RESTORE_STATE
 
@@ -205,18 +247,18 @@ sysexit_from_sys_call:
        movl %ebx,%esi                  /* 2nd arg: 1st syscall arg */
        movl %eax,%edi                  /* 1st arg: syscall number */
        call __audit_syscall_entry
-       movl RAX-ARGOFFSET(%rsp),%eax   /* reload syscall number */
+       movl RAX(%rsp),%eax     /* reload syscall number */
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
        movl %ebx,%edi                  /* reload 1st syscall arg */
-       movl RCX-ARGOFFSET(%rsp),%esi   /* reload 2nd syscall arg */
-       movl RDX-ARGOFFSET(%rsp),%edx   /* reload 3rd syscall arg */
-       movl RSI-ARGOFFSET(%rsp),%ecx   /* reload 4th syscall arg */
-       movl RDI-ARGOFFSET(%rsp),%r8d   /* reload 5th syscall arg */
+       movl RCX(%rsp),%esi     /* reload 2nd syscall arg */
+       movl RDX(%rsp),%edx     /* reload 3rd syscall arg */
+       movl RSI(%rsp),%ecx     /* reload 4th syscall arg */
+       movl RDI(%rsp),%r8d     /* reload 5th syscall arg */
        .endm
 
        .macro auditsys_exit exit
-       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz ia32_ret_from_sys_call
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
@@ -227,13 +269,13 @@ sysexit_from_sys_call:
 1:     setbe %al               /* 1 if error, 0 if not */
        movzbl %al,%edi         /* zero-extend that into %edi */
        call __audit_syscall_exit
-       movq RAX-ARGOFFSET(%rsp),%rax   /* reload syscall return value */
+       movq RAX(%rsp),%rax     /* reload syscall return value */
        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jz \exit
-       CLEAR_RREGS -ARGOFFSET
+       CLEAR_RREGS
        jmp int_with_check
        .endm
 
@@ -253,16 +295,16 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jz      sysenter_auditsys
 #endif
-       SAVE_REST
+       SAVE_EXTRA_REGS
        CLEAR_RREGS
        movq    $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
        movq    %rsp,%rdi        /* &pt_regs -> arg1 */
        call    syscall_trace_enter
-       LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-       RESTORE_REST
+       LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
+       RESTORE_EXTRA_REGS
        cmpq    $(IA32_NR_syscalls-1),%rax
        ja      int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
        jmp     sysenter_do_call
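For orientation, the register contract that the sysexit_from_sys_call comments
above spell out for USERGS_SYSRET32 can be restated as a small pseudo-C model.
This is a sketch only: the struct and function are invented for exposition, and
the 0x23/0x2b selector values are the conventional Linux GDT encodings of
__USER32_CS/__USER_DS, assumed here rather than taken from this patch.

    struct user_frame {
            unsigned int eip, eflags, cs, ss;
    };

    /* Models the architectural effect of swapgs + sysretl described above. */
    static void usergs_sysret32_effect(struct user_frame *u,
                                       unsigned int ecx, unsigned long r11)
    {
            /* swapgs has already switched back to the user gs base */
            u->eip    = ecx;                /* eip = ecx */
            u->eflags = (unsigned int)r11;  /* rflags = r11, RF/VM forced to 0 */
            u->cs     = 0x23;               /* __USER32_CS (assumed encoding) */
            u->ss     = 0x2b;               /* __USER_DS (assumed encoding) */
    }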
@@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
 /*
  * 32bit SYSCALL instruction entry.
  *
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
  * Arguments:
- * %eax        System call number.
- * %ebx Arg1
- * %ecx return EIP 
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
- * %esp user stack 
- * 0(%esp) Arg6
- *     
- * Interrupts off.
- *     
+ * eax  system call number
+ * ecx  return address
+ * ebx  arg1
+ * ebp  arg2   (note: not saved in the stack frame, should not be touched)
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * esp  user stack
+ * 0(%esp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
- * with the int 0x80 path.     
- */    
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
 ENTRY(ia32_cstar_target)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
+       CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
+
+       /*
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        SWAPGS_UNSAFE_STACK
        movl    %esp,%r8d
        CFI_REGISTER    rsp,r8
        movq    PER_CPU_VAR(kernel_stack),%rsp
-       /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
-        */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       SAVE_ARGS 8,0,0
-       movl    %eax,%eax       /* zero extension */
-       movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
-       movq    %rcx,RIP-ARGOFFSET(%rsp)
-       CFI_REL_OFFSET rip,RIP-ARGOFFSET
-       movq    %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+
+       /* Zero-extending 32-bit regs, do not remove */
+       movl    %eax,%eax
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi       $__USER32_DS            /* pt_regs->ss */
+       pushq_cfi       %r8                     /* pt_regs->sp */
+       CFI_REL_OFFSET rsp,0
+       pushq_cfi       %r11                    /* pt_regs->flags */
+       pushq_cfi       $__USER32_CS            /* pt_regs->cs */
+       pushq_cfi       %rcx                    /* pt_regs->ip */
+       CFI_REL_OFFSET rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rbp                     /* pt_regs->cx */
        movl    %ebp,%ecx
-       movq    $__USER32_CS,CS-ARGOFFSET(%rsp)
-       movq    $__USER32_DS,SS-ARGOFFSET(%rsp)
-       movq    %r11,EFLAGS-ARGOFFSET(%rsp)
-       /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-       movq    %r8,RSP-ARGOFFSET(%rsp) 
-       CFI_REL_OFFSET rsp,RSP-ARGOFFSET
-       /* no need to do an access_ok check here because r8 has been
-          32bit zero extended */ 
-       /* hardware stack frame is complete now */      
+       pushq_cfi_reg   rax                     /* pt_regs->ax */
+       sub     $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 10*8
+
+       /*
+        * no need to do an access_ok check here because r8 has been
+        * 32bit zero extended
+        */
        ASM_STAC
 1:     movl    (%r8),%r9d
        _ASM_EXTABLE(1b,ia32_badarg)
        ASM_CLAC
-       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        CFI_REMEMBER_STATE
        jnz   cstar_tracesys
        cmpq $IA32_NR_syscalls-1,%rax
        ja  ia32_badsys
 cstar_do_call:
-       IA32_ARG_FIXUP 1
+       /* 32bit syscall -> 64bit C ABI argument conversion */
+       movl    %edi,%r8d       /* arg5 */
+       /* r9 already loaded */ /* arg6 */
+       xchg    %ecx,%esi       /* rsi:arg2, rcx:arg4 */
+       movl    %ebx,%edi       /* arg1 */
+       movl    %edx,%edx       /* arg3 (zero extension) */
 cstar_dispatch:
        call *ia32_sys_call_table(,%rax,8)
-       movq %rax,RAX-ARGOFFSET(%rsp)
+       movq %rax,RAX(%rsp)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz sysretl_audit
 sysretl_from_sys_call:
-       andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       RESTORE_ARGS 0,-ARG_SKIP,0,0,0
-       movl RIP-ARGOFFSET(%rsp),%ecx
+       andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       RESTORE_RSI_RDI_RDX
+       movl RIP(%rsp),%ecx
        CFI_REGISTER rip,rcx
-       movl EFLAGS-ARGOFFSET(%rsp),%r11d       
+       movl EFLAGS(%rsp),%r11d
        /*CFI_REGISTER rflags,r11*/
        xorq    %r10,%r10
        xorq    %r9,%r9
        xorq    %r8,%r8
        TRACE_IRQS_ON
-       movl RSP-ARGOFFSET(%rsp),%esp
+       movl RSP(%rsp),%esp
        CFI_RESTORE rsp
+       /*
+        * 64bit->32bit SYSRET restores eip from ecx,
+        * eflags from r11 (but RF and VM bits are forced to 0),
+        * cs and ss are loaded from MSRs.
+        * (Note: 32bit->32bit SYSRET is different: since r11
+        * does not exist, it merely sets eflags.IF=1).
+        */
        USERGS_SYSRET32
-       
+
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
        CFI_RESTORE_STATE
-       movl %r9d,R9-ARGOFFSET(%rsp)    /* register to be clobbered by call */
+       movl %r9d,R9(%rsp)      /* register to be clobbered by call */
        auditsys_entry_common
-       movl R9-ARGOFFSET(%rsp),%r9d    /* reload 6th syscall arg */
+       movl R9(%rsp),%r9d      /* reload 6th syscall arg */
        jmp cstar_dispatch
 
 sysretl_audit:
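The long-mode SYSCALL behavior documented at the top of this hunk likewise
reduces to a handful of register moves. A hedged pseudo-C restatement (names
invented here; the CSTAR and FMASK MSR values are machine setup, so they are
plain parameters):

    #define X86_EFLAGS_RF 0x10000UL

    struct vcpu { unsigned long rip, rcx, r11, rflags; };

    /* What a 32-bit SYSCALL does when EFER.LMA=1, per the comment above. */
    static void syscall32_longmode_effect(struct vcpu *c,
                                          unsigned long cstar,
                                          unsigned long fmask)
    {
            c->rcx     = c->rip;                     /* return rip saved in rcx */
            c->r11     = c->rflags & ~X86_EFLAGS_RF; /* rflags to r11, RF clear */
            c->rflags &= ~fmask;                     /* MSR mask: no CLD/CLAC */
            c->rip     = cstar;                      /* compat entry point MSR */
            /* cs and ss come from previously programmed MSRs (not modeled) */
    }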
@@ -368,17 +444,17 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jz cstar_auditsys
 #endif
        xchgl %r9d,%ebp
-       SAVE_REST
-       CLEAR_RREGS 0, r9
+       SAVE_EXTRA_REGS
+       CLEAR_RREGS r9
        movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
        movq %rsp,%rdi        /* &pt_regs -> arg1 */
        call syscall_trace_enter
-       LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-       RESTORE_REST
+       LOAD_ARGS32   /* reload args from stack in case ptrace changed it */
+       RESTORE_EXTRA_REGS
        xchgl %ebp,%r9d
        cmpq $(IA32_NR_syscalls-1),%rax
        ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -391,78 +467,94 @@ ia32_badarg:
        jmp ia32_sysret
        CFI_ENDPROC
 
-/* 
- * Emulated IA32 system calls via int 0x80. 
+/*
+ * Emulated IA32 system calls via int 0x80.
  *
- * Arguments:   
- * %eax        System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6   (note: not saved in the stack frame, should not be touched)
  *
  * Notes:
- * Uses the same stack frame as the x86-64 version.    
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.  
- */                            
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
 
 ENTRY(ia32_syscall)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,SS+8-RIP
-       /*CFI_REL_OFFSET        ss,SS-RIP*/
-       CFI_REL_OFFSET  rsp,RSP-RIP
-       /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
-       /*CFI_REL_OFFSET        cs,CS-RIP*/
-       CFI_REL_OFFSET  rip,RIP-RIP
-       PARAVIRT_ADJUST_EXCEPTION_FRAME
-       SWAPGS
+       CFI_DEF_CFA     rsp,5*8
+       /*CFI_REL_OFFSET        ss,4*8 */
+       CFI_REL_OFFSET  rsp,3*8
+       /*CFI_REL_OFFSET        rflags,2*8 */
+       /*CFI_REL_OFFSET        cs,1*8 */
+       CFI_REL_OFFSET  rip,0*8
+
        /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
         */
+       PARAVIRT_ADJUST_EXCEPTION_FRAME
+       SWAPGS
        ENABLE_INTERRUPTS(CLBR_NONE)
-       movl %eax,%eax
-       pushq_cfi %rax
+
+       /* Zero-extending 32-bit regs, do not remove */
+       movl    %eax,%eax
+
+       /* Construct struct pt_regs on stack (iret frame is already on stack) */
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi_reg   rax                     /* pt_regs->ax */
        cld
-       /* note the registers are not zero extended to the sf.
-          this could be a problem. */
-       SAVE_ARGS 0,1,0
-       orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       sub     $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 10*8
+
+       orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz ia32_tracesys
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
 ia32_do_call:
-       IA32_ARG_FIXUP
+       /* 32bit syscall -> 64bit C ABI argument conversion */
+       movl %edi,%r8d  /* arg5 */
+       movl %ebp,%r9d  /* arg6 */
+       xchg %ecx,%esi  /* rsi:arg2, rcx:arg4 */
+       movl %ebx,%edi  /* arg1 */
+       movl %edx,%edx  /* arg3 (zero extension) */
        call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
-       movq %rax,RAX-ARGOFFSET(%rsp)
+       movq %rax,RAX(%rsp)
 ia32_ret_from_sys_call:
-       CLEAR_RREGS -ARGOFFSET
-       jmp int_ret_from_sys_call 
+       CLEAR_RREGS
+       jmp int_ret_from_sys_call
 
-ia32_tracesys:                  
-       SAVE_REST
+ia32_tracesys:
+       SAVE_EXTRA_REGS
        CLEAR_RREGS
        movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
        movq %rsp,%rdi        /* &pt_regs -> arg1 */
        call syscall_trace_enter
-       LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-       RESTORE_REST
+       LOAD_ARGS32     /* reload args from stack in case ptrace changed it */
+       RESTORE_EXTRA_REGS
        cmpq $(IA32_NR_syscalls-1),%rax
        ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
        jmp ia32_do_call
 END(ia32_syscall)
 
 ia32_badsys:
-       movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+       movq $0,ORIG_RAX(%rsp)
        movq $-ENOSYS,%rax
        jmp ia32_sysret
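Both inlined copies of the "32bit syscall -> 64bit C ABI argument conversion"
above (cstar_do_call and ia32_do_call) implement the same fixed register
mapping. As a reference, here is the int 0x80 convention from the comment
above as an illustrative C table; the cstar path differs only in that arg2
arrives via ebp and arg6 is fetched from the user stack into r9:

    /* Purely illustrative data mirroring the moves above. */
    static const char *const ia32_arg_map[6][2] = {
            { "ebx", "rdi" },       /* arg1 */
            { "ecx", "rsi" },       /* arg2, via xchg %ecx,%esi */
            { "edx", "rdx" },       /* arg3, movl zero-extends */
            { "esi", "rcx" },       /* arg4, via xchg %ecx,%esi */
            { "edi", "r8"  },       /* arg5 */
            { "ebp", "r9"  },       /* arg6 */
    };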
 
@@ -479,8 +571,6 @@ GLOBAL(\label)
 
        PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
        PTREGSCALL stub32_sigreturn, sys32_sigreturn
-       PTREGSCALL stub32_execve, compat_sys_execve
-       PTREGSCALL stub32_execveat, compat_sys_execveat
        PTREGSCALL stub32_fork, sys_fork
        PTREGSCALL stub32_vfork, sys_vfork
 
@@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
 
        ALIGN
 ia32_ptregs_common:
-       popq %r11
        CFI_ENDPROC
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,SS+8-ARGOFFSET
-       CFI_REL_OFFSET  rax,RAX-ARGOFFSET
-       CFI_REL_OFFSET  rcx,RCX-ARGOFFSET
-       CFI_REL_OFFSET  rdx,RDX-ARGOFFSET
-       CFI_REL_OFFSET  rsi,RSI-ARGOFFSET
-       CFI_REL_OFFSET  rdi,RDI-ARGOFFSET
-       CFI_REL_OFFSET  rip,RIP-ARGOFFSET
-/*     CFI_REL_OFFSET  cs,CS-ARGOFFSET*/
-/*     CFI_REL_OFFSET  rflags,EFLAGS-ARGOFFSET*/
-       CFI_REL_OFFSET  rsp,RSP-ARGOFFSET
-/*     CFI_REL_OFFSET  ss,SS-ARGOFFSET*/
-       SAVE_REST
+       CFI_DEF_CFA     rsp,SIZEOF_PTREGS
+       CFI_REL_OFFSET  rax,RAX
+       CFI_REL_OFFSET  rcx,RCX
+       CFI_REL_OFFSET  rdx,RDX
+       CFI_REL_OFFSET  rsi,RSI
+       CFI_REL_OFFSET  rdi,RDI
+       CFI_REL_OFFSET  rip,RIP
+/*     CFI_REL_OFFSET  cs,CS*/
+/*     CFI_REL_OFFSET  rflags,EFLAGS*/
+       CFI_REL_OFFSET  rsp,RSP
+/*     CFI_REL_OFFSET  ss,SS*/
+       SAVE_EXTRA_REGS 8
        call *%rax
-       RESTORE_REST
-       jmp  ia32_sysret        /* misbalances the return cache */
+       RESTORE_EXTRA_REGS 8
+       ret
        CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
deleted file mode 100644 (file)
index 51ecd5b..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-
-long compat_ni_syscall(void)
-{
-       return -ENOSYS;
-}
index 8e0ceecdc95790d7a53eb3fe5bc1c3867bcb9e7f..719cd702b0a476e13abb42bfa28b0a7911204382 100644 (file)
@@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
                                advice);
 }
 
-long sys32_vm86_warning(void)
-{
-       struct task_struct *me = current;
-       static char lastcomm[sizeof(me->comm)];
-
-       if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-               compat_printk(KERN_INFO
-                             "%s: vm86 mode not supported on 64 bit kernel\n",
-                             me->comm);
-               strncpy(lastcomm, me->comm, sizeof(lastcomm));
-       }
-       return -ENOSYS;
-}
-
 asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
                                   size_t count)
 {
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
deleted file mode 100644 (file)
index 4754ba0..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* System call table for ia32 emulation. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void compat_ni_syscall(void);
-
-const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-       /*
-        * Smells like a compiler bug -- it doesn't work
-        * when the & below is removed.
-        */
-       [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
-#include <asm/syscalls_32.h>
-};
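The table deleted above leaned on GCC's range-designator extension: every slot
is first defaulted to compat_ni_syscall, then the per-syscall entries generated
from asm/syscalls_32.h override the implemented ones. A standalone sketch of
the idiom (hypothetical names; GCC/Clang extension, not ISO C):

    #include <stdio.h>

    typedef int (*call_t)(void);

    static int nosys(void)    { return -38; }  /* -ENOSYS on x86 */
    static int sys_zero(void) { return 0; }

    static const call_t table[8] = {
            [0 ... 7] = nosys,      /* default every slot, as the old table did */
            [3]       = sys_zero,   /* later designators override the default */
    };

    int main(void)
    {
            printf("%d %d\n", table[0](), table[3]());      /* prints: -38 0 */
            return 0;
    }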
index 372231c22a47a46b1417e5c6739d88eb927f89fd..bdf02eeee76519582b0fe9c35b631852b1b417d9 100644 (file)
        .endm
 #endif
 
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
        .long \orig - .
        .long \alt - .
        .word \feature
        .byte \orig_len
        .byte \alt_len
+       .byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+       \oldinstr
+141:
+       .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+       .pushsection .altinstructions,"a"
+       altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+       .popsection
+
+       .pushsection .altinstr_replacement,"ax"
+143:
+       \newinstr
+144:
+       .popsection
+.endm
+
+#define old_len                        141b-140b
+#define new_len1               144f-143f
+#define new_len2               145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b)    ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+       \oldinstr
+141:
+       .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+               (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+       .pushsection .altinstructions,"a"
+       altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+       altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+       .popsection
+
+       .pushsection .altinstr_replacement,"ax"
+143:
+       \newinstr1
+144:
+       \newinstr2
+145:
+       .popsection
 .endm
 
 #endif  /*  __ASSEMBLY__  */
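alt_max_short() above is the branch-free max from the bit-hacks page cited in
the comment; the double negation is there because gas comparisons evaluate to
0/-1 rather than C's 0/1. The same identity in plain C, as a self-checking
sketch (not kernel code):

    #include <assert.h>

    /* Branch-free max(a, b): the mask is all-ones exactly when a < b,
     * in which case the xor swaps a out for b. */
    static int max_branchless(int a, int b)
    {
            return a ^ ((a ^ b) & -(a < b));
    }

    int main(void)
    {
            assert(max_branchless(3, 7) == 7);
            assert(max_branchless(7, 3) == 7);
            assert(max_branchless(5, 5) == 5);
            return 0;
    }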
index 473bdbee378a10ac2030b586dc33d3be74de5a11..ba32af062f61d69164a792630e3257c8cdc6deb5 100644 (file)
@@ -48,8 +48,9 @@ struct alt_instr {
        s32 repl_offset;        /* offset to replacement instruction */
        u16 cpuid;              /* cpuid bit set for replacement */
        u8  instrlen;           /* length of original instruction */
-       u8  replacementlen;     /* length of new instruction, <= instrlen */
-};
+       u8  replacementlen;     /* length of new instruction */
+       u8  padlen;             /* length of build-time padding */
+} __packed;
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif /* CONFIG_SMP */
 
-#define OLDINSTR(oldinstr)     "661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num)     "664"#num
+#define e_replacement(num)     "665"#num
 
-#define b_replacement(number)  "663"#number
-#define e_replacement(number)  "664"#number
+#define alt_end_marker         "663"
+#define alt_slen               "662b-661b"
+#define alt_pad_len            alt_end_marker"b-662b"
+#define alt_total_slen         alt_end_marker"b-661b"
+#define alt_rlen(num)          e_replacement(num)"f-"b_replacement(num)"f"
 
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num)                                      \
+       "661:\n\t" oldinstr "\n662:\n"                                  \
+       ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * "          \
+               "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
 
-#define ALTINSTR_ENTRY(feature, number)                                              \
+#define OLDINSTR(oldinstr, num)                                                \
+       __OLDINSTR(oldinstr, num)                                       \
+       alt_end_marker ":\n"
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b)    "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+       "661:\n\t" oldinstr "\n662:\n"                                                          \
+       ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * "  \
+               "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n"  \
+       alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)                                         \
        " .long 661b - .\n"                             /* label           */ \
-       " .long " b_replacement(number)"f - .\n"        /* new instruction */ \
+       " .long " b_replacement(num)"f - .\n"           /* new instruction */ \
        " .word " __stringify(feature) "\n"             /* feature bit     */ \
-       " .byte " alt_slen "\n"                         /* source len      */ \
-       " .byte " alt_rlen(number) "\n"                 /* replacement len */
-
-#define DISCARD_ENTRY(number)                          /* rlen <= slen */    \
-       " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+       " .byte " alt_total_slen "\n"                   /* source len      */ \
+       " .byte " alt_rlen(num) "\n"                    /* replacement len */ \
+       " .byte " alt_pad_len "\n"                      /* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number)        /* replacement */     \
-       b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)   /* replacement */     \
+       b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)                       \
-       OLDINSTR(oldinstr)                                              \
+       OLDINSTR(oldinstr, 1)                                           \
        ".pushsection .altinstructions,\"a\"\n"                         \
        ALTINSTR_ENTRY(feature, 1)                                      \
        ".popsection\n"                                                 \
-       ".pushsection .discard,\"aw\",@progbits\n"                      \
-       DISCARD_ENTRY(1)                                                \
-       ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
        ".popsection"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
-       OLDINSTR(oldinstr)                                              \
+       OLDINSTR_2(oldinstr, 1, 2)                                      \
        ".pushsection .altinstructions,\"a\"\n"                         \
        ALTINSTR_ENTRY(feature1, 1)                                     \
        ALTINSTR_ENTRY(feature2, 2)                                     \
        ".popsection\n"                                                 \
-       ".pushsection .discard,\"aw\",@progbits\n"                      \
-       DISCARD_ENTRY(1)                                                \
-       DISCARD_ENTRY(2)                                                \
-       ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
        ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative(oldinstr, newinstr, feature)                       \
        asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+       asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
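Both OLDINSTR() variants above rely on the same gas idiom for the build-time
padding: since gas's ">" evaluates to -1 when true, the expression
".skip -((rlen - slen) > 0) * (rlen - slen), 0x90" assembles to
max(0, rlen - slen) NOP bytes after the original instructions. That padding is
why replacementlen may now exceed instrlen and why alt_instr grew a padlen
field. The arithmetic in plain C (a sketch):

    /* NOPs emitted after the original instruction(s), so that a longer
     * replacement can later be patched in place without moving code. */
    static unsigned int alt_pad_len(unsigned int slen, unsigned int rlen)
    {
            return rlen > slen ? rlen - slen : 0;   /* max(0, rlen - slen) */
    }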
index efc3b22d896eb23b7e37cf9c720065c0b6b0c717..976b86a325e55cedfd28c029455ddecb2c06b6be 100644 (file)
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
 {
        volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
-       alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+       alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
                       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
                       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }
@@ -204,7 +204,6 @@ extern void clear_local_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
index 2ab1eb33106eec42eff90d27b98cb698b5c4c835..959e45b81fe29192b0f1c97a65e028e7314f603d 100644 (file)
@@ -95,13 +95,11 @@ do {                                                                        \
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
  * code region.
- *
- * (Could use an alternative three way for this if there was one.)
  */
 static __always_inline void rdtsc_barrier(void)
 {
-       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+       alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+                         "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif /* _ASM_X86_BARRIER_H */
index 1f1297b46f833ecd7843bf09a9592e0e5e61ec96..1c8b50edb2db3cac8f54c0c300d393b18bf24342 100644 (file)
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15              0
-#define R14              8
-#define R13             16
-#define R12             24
-#define RBP             32
-#define RBX             40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11             48
-#define R10             56
-#define R9              64
-#define R8              72
-#define RAX             80
-#define RCX             88
-#define RDX             96
-#define RSI            104
-#define RDI            112
-#define ORIG_RAX       120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP            128
-#define CS             136
-#define EFLAGS         144
-#define RSP            152
-#define SS             160
-
-#define ARGOFFSET      R11
-
-       .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-       subq  $9*8+\addskip, %rsp
-       CFI_ADJUST_CFA_OFFSET   9*8+\addskip
-       movq_cfi rdi, 8*8
-       movq_cfi rsi, 7*8
-       movq_cfi rdx, 6*8
-
-       .if \save_rcx
-       movq_cfi rcx, 5*8
-       .endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15            0*8
+#define R14            1*8
+#define R13            2*8
+#define R12            3*8
+#define RBP            4*8
+#define RBX            5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11            6*8
+#define R10            7*8
+#define R9             8*8
+#define R8             9*8
+#define RAX            10*8
+#define RCX            11*8
+#define RDX            12*8
+#define RSI            13*8
+#define RDI            14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX       15*8
+/* Return frame for iretq */
+#define RIP            16*8
+#define CS             17*8
+#define EFLAGS         18*8
+#define RSP            19*8
+#define SS             20*8
+
+#define SIZEOF_PTREGS  21*8
+
+       .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+       subq    $15*8+\addskip, %rsp
+       CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+       .endm
 
-       .if \rax_enosys
-       movq $-ENOSYS, 4*8(%rsp)
-       .else
-       movq_cfi rax, 4*8
+       .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
+       .if \r11
+       movq_cfi r11, 6*8+\offset
        .endif
-
-       .if \save_r891011
-       movq_cfi r8,  3*8
-       movq_cfi r9,  2*8
-       movq_cfi r10, 1*8
-       movq_cfi r11, 0*8
+       .if \r8910
+       movq_cfi r10, 7*8+\offset
+       movq_cfi r9,  8*8+\offset
+       movq_cfi r8,  9*8+\offset
+       .endif
+       .if \rax
+       movq_cfi rax, 10*8+\offset
+       .endif
+       .if \rcx
+       movq_cfi rcx, 11*8+\offset
        .endif
+       movq_cfi rdx, 12*8+\offset
+       movq_cfi rsi, 13*8+\offset
+       movq_cfi rdi, 14*8+\offset
+       .endm
+       .macro SAVE_C_REGS offset=0
+       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+       SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_R891011
+       SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_RCX_R891011
+       SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
+       SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
+       .endm
+
+       .macro SAVE_EXTRA_REGS offset=0
+       movq_cfi r15, 0*8+\offset
+       movq_cfi r14, 1*8+\offset
+       movq_cfi r13, 2*8+\offset
+       movq_cfi r12, 3*8+\offset
+       movq_cfi rbp, 4*8+\offset
+       movq_cfi rbx, 5*8+\offset
+       .endm
+       .macro SAVE_EXTRA_REGS_RBP offset=0
+       movq_cfi rbp, 4*8+\offset
+       .endm
 
+       .macro RESTORE_EXTRA_REGS offset=0
+       movq_cfi_restore 0*8+\offset, r15
+       movq_cfi_restore 1*8+\offset, r14
+       movq_cfi_restore 2*8+\offset, r13
+       movq_cfi_restore 3*8+\offset, r12
+       movq_cfi_restore 4*8+\offset, rbp
+       movq_cfi_restore 5*8+\offset, rbx
        .endm
 
-#define ARG_SKIP       (9*8)
+       .macro ZERO_EXTRA_REGS
+       xorl    %r15d, %r15d
+       xorl    %r14d, %r14d
+       xorl    %r13d, %r13d
+       xorl    %r12d, %r12d
+       xorl    %ebp, %ebp
+       xorl    %ebx, %ebx
+       .endm
 
-       .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-                           rstor_r8910=1, rstor_rdx=1
+       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
        .if \rstor_r11
-       movq_cfi_restore 0*8, r11
+       movq_cfi_restore 6*8, r11
        .endif
-
        .if \rstor_r8910
-       movq_cfi_restore 1*8, r10
-       movq_cfi_restore 2*8, r9
-       movq_cfi_restore 3*8, r8
+       movq_cfi_restore 7*8, r10
+       movq_cfi_restore 8*8, r9
+       movq_cfi_restore 9*8, r8
        .endif
-
        .if \rstor_rax
-       movq_cfi_restore 4*8, rax
+       movq_cfi_restore 10*8, rax
        .endif
-
        .if \rstor_rcx
-       movq_cfi_restore 5*8, rcx
+       movq_cfi_restore 11*8, rcx
        .endif
-
        .if \rstor_rdx
-       movq_cfi_restore 6*8, rdx
-       .endif
-
-       movq_cfi_restore 7*8, rsi
-       movq_cfi_restore 8*8, rdi
-
-       .if ARG_SKIP+\addskip > 0
-       addq $ARG_SKIP+\addskip, %rsp
-       CFI_ADJUST_CFA_OFFSET   -(ARG_SKIP+\addskip)
+       movq_cfi_restore 12*8, rdx
        .endif
+       movq_cfi_restore 13*8, rsi
+       movq_cfi_restore 14*8, rdi
        .endm
-
-       .macro LOAD_ARGS offset, skiprax=0
-       movq \offset(%rsp),    %r11
-       movq \offset+8(%rsp),  %r10
-       movq \offset+16(%rsp), %r9
-       movq \offset+24(%rsp), %r8
-       movq \offset+40(%rsp), %rcx
-       movq \offset+48(%rsp), %rdx
-       movq \offset+56(%rsp), %rsi
-       movq \offset+64(%rsp), %rdi
-       .if \skiprax
-       .else
-       movq \offset+72(%rsp), %rax
-       .endif
+       .macro RESTORE_C_REGS
+       RESTORE_C_REGS_HELPER 1,1,1,1,1
        .endm
-
-#define REST_SKIP      (6*8)
-
-       .macro SAVE_REST
-       subq $REST_SKIP, %rsp
-       CFI_ADJUST_CFA_OFFSET   REST_SKIP
-       movq_cfi rbx, 5*8
-       movq_cfi rbp, 4*8
-       movq_cfi r12, 3*8
-       movq_cfi r13, 2*8
-       movq_cfi r14, 1*8
-       movq_cfi r15, 0*8
+       .macro RESTORE_C_REGS_EXCEPT_RAX
+       RESTORE_C_REGS_HELPER 0,1,1,1,1
        .endm
-
-       .macro RESTORE_REST
-       movq_cfi_restore 0*8, r15
-       movq_cfi_restore 1*8, r14
-       movq_cfi_restore 2*8, r13
-       movq_cfi_restore 3*8, r12
-       movq_cfi_restore 4*8, rbp
-       movq_cfi_restore 5*8, rbx
-       addq $REST_SKIP, %rsp
-       CFI_ADJUST_CFA_OFFSET   -(REST_SKIP)
+       .macro RESTORE_C_REGS_EXCEPT_RCX
+       RESTORE_C_REGS_HELPER 1,0,1,1,1
        .endm
-
-       .macro SAVE_ALL
-       SAVE_ARGS
-       SAVE_REST
+       .macro RESTORE_C_REGS_EXCEPT_R11
+       RESTORE_C_REGS_HELPER 1,1,0,1,1
+       .endm
+       .macro RESTORE_C_REGS_EXCEPT_RCX_R11
+       RESTORE_C_REGS_HELPER 1,0,0,1,1
+       .endm
+       .macro RESTORE_RSI_RDI
+       RESTORE_C_REGS_HELPER 0,0,0,0,0
+       .endm
+       .macro RESTORE_RSI_RDI_RDX
+       RESTORE_C_REGS_HELPER 0,0,0,0,1
        .endm
 
-       .macro RESTORE_ALL addskip=0
-       RESTORE_REST
-       RESTORE_ARGS 1, \addskip
+       .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+       addq $15*8+\addskip, %rsp
+       CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
        .endm
 
        .macro icebp
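The offset defines at the top of this hunk encode one 8-byte slot per saved
register, ORIG_RAX at slot 15, and the iret frame on top. As a hedged
compile-time cross-check, here is the same layout as a C struct (the struct
and field names are illustrative, not the kernel's struct pt_regs
declaration). Note also that ZERO_EXTRA_REGS can zero r12-r15 with 32-bit
xorl because 32-bit writes zero-extend into the full 64-bit register.

    #include <stddef.h>

    struct ptregs_sketch {
            unsigned long r15, r14, r13, r12, bp, bx;            /* callee-preserved */
            unsigned long r11, r10, r9, r8, ax, cx, dx, si, di;  /* callee-clobbered */
            unsigned long orig_ax;       /* syscall nr / error code / IRQ nr */
            unsigned long ip, cs, flags, sp, ss;                 /* iret frame */
    };

    _Static_assert(offsetof(struct ptregs_sketch, orig_ax) == 15 * 8, "ORIG_RAX");
    _Static_assert(offsetof(struct ptregs_sketch, ip) == 16 * 8, "RIP");
    _Static_assert(sizeof(struct ptregs_sketch) == 21 * 8, "SIZEOF_PTREGS");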
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
  */
 
        .macro SAVE_ALL
-       pushl_cfi %eax
-       CFI_REL_OFFSET eax, 0
-       pushl_cfi %ebp
-       CFI_REL_OFFSET ebp, 0
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %edx
-       CFI_REL_OFFSET edx, 0
-       pushl_cfi %ecx
-       CFI_REL_OFFSET ecx, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg eax
+       pushl_cfi_reg ebp
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
+       pushl_cfi_reg edx
+       pushl_cfi_reg ecx
+       pushl_cfi_reg ebx
        .endm
 
        .macro RESTORE_ALL
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %ecx
-       CFI_RESTORE ecx
-       popl_cfi %edx
-       CFI_RESTORE edx
-       popl_cfi %esi
-       CFI_RESTORE esi
-       popl_cfi %edi
-       CFI_RESTORE edi
-       popl_cfi %ebp
-       CFI_RESTORE ebp
-       popl_cfi %eax
-       CFI_RESTORE eax
+       popl_cfi_reg ebx
+       popl_cfi_reg ecx
+       popl_cfi_reg edx
+       popl_cfi_reg esi
+       popl_cfi_reg edi
+       popl_cfi_reg ebp
+       popl_cfi_reg eax
        .endm
 
 #endif /* CONFIG_X86_64 */
index 59c6c401f79f16d9b98533275d66d437cbaeff0c..acdee09228b30e020332f0beb7e55e58e65b718e 100644 (file)
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
                sp = task_pt_regs(current)->sp;
        } else {
                /* -128 for the x32 ABI redzone */
-               sp = this_cpu_read(old_rsp) - 128;
+               sp = task_pt_regs(current)->sp - 128;
        }
 
        return (void __user *)round_down(sp - len, 16);
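The replacement above derives the x32 stack pointer from task_pt_regs() rather
than the per-CPU old_rsp copy; the surrounding allocation math is unchanged
and small enough to restate (hypothetical helper, not the kernel function):

    /* Allocate len bytes of user stack: skip the 128-byte x32 redzone,
     * then round down to 16-byte alignment. */
    static unsigned long compat_alloc_sketch(unsigned long sp,
                                             unsigned long len, int is_ia32)
    {
            if (!is_ia32)
                    sp -= 128;              /* x32 ABI redzone */
            return (sp - len) & ~15UL;      /* round_down(sp - len, 16) */
    }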
index 90a54851aedc98b29c65856986ade222818381b8..854c04b3c9c264ff84e84508e6a8d5fbf1ff2aa8 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER   ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD   ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                         " .word %P0\n"         /* 1: do replace */
                         " .byte 2b - 1b\n"     /* source len */
                         " .byte 0\n"           /* replacement len */
+                        " .byte 0\n"           /* pad len */
                         ".previous\n"
                         /* skipping size check since replacement size = 0 */
                         : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                         " .word %P0\n"         /* feature bit */
                         " .byte 2b - 1b\n"     /* source len */
                         " .byte 0\n"           /* replacement len */
+                        " .byte 0\n"           /* pad len */
                         ".previous\n"
                         /* skipping size check since replacement size = 0 */
                         : : "i" (bit) : : t_no);
@@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                             " .word %P1\n"             /* feature bit */
                             " .byte 2b - 1b\n"         /* source len */
                             " .byte 4f - 3f\n"         /* replacement len */
+                            " .byte 0\n"               /* pad len */
                             ".previous\n"
                             ".section .discard,\"aw\",@progbits\n"
                             " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
-/*
- * We need to spell the jumps to the compiler because, depending on the offset,
- * the replacement jump can be bigger than the original jump, and this we cannot
- * have. Thus, we force the jump to the widest, 4-byte, signed relative
- * offset even though the last would often fit in less bytes.
- */
-               asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+               asm_volatile_goto("1: jmp %l[t_dynamic]\n"
                         "2:\n"
+                        ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+                                "((5f-4f) - (2b-1b)),0x90\n"
+                        "3:\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"              /* src offset */
-                        " .long 3f - .\n"              /* repl offset */
+                        " .long 4f - .\n"              /* repl offset */
                         " .word %P1\n"                 /* always replace */
-                        " .byte 2b - 1b\n"             /* src len */
-                        " .byte 4f - 3f\n"             /* repl len */
+                        " .byte 3b - 1b\n"             /* src len */
+                        " .byte 5f - 4f\n"             /* repl len */
+                        " .byte 3b - 2b\n"             /* pad len */
                         ".previous\n"
                         ".section .altinstr_replacement,\"ax\"\n"
-                        "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
-                        "4:\n"
+                        "4: jmp %l[t_no]\n"
+                        "5:\n"
                         ".previous\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"              /* src offset */
                         " .long 0\n"                   /* no replacement */
                         " .word %P0\n"                 /* feature bit */
-                        " .byte 2b - 1b\n"             /* src len */
+                        " .byte 3b - 1b\n"             /* src len */
                         " .byte 0\n"                   /* repl len */
+                        " .byte 0\n"                   /* pad len */
                         ".previous\n"
                         : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
                         : : t_dynamic, t_no);
@@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
                             " .word %P2\n"             /* always replace */
                             " .byte 2b - 1b\n"         /* source len */
                             " .byte 4f - 3f\n"         /* replacement len */
+                            " .byte 0\n"               /* pad len */
                             ".previous\n"
                             ".section .discard,\"aw\",@progbits\n"
                             " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
                             " .word %P1\n"             /* feature bit */
                             " .byte 4b - 3b\n"         /* src len */
                             " .byte 6f - 5f\n"         /* repl len */
+                            " .byte 0\n"               /* pad len */
                             ".previous\n"
                             ".section .discard,\"aw\",@progbits\n"
                             " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
index a94b82e8f156f3888e0ab90ac879e39dd05ccec1..a0bf89fd26470102f6f38031808437f01afe8e1f 100644 (file)
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
  * Pentium F0 0F bugfix can have resulted in the mapped
  * IDT being write-protected.
  */
-#define set_intr_gate(n, addr)                                         \
+#define set_intr_gate_notrace(n, addr)                                 \
        do {                                                            \
                BUG_ON((unsigned)n > 0xFF);                             \
                _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,        \
                          __KERNEL_CS);                                 \
+       } while (0)
+
+#define set_intr_gate(n, addr)                                         \
+       do {                                                            \
+               set_intr_gate_notrace(n, addr);                         \
                _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
                                0, 0, __KERNEL_CS);                     \
        } while (0)
index f6f15986df6ca0f097cf245f04da7a6c123ad3b7..de1cdaf4d74346040667da81e767a5a32697c7c6 100644 (file)
        CFI_ADJUST_CFA_OFFSET 8
        .endm
 
+       .macro pushq_cfi_reg reg
+       pushq %\reg
+       CFI_ADJUST_CFA_OFFSET 8
+       CFI_REL_OFFSET \reg, 0
+       .endm
+
        .macro popq_cfi reg
        popq \reg
        CFI_ADJUST_CFA_OFFSET -8
        .endm
 
+       .macro popq_cfi_reg reg
+       popq %\reg
+       CFI_ADJUST_CFA_OFFSET -8
+       CFI_RESTORE \reg
+       .endm
+
        .macro pushfq_cfi
        pushfq
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET 4
        .endm
 
+       .macro pushl_cfi_reg reg
+       pushl %\reg
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET \reg, 0
+       .endm
+
        .macro popl_cfi reg
        popl \reg
        CFI_ADJUST_CFA_OFFSET -4
        .endm
 
+       .macro popl_cfi_reg reg
+       popl %\reg
+       CFI_ADJUST_CFA_OFFSET -4
+       CFI_RESTORE \reg
+       .endm
+
        .macro pushfl_cfi
        pushfl
        CFI_ADJUST_CFA_OFFSET 4
index 25bce45c6fc42f848bf70b56cf5a37884dd18684..3738b138b843d46467c75a910d916cc79ebad25f 100644 (file)
@@ -2,6 +2,8 @@
 #define _ASM_X86_EFI_H
 
 #include <asm/i387.h>
+#include <asm/pgtable.h>
+
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
  * with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
-extern void __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(void);
+extern pgd_t * __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_unmap_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
index ca3347a9dab5211399e9e93a5e53d71ca1943095..935588d95c82fa35b68e3638d9acfe0a2734b373 100644 (file)
@@ -171,10 +171,11 @@ do {                                              \
 static inline void elf_common_init(struct thread_struct *t,
                                   struct pt_regs *regs, const u16 ds)
 {
-       regs->ax = regs->bx = regs->cx = regs->dx = 0;
-       regs->si = regs->di = regs->bp = 0;
+       /* Commented-out registers are cleared in stub_execve */
+       /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
+       regs->si = regs->di /*= regs->bp*/ = 0;
        regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
-       regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+       /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
        t->fs = t->gs = 0;
        t->fsindex = t->gsindex = 0;
        t->ds = t->es = ds;
@@ -365,6 +366,7 @@ enum align_flags {
 struct va_alignment {
        int flags;
        unsigned long mask;
+       unsigned long bits;
 } ____cacheline_aligned;
 
 extern struct va_alignment va_align;
index 72ba21a8b5fc2ff8b76c6c3e53b331ca6a558339..da5e96756570be6b1794e20d4161f4a540d1a781 100644 (file)
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task
+ * on this CPU.
+ *
+ * This will disable any lazy restore of the current FPU state, but if
+ * the current thread owns the FPU, its state will still be saved when
+ * the task is next switched out.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+       per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+       tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+       return new == this_cpu_read_stable(fpu_owner_task) &&
+               cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
        return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
 
 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-       memset(fx, 0, xstate_size);
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -351,8 +378,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
        __thread_set_has_fpu(tsk);
 }
 
-static inline void __drop_fpu(struct task_struct *tsk)
+static inline void drop_fpu(struct task_struct *tsk)
 {
+       /*
+        * Forget coprocessor state..
+        */
+       preempt_disable();
+       tsk->thread.fpu_counter = 0;
+
        if (__thread_has_fpu(tsk)) {
                /* Ignore delayed exceptions from user space */
                asm volatile("1: fwait\n"
@@ -360,30 +393,29 @@ static inline void __drop_fpu(struct task_struct *tsk)
                             _ASM_EXTABLE(1b, 2b));
                __thread_fpu_end(tsk);
        }
-}
 
-static inline void drop_fpu(struct task_struct *tsk)
-{
-       /*
-        * Forget coprocessor state..
-        */
-       preempt_disable();
-       tsk->thread.fpu_counter = 0;
-       __drop_fpu(tsk);
        clear_stopped_child_used_math(tsk);
        preempt_enable();
 }
 
-static inline void drop_init_fpu(struct task_struct *tsk)
+static inline void restore_init_xstate(void)
+{
+       if (use_xsave())
+               xrstor_state(init_xstate_buf, -1);
+       else
+               fxrstor_checking(&init_xstate_buf->i387);
+}
+
+/*
+ * Reset the FPU state in the eager case and drop it in the lazy case (later use
+ * will reinit it).
+ */
+static inline void fpu_reset_state(struct task_struct *tsk)
 {
        if (!use_eager_fpu())
                drop_fpu(tsk);
-       else {
-               if (use_xsave())
-                       xrstor_state(init_xstate_buf, -1);
-               else
-                       fxrstor_checking(&init_xstate_buf->i387);
-       }
+       else
+               restore_init_xstate();
 }
 
 /*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-       per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-       return new == this_cpu_read_stable(fpu_owner_task) &&
-               cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
        fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
         * If the task has used the math, pre-load the FPU on xsave processors
         * or if the past 5 consecutive context-switches used math.
         */
-       fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                            new->thread.fpu_counter > 5);
+       fpu.preload = tsk_used_math(new) &&
+                     (use_eager_fpu() || new->thread.fpu_counter > 5);
+
        if (__thread_has_fpu(old)) {
                if (!__save_init_fpu(old))
-                       cpu = ~0;
-               old->thread.fpu.last_cpu = cpu;
-               old->thread.fpu.has_fpu = 0;    /* But leave fpu_owner_task! */
+                       task_disable_lazy_fpu_restore(old);
+               else
+                       old->thread.fpu.last_cpu = cpu;
+
+               /* But leave fpu_owner_task! */
+               old->thread.fpu.has_fpu = 0;
 
                /* Don't change CR0.TS if we just switch! */
                if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
                        stts();
        } else {
                old->thread.fpu_counter = 0;
-               old->thread.fpu.last_cpu = ~0;
+               task_disable_lazy_fpu_restore(old);
                if (fpu.preload) {
                        new->thread.fpu_counter++;
-                       if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+                       if (fpu_lazy_restore(new, cpu))
                                fpu.preload = 0;
                        else
                                prefetch(new->thread.fpu.state);
@@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
 {
        if (fpu.preload) {
                if (unlikely(restore_fpu_checking(new)))
-                       drop_init_fpu(new);
+                       fpu_reset_state(new);
        }
 }
 
@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
 }
 
 /*
- * Need to be preemption-safe.
+ * Needs to be preemption-safe.
  *
  * NOTE! user_fpu_begin() must be used only immediately before restoring
- * it. This function does not do any save/restore on their own.
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
  */
 static inline void user_fpu_begin(void)
 {
@@ -519,24 +539,6 @@ static inline void __save_fpu(struct task_struct *tsk)
                fpu_fxsave(&tsk->thread.fpu);
 }
 
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-       WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-       if (use_eager_fpu()) {
-               __save_fpu(tsk);
-               return;
-       }
-
-       preempt_disable();
-       __save_init_fpu(tsk);
-       __thread_fpu_end(tsk);
-       preempt_enable();
-}
-
 /*
  * i387 state interaction
  */
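Elsewhere in this header, switch_fpu_prepare()'s preload decision (visible in
the hunk above) boils down to a one-line predicate; restated standalone with
illustrative names:

    /* Preload the FPU on context switch when the incoming task uses math
     * and either eager FPU is enabled or it used math >5 switches in a row. */
    static int fpu_should_preload(int tsk_used_math, int eager_fpu,
                                  unsigned char fpu_counter)
    {
            return tsk_used_math && (eager_fpu || fpu_counter > 5);
    }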
index 9662290e0b2075ab42608af776abbe4a4219b6fd..e9571ddabc4feb821ae04d47c9d6c3b509178344 100644 (file)
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-                                   - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED       (-1)
index 47f29b1d18464aa6870ce6add18d5f37af2c28fb..e7814b74caf8235c985d5f49492aa0656cfac053 100644 (file)
@@ -69,7 +69,7 @@ struct insn {
        const insn_byte_t *next_byte;
 };
 
-#define MAX_INSN_SIZE  16
+#define MAX_INSN_SIZE  15
 
 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
 #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
index f42a04735a0a66c7f15cdc8640308861902c753d..e37d6b3ad9831bb05412114bac11f1cd0bf0bac3 100644 (file)
@@ -79,11 +79,12 @@ struct iommu_table_entry {
  *  d). Similar to the 'init', except that this gets called from pci_iommu_init
  *      where we do have a memory allocator.
  *
- * The standard vs the _FINISH differs in that the _FINISH variant will
- * continue detecting other IOMMUs in the call list after the
- * the detection routine returns a positive number. The _FINISH will
- * stop the execution chain. Both will still call the 'init' and
- * 'late_init' functions if they are set.
+ * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
+ * in that the former will continue detecting other IOMMUs in the call
+ * list after the detection routine returns a positive number, while the
+ * latter will stop the execution chain upon first successful detection.
+ * Both variants will still call the 'init' and 'late_init' functions if
+ * they are set.
  */
 #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init)         \
        __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
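A hedged sketch of the dispatch loop this comment describes; the loop shape mirrors the detection pass in pci-dma.c, and the IOMMU_FINISH_IF_DETECTED flag plus the __iommu_table section bounds should be treated as assumptions:

	struct iommu_table_entry *p;

	for (p = __iommu_table; p < __iommu_table_end; p++) {
		if (!p->detect || p->detect() <= 0)
			continue;			/* not this IOMMU, keep scanning */
		p->flags |= IOMMU_DETECTED;
		if (p->flags & IOMMU_FINISH_IF_DETECTED)
			break;				/* _FINISH variant: stop the chain */
	}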
index 0a8b519226b8feb37368ffbc4ca81011bc031fde..b77f5edb03b0c02dc2047d52d07c9da447dba209 100644 (file)
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
 #define USERGS_SYSRET32                                \
        swapgs;                                 \
        sysretl
-#define ENABLE_INTERRUPTS_SYSEXIT32            \
-       swapgs;                                 \
-       sti;                                    \
-       sysexit
 
 #else
 #define INTERRUPT_RETURN               iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
 
        return arch_irqs_disabled_flags(flags);
 }
+#endif /* !__ASSEMBLY__ */
 
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON                call trace_hardirqs_on_thunk;
+#  define TRACE_IRQS_OFF       call trace_hardirqs_off_thunk;
 #else
-
-#ifdef CONFIG_X86_64
-#define ARCH_LOCKDEP_SYS_EXIT          call lockdep_sys_exit_thunk
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  ifdef CONFIG_X86_64
+#    define LOCKDEP_SYS_EXIT           call lockdep_sys_exit_thunk
+#    define LOCKDEP_SYS_EXIT_IRQ \
        TRACE_IRQS_ON; \
        sti; \
-       SAVE_REST; \
-       LOCKDEP_SYS_EXIT; \
-       RESTORE_REST; \
+       call lockdep_sys_exit_thunk; \
        cli; \
        TRACE_IRQS_OFF;
-
-#else
-#define ARCH_LOCKDEP_SYS_EXIT                  \
+#  else
+#    define LOCKDEP_SYS_EXIT \
        pushl %eax;                             \
        pushl %ecx;                             \
        pushl %edx;                             \
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
        popl %edx;                              \
        popl %ecx;                              \
        popl %eax;
-
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#  define TRACE_IRQS_ON                call trace_hardirqs_on_thunk;
-#  define TRACE_IRQS_OFF       call trace_hardirqs_off_thunk;
+#    define LOCKDEP_SYS_EXIT_IRQ
+#  endif
 #else
-#  define TRACE_IRQS_ON
-#  define TRACE_IRQS_OFF
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#  define LOCKDEP_SYS_EXIT     ARCH_LOCKDEP_SYS_EXIT
-#  define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
-# else
 #  define LOCKDEP_SYS_EXIT
 #  define LOCKDEP_SYS_EXIT_IRQ
-# endif
-
+#endif
 #endif /* __ASSEMBLY__ */
+
 #endif
index 6a2cefb4395a4228cce550ef8b231f3e7158d9d1..a4c1cf7e93f812e85fb56d3f858547192c028c58 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
 #include <linux/types.h>
@@ -30,8 +30,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_X86_64
 typedef u64 jump_label_t;
 #else
@@ -44,4 +42,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index a236e39cc385a4a468e745ce0d9fdb1e2d08341b..dea2e7e962e3e0648c9ecaaaffc5cb723b32f299 100644 (file)
@@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
                (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
-#define SELECTOR_TI_MASK (1 << 2)
-#define SELECTOR_RPL_MASK 0x03
-
-#define IOPL_SHIFT 12
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
@@ -345,6 +340,7 @@ struct kvm_pmu {
 enum {
        KVM_DEBUGREG_BP_ENABLED = 1,
        KVM_DEBUGREG_WONT_EXIT = 2,
+       KVM_DEBUGREG_RELOAD = 4,
 };
 
 struct kvm_vcpu_arch {
@@ -431,6 +427,9 @@ struct kvm_vcpu_arch {
 
        int cpuid_nent;
        struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+       int maxphyaddr;
+
        /* emulate context */
 
        struct x86_emulate_ctxt emulate_ctxt;
@@ -550,11 +549,20 @@ struct kvm_arch_memory_slot {
        struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+/*
+ * As the 'mode' we use the number of bits allocated in the LDR for the
+ * logical processor ID.  It happens that these are all powers of two.
+ * This makes it very easy to detect cases where the APICs are
+ * configured for multiple modes; in that case, we cannot use the map and
+ * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ */
+#define KVM_APIC_MODE_XAPIC_CLUSTER          4
+#define KVM_APIC_MODE_XAPIC_FLAT             8
+#define KVM_APIC_MODE_X2APIC                16
+
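Because the three mode values are distinct powers of two, OR-ing together the modes of all vCPUs yields a power of two only when every APIC agrees, so a mixed configuration is one cheap test away. A hedged sketch (apic_mode_of() is a hypothetical accessor):

	u8 mode = 0;

	kvm_for_each_vcpu(i, vcpu, kvm)
		mode |= apic_mode_of(vcpu);	/* hypothetical per-vcpu accessor */

	if (mode & (mode - 1))
		mode = 0;	/* mixed modes: disable the map-based fast path */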
 struct kvm_apic_map {
        struct rcu_head rcu;
-       u8 ldr_bits;
-       /* fields bellow are used to decode ldr values in different modes */
-       u32 cid_shift, cid_mask, lid_mask, broadcast;
+       u8 mode;
        struct kvm_lapic *phys_map[256];
        /* first index is cluster id second is cpu id in a cluster */
        struct kvm_lapic *logical_map[16][16];
@@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                                      struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -933,6 +943,7 @@ struct x86_emulate_ctxt;
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
index e62cf897f7819bc9795ab096a15689b3543e07b2..c1adf33fdd0d6f70f055b9a056bc7787bda7635e 100644 (file)
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void)
 
 static inline bool kvm_para_available(void)
 {
-       return 0;
+       return false;
 }
 
 static inline unsigned int kvm_arch_para_features(void)
index 9b3de99dc0044a8b6ccdba0e7523f5b1e8425c03..1f5a86d518db379ea65c100158df0c60988bb810 100644 (file)
@@ -116,6 +116,12 @@ struct mca_config {
        u32 rip_msr;
 };
 
+struct mce_vendor_flags {
+       __u64           overflow_recov  : 1, /* cpuid_ebx(0x80000007) */
+                       __reserved_0    : 63;
+};
+extern struct mce_vendor_flags mce_flags;
+
 extern struct mca_config mca_cfg;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -128,9 +134,11 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
@@ -183,11 +191,11 @@ typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 
 enum mcp_flags {
-       MCP_TIMESTAMP = (1 << 0),       /* log time stamp */
-       MCP_UC = (1 << 1),              /* log uncorrected errors */
-       MCP_DONTLOG = (1 << 2),         /* only clear, don't log */
+       MCP_TIMESTAMP   = BIT(0),       /* log time stamp */
+       MCP_UC          = BIT(1),       /* log uncorrected errors */
+       MCP_DONTLOG     = BIT(2),       /* only clear, don't log */
 };
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 int mce_notify_irq(void);
 
index 201b520521ed74b6e35b63cda531ae58f75ec674..2fb20d6f7e23b0ccace549901dacf89b51e9c381 100644 (file)
@@ -75,6 +75,79 @@ static inline void __exit exit_amd_microcode(void) {}
 
 #ifdef CONFIG_MICROCODE_EARLY
 #define MAX_UCODE_COUNT 128
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)       \
+               (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+
+/*
+ * During the early microcode-loading phase on the BSP, boot_cpu_data is not
+ * set up yet, so x86_vendor() is used to get the vendor id for the BSP.
+ *
+ * In the 32-bit AP case, accessing boot_cpu_data would need a linear address.
+ * To keep the code simple, we use x86_vendor() for the APs as well.
+ *
+ * x86_vendor() reads the vendor information directly from CPUID.
+ */
+static inline int x86_vendor(void)
+{
+       u32 eax = 0x00000000;
+       u32 ebx, ecx = 0, edx;
+
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+
+       if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+               return X86_VENDOR_INTEL;
+
+       if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+               return X86_VENDOR_AMD;
+
+       return X86_VENDOR_UNKNOWN;
+}
+
+static inline unsigned int __x86_family(unsigned int sig)
+{
+       unsigned int x86;
+
+       x86 = (sig >> 8) & 0xf;
+
+       if (x86 == 0xf)
+               x86 += (sig >> 20) & 0xff;
+
+       return x86;
+}
+
+static inline unsigned int x86_family(void)
+{
+       u32 eax = 0x00000001;
+       u32 ebx, ecx = 0, edx;
+
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+
+       return __x86_family(eax);
+}
+
+static inline unsigned int x86_model(unsigned int sig)
+{
+       unsigned int x86, model;
+
+       x86 = __x86_family(sig);
+
+       model = (sig >> 4) & 0xf;
+
+       if (x86 == 0x6 || x86 == 0xf)
+               model += ((sig >> 16) & 0xf) << 4;
+
+       return model;
+}
+
 extern void __init load_ucode_bsp(void);
 extern void load_ucode_ap(void);
 extern int __init save_microcode_in_initrd(void);
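A worked example of the family/model decoding above (the signature value is illustrative): for CPUID signature 0x000306a9,

	x86   = (0x000306a9 >> 8) & 0xf;		/* = 0x6; not 0xf, so no extended-family add */
	model = (0x000306a9 >> 4) & 0xf;		/* = 0xa */
	model += ((0x000306a9 >> 16) & 0xf) << 4;	/* family is 0x6: += 0x30, model = 0x3a */

so __x86_family() yields 0x6 and x86_model() yields 0x3a for this part.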
index dd4c20043ce75be7545e963dd98ba38ce662396e..2b9209c46ca939991abed04a1c5d4ef786b0c698 100644 (file)
@@ -56,12 +56,15 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-extern int
-get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev);
+extern int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc);
 extern int microcode_sanity_check(void *mc, int print_err);
-extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev);
-extern int
-update_match_revision(struct microcode_header_intel *mc_header, int rev);
+extern int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc);
+
+static inline int
+revision_is_newer(struct microcode_header_intel *mc_header, int rev)
+{
+       return (mc_header->rev <= rev) ? 0 : 1;
+}
 
 #ifdef CONFIG_MICROCODE_INTEL_EARLY
 extern void __init load_ucode_intel_bsp(void);
index a1410db38a1a682f758d44f384e2947b39203175..653dfa7662e17aa297b1277a81999854ecbea6e2 100644 (file)
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
                     :: "a" (eax), "c" (ecx));
 }
 
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+       trace_hardirqs_on();
+       /* "mwait %eax, %ecx;" */
+       asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+                    :: "a" (eax), "c" (ecx));
+}
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
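Worth spelling out for __sti_mwait(): STI inhibits interrupts for one more instruction, so nothing can fire between enabling IRQs and entering MWAIT, and the wakeup event cannot be lost in that window. A minimal idle-style pairing, assuming the usual monitor/mwait protocol (hint values of 0 are illustrative):

	__monitor(&current_thread_info()->flags, 0, 0);	/* arm the monitor on TIF flags */
	if (!need_resched())
		__sti_mwait(0, 0);			/* enable IRQs and wait, atomically */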
index 965c47d254aa0f68ea83e4dc35cd983cc9ff789b..5f6051d5d139afb8ed126a5a6883fa43f23a2cde 100644 (file)
@@ -976,11 +976,6 @@ extern void default_banner(void);
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-
-#define ENABLE_INTERRUPTS_SYSEXIT32                                    \
-       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
-                 CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
index ec1c93588cefd0c4e3a4705c6d966e2555ba95aa..d2203b5d9538ff700784c088b945c506903fab29 100644 (file)
@@ -210,8 +210,23 @@ struct x86_hw_tss {
        unsigned long           sp0;
        unsigned short          ss0, __ss0h;
        unsigned long           sp1;
-       /* ss1 caches MSR_IA32_SYSENTER_CS: */
-       unsigned short          ss1, __ss1h;
+
+       /*
+        * We don't use ring 1, so ss1 is a convenient scratch space in
+        * the same cacheline as sp0.  We use ss1 to cache the value in
+        * MSR_IA32_SYSENTER_CS.  When we context switch
+        * MSR_IA32_SYSENTER_CS, we first check if the new value being
+        * written matches ss1, and, if it's not, then we wrmsr the new
+        * value and update ss1.
+        *
+        * The only reason we context switch MSR_IA32_SYSENTER_CS is
+        * that we set it to zero in vm86 tasks to avoid corrupting the
+        * stack if we were to go through the sysenter path from vm86
+        * mode.
+        */
+       unsigned short          ss1;    /* MSR_IA32_SYSENTER_CS */
+
+       unsigned short          __ss1h;
        unsigned long           sp2;
        unsigned short          ss2, __ss2h;
        unsigned long           __cr3;
@@ -276,13 +291,17 @@ struct tss_struct {
        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
 
        /*
-        * .. and then another 0x100 bytes for the emergency kernel stack:
+        * Space for the temporary SYSENTER stack:
         */
-       unsigned long           stack[64];
+       unsigned long           SYSENTER_stack[64];
 
 } ____cacheline_aligned;
 
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
 
 /*
  * Save the original ist values for checking stack pointers during debugging
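The ss1-as-MSR-cache scheme described in the hunk above boils down to the following sketch (set_sysenter_cs is a hypothetical helper name; wrmsr follows the usual <asm/msr.h> form):

	static void set_sysenter_cs(struct tss_struct *tss, unsigned short cs)
	{
		if (tss->x86_tss.ss1 == cs)
			return;				/* cached value matches: skip the MSR write */
		tss->x86_tss.ss1 = cs;
		wrmsr(MSR_IA32_SYSENTER_CS, cs, 0);	/* the expensive part, now conditional */
	}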
@@ -474,7 +493,6 @@ struct thread_struct {
 #ifdef CONFIG_X86_32
        unsigned long           sysenter_cs;
 #else
-       unsigned long           usersp; /* Copy from PDA */
        unsigned short          es;
        unsigned short          ds;
        unsigned short          fsindex;
@@ -564,6 +582,16 @@ static inline void native_swapgs(void)
 #endif
 }
 
+static inline unsigned long current_top_of_stack(void)
+{
+#ifdef CONFIG_X86_64
+       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+       /* sp0 on x86_32 is special in and around vm86 mode. */
+       return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -761,10 +789,10 @@ extern char                       ignore_fpu_irq;
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
-# define BASE_PREFETCH         ASM_NOP4
+# define BASE_PREFETCH         ""
 # define ARCH_HAS_PREFETCH
 #else
-# define BASE_PREFETCH         "prefetcht0 (%1)"
+# define BASE_PREFETCH         "prefetcht0 %P1"
 #endif
 
 /*
@@ -775,10 +803,9 @@ extern char                        ignore_fpu_irq;
  */
 static inline void prefetch(const void *x)
 {
-       alternative_input(BASE_PREFETCH,
-                         "prefetchnta (%1)",
+       alternative_input(BASE_PREFETCH, "prefetchnta %P1",
                          X86_FEATURE_XMM,
-                         "r" (x));
+                         "m" (*(const char *)x));
 }
 
 /*
@@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
  */
 static inline void prefetchw(const void *x)
 {
-       alternative_input(BASE_PREFETCH,
-                         "prefetchw (%1)",
-                         X86_FEATURE_3DNOW,
-                         "r" (x));
+       alternative_input(BASE_PREFETCH, "prefetchw %P1",
+                         X86_FEATURE_3DNOWPREFETCH,
+                         "m" (*(const char *)x));
 }
 
 static inline void spin_lock_prefetch(const void *x)
@@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
        prefetchw(x);
 }
 
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+                          TOP_OF_KERNEL_STACK_PADDING)
+
 #ifdef CONFIG_X86_32
 /*
  * User space process size: 3GB (default).
@@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
 #define STACK_TOP_MAX          STACK_TOP
 
 #define INIT_THREAD  {                                                   \
-       .sp0                    = sizeof(init_stack) + (long)&init_stack, \
+       .sp0                    = TOP_OF_INIT_STACK,                      \
        .vm86_info              = NULL,                                   \
        .sysenter_cs            = __KERNEL_CS,                            \
        .io_bitmap_ptr          = NULL,                                   \
 }
 
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS  {                                                      \
-       .x86_tss = {                                                      \
-               .sp0            = sizeof(init_stack) + (long)&init_stack, \
-               .ss0            = __KERNEL_DS,                            \
-               .ss1            = __KERNEL_CS,                            \
-               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,               \
-        },                                                               \
-       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },       \
-}
-
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
-#define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info)                                                 \
-({                                                                     \
-       unsigned long *__ptr = (unsigned long *)(info);                 \
-       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
-})
-
 /*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
  * is accessible even if the CPU hasn't stored the SS/ESP registers
  * on the stack (interrupt gate does not save these registers
@@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
  * "struct pt_regs" is possible, but they may contain the
  * completely wrong values.
  */
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
-       __regs__ - 1;                                                   \
+#define task_pt_regs(task) \
+({                                                                     \
+       unsigned long __ptr = (unsigned long)task_stack_page(task);     \
+       __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
+       ((struct pt_regs *)__ptr) - 1;                                  \
 })
 
 #define KSTK_ESP(task)         (task_pt_regs(task)->sp)
@@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define STACK_TOP_MAX          TASK_SIZE_MAX
 
 #define INIT_THREAD  { \
-       .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS  { \
-       .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+       .sp0 = TOP_OF_INIT_STACK \
 }
 
 /*
@@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
index 86fc2bb82287a687bd8ca0e976e8d32761678480..19507ffa5d28e9ce3ddece3856dd9cde4446f7f8 100644 (file)
@@ -31,13 +31,17 @@ struct pt_regs {
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
        unsigned long r15;
        unsigned long r14;
        unsigned long r13;
        unsigned long r12;
        unsigned long bp;
        unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
        unsigned long r11;
        unsigned long r10;
        unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
        unsigned long dx;
        unsigned long si;
        unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
        unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
        unsigned long ip;
        unsigned long cs;
        unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
 }
 
 /*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value.  This tricky test checks that with
- * one comparison.  Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * user_mode(regs) determines whether a register set came from user
+ * mode.  On x86_32, this is true if V8086 mode was enabled OR if the
+ * register set was from protected mode with RPL-3 CS value.  This
+ * tricky test checks that with one comparison.
+ *
+ * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
+ * the extra check.
  */
 static inline int user_mode(struct pt_regs *regs)
 {
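To see why a single comparison suffices on 32-bit, take the expression ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >= USER_RPL and note that X86_VM_MASK is bit 17 of EFLAGS (0x20000):

	kernel mode:  cs RPL 0, VM clear  ->  0 | 0       = 0        < 3
	user mode:    cs RPL 3            ->  3 | 0       = 3       >= 3
	vm86 mode:    VM set              ->  x | 0x20000 >= 0x20000 >= 3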
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
 #endif
 }
 
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_32
-       return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
-               USER_RPL;
-#else
-       return user_mode(regs);
-#endif
-}
-
 static inline int v8086_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
-#define current_user_stack_pointer()   this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer()    \
-       (test_thread_flag(TIF_IA32)     \
-        ? current_pt_regs()->sp        \
-        : this_cpu_read(old_rsp))
+#define current_user_stack_pointer()   current_pt_regs()->sp
+#define compat_user_stack_pointer()    current_pt_regs()->sp
 #endif
 
 #ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
  */
 #define arch_ptrace_stop_needed(code, info)                            \
 ({                                                                     \
-       set_thread_flag(TIF_NOTIFY_RESUME);                             \
+       force_iret();                                                   \
        false;                                                          \
 })
 
index d6b078e9fa28a3f4588237cb9a122f5b5ce53162..25b1cc07d49668c8a40306bf2ec81e4e2a11988e 100644 (file)
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
        struct pvclock_vcpu_time_info pvti;
+       u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
index db257a58571f0b47429e2dcc70cf628f1a968fe4..5a9856eb12bad7edb0f9a333870e331f5677d588 100644 (file)
@@ -3,8 +3,10 @@
 
 #include <linux/const.h>
 
-/* Constructor for a conventional segment GDT (or LDT) entry */
-/* This is a macro so it can be used in initializers */
+/*
+ * Constructor for a conventional segment GDT (or LDT) entry.
+ * This is a macro so it can be used in initializers.
+ */
 #define GDT_ENTRY(flags, base, limit)                  \
        ((((base)  & _AC(0xff000000,ULL)) << (56-24)) | \
         (((flags) & _AC(0x0000f0ff,ULL)) << 40) |      \
         (((limit) & _AC(0x000f0000,ULL)) << (48-16)) | \
         (((base)  & _AC(0x00ffffff,ULL)) << 16) |      \
         (((limit) & _AC(0x0000ffff,ULL))))
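A worked instance of GDT_ENTRY(), using the flags/limit conventionally used for a flat 4 GiB boot code segment (the descriptor choice is illustrative; the arithmetic is exact, and both base terms vanish because base is 0):

	GDT_ENTRY(0xc09b, 0, 0xfffff)
	  = ((0xc09b  & 0xf0ff)  << 40)		/* 0x00c09b0000000000: type/flags     */
	  | ((0xfffff & 0xf0000) << 32)		/* 0x000f000000000000: limit 19:16    */
	  |  (0xfffff & 0xffff)			/* 0x000000000000ffff: limit 15:0     */
	  = 0x00cf9b000000ffff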
 
-/* Simple and small GDT entries for booting only */
+/* Simple and small GDT entries for booting only: */
 
 #define GDT_ENTRY_BOOT_CS      2
-#define __BOOT_CS              (GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS      3
+#define GDT_ENTRY_BOOT_TSS     4
+#define __BOOT_CS              (GDT_ENTRY_BOOT_CS*8)
+#define __BOOT_DS              (GDT_ENTRY_BOOT_DS*8)
+#define __BOOT_TSS             (GDT_ENTRY_BOOT_TSS*8)
+
+/*
+ * Bottom two bits of selector give the ring
+ * privilege level
+ */
+#define SEGMENT_RPL_MASK       0x3
 
-#define GDT_ENTRY_BOOT_DS      (GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS              (GDT_ENTRY_BOOT_DS * 8)
+/* User mode is privilege level 3: */
+#define USER_RPL               0x3
 
-#define GDT_ENTRY_BOOT_TSS     (GDT_ENTRY_BOOT_CS + 2)
-#define __BOOT_TSS             (GDT_ENTRY_BOOT_TSS * 8)
+/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
+#define SEGMENT_TI_MASK                0x4
+/* LDT segment has TI set ... */
+#define SEGMENT_LDT            0x4
+/* ... GDT has it cleared */
+#define SEGMENT_GDT            0x0
 
-#define SEGMENT_RPL_MASK       0x3 /*
-                                    * Bottom two bits of selector give the ring
-                                    * privilege level
-                                    */
-#define SEGMENT_TI_MASK                0x4 /* Bit 2 is table indicator (LDT/GDT) */
-#define USER_RPL               0x3 /* User mode is privilege level 3 */
-#define SEGMENT_LDT            0x4 /* LDT segment has TI set... */
-#define SEGMENT_GDT            0x0 /* ... GDT has it cleared */
+#define GDT_ENTRY_INVALID_SEG  0
 
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:
  *
- *   0 - null
+ *   0 - null                                                          <=== cacheline #1
  *   1 - reserved
  *   2 - reserved
  *   3 - reserved
  *
- *   4 - unused                        <==== new cacheline
+ *   4 - unused                                                                <=== cacheline #2
  *   5 - unused
  *
  *  ------- start of TLS (Thread-Local Storage) segments:
  *
  *   6 - TLS segment #1                        [ glibc's TLS segment ]
  *   7 - TLS segment #2                        [ Wine's %fs Win32 segment ]
- *   8 - TLS segment #3
+ *   8 - TLS segment #3                                                        <=== cacheline #3
  *   9 - reserved
  *  10 - reserved
  *  11 - reserved
  *
  *  ------- start of kernel segments:
  *
- *  12 - kernel code segment           <==== new cacheline
+ *  12 - kernel code segment                                           <=== cacheline #4
  *  13 - kernel data segment
  *  14 - default user CS
  *  15 - default user DS
- *  16 - TSS
+ *  16 - TSS                                                           <=== cacheline #5
  *  17 - LDT
  *  18 - PNPBIOS support (16->32 gate)
  *  19 - PNPBIOS support
- *  20 - PNPBIOS support
+ *  20 - PNPBIOS support                                               <=== cacheline #6
  *  21 - PNPBIOS support
  *  22 - PNPBIOS support
  *  23 - APM BIOS support
- *  24 - APM BIOS support
+ *  24 - APM BIOS support                                              <=== cacheline #7
  *  25 - APM BIOS support
  *
  *  26 - ESPFIX small SS
  *  27 - per-cpu                       [ offset to per-cpu data area ]
- *  28 - stack_canary-20               [ for stack protector ]
+ *  28 - stack_canary-20               [ for stack protector ]         <=== cacheline #8
  *  29 - unused
  *  30 - unused
  *  31 - TSS for double fault handler
  */
-#define GDT_ENTRY_TLS_MIN      6
-#define GDT_ENTRY_TLS_MAX      (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_TLS_MIN              6
+#define GDT_ENTRY_TLS_MAX              (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
+#define GDT_ENTRY_KERNEL_CS            12
+#define GDT_ENTRY_KERNEL_DS            13
 #define GDT_ENTRY_DEFAULT_USER_CS      14
-
 #define GDT_ENTRY_DEFAULT_USER_DS      15
+#define GDT_ENTRY_TSS                  16
+#define GDT_ENTRY_LDT                  17
+#define GDT_ENTRY_PNPBIOS_CS32         18
+#define GDT_ENTRY_PNPBIOS_CS16         19
+#define GDT_ENTRY_PNPBIOS_DS           20
+#define GDT_ENTRY_PNPBIOS_TS1          21
+#define GDT_ENTRY_PNPBIOS_TS2          22
+#define GDT_ENTRY_APMBIOS_BASE         23
+
+#define GDT_ENTRY_ESPFIX_SS            26
+#define GDT_ENTRY_PERCPU               27
+#define GDT_ENTRY_STACK_CANARY         28
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS      31
 
-#define GDT_ENTRY_KERNEL_BASE          (12)
+/*
+ * Number of entries in the GDT table:
+ */
+#define GDT_ENTRIES                    32
 
-#define GDT_ENTRY_KERNEL_CS            (GDT_ENTRY_KERNEL_BASE+0)
+/*
+ * Segment selector values corresponding to the above entries:
+ */
 
-#define GDT_ENTRY_KERNEL_DS            (GDT_ENTRY_KERNEL_BASE+1)
+#define __KERNEL_CS                    (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS                    (GDT_ENTRY_KERNEL_DS*8)
+#define __USER_DS                      (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER_CS                      (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __ESPFIX_SS                    (GDT_ENTRY_ESPFIX_SS*8)
 
-#define GDT_ENTRY_TSS                  (GDT_ENTRY_KERNEL_BASE+4)
-#define GDT_ENTRY_LDT                  (GDT_ENTRY_KERNEL_BASE+5)
+/* segment for calling fn: */
+#define PNP_CS32                       (GDT_ENTRY_PNPBIOS_CS32*8)
+/* code segment for BIOS: */
+#define PNP_CS16                       (GDT_ENTRY_PNPBIOS_CS16*8)
 
-#define GDT_ENTRY_PNPBIOS_BASE         (GDT_ENTRY_KERNEL_BASE+6)
-#define GDT_ENTRY_APMBIOS_BASE         (GDT_ENTRY_KERNEL_BASE+11)
+/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
+#define SEGMENT_IS_PNP_CODE(x)         (((x) & 0xf4) == PNP_CS32)
 
-#define GDT_ENTRY_ESPFIX_SS            (GDT_ENTRY_KERNEL_BASE+14)
-#define __ESPFIX_SS                    (GDT_ENTRY_ESPFIX_SS*8)
+/* data segment for BIOS: */
+#define PNP_DS                         (GDT_ENTRY_PNPBIOS_DS*8)
+/* transfer data segment: */
+#define PNP_TS1                                (GDT_ENTRY_PNPBIOS_TS1*8)
+/* another data segment: */
+#define PNP_TS2                                (GDT_ENTRY_PNPBIOS_TS2*8)
 
-#define GDT_ENTRY_PERCPU               (GDT_ENTRY_KERNEL_BASE+15)
 #ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
+# define __KERNEL_PERCPU               (GDT_ENTRY_PERCPU*8)
 #else
-#define __KERNEL_PERCPU 0
+# define __KERNEL_PERCPU               0
 #endif
 
-#define GDT_ENTRY_STACK_CANARY         (GDT_ENTRY_KERNEL_BASE+16)
 #ifdef CONFIG_CC_STACKPROTECTOR
-#define __KERNEL_STACK_CANARY          (GDT_ENTRY_STACK_CANARY*8)
+# define __KERNEL_STACK_CANARY         (GDT_ENTRY_STACK_CANARY*8)
 #else
-#define __KERNEL_STACK_CANARY          0
+# define __KERNEL_STACK_CANARY         0
 #endif
 
-#define GDT_ENTRY_DOUBLEFAULT_TSS      31
-
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
+#else /* 64-bit: */
 
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32         (GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16         (GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS           (GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1          (GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2          (GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32   (GDT_ENTRY_PNPBIOS_CS32 * 8)        /* segment for calling fn */
-#define PNP_CS16   (GDT_ENTRY_PNPBIOS_CS16 * 8)        /* code segment for BIOS */
-#define PNP_DS     (GDT_ENTRY_PNPBIOS_DS * 8)  /* data segment for BIOS */
-#define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
-#define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
+#include <asm/cache.h>
 
+#define GDT_ENTRY_KERNEL32_CS          1
+#define GDT_ENTRY_KERNEL_CS            2
+#define GDT_ENTRY_KERNEL_DS            3
 
 /*
- * Matching rules for certain types of segments.
+ * We cannot use the same code segment descriptor for user and kernel mode,
+ * not even in long flat mode, because of different DPL.
+ *
+ * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
+ * selectors:
+ *
+ *   if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
+ *   if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
+ *
+ * ss = STAR.SYSRET_CS+8 (in either case)
+ *
+ * thus USER_DS should be between 32-bit and 64-bit code selectors:
  */
+#define GDT_ENTRY_DEFAULT_USER32_CS    4
+#define GDT_ENTRY_DEFAULT_USER_DS      5
+#define GDT_ENTRY_DEFAULT_USER_CS      6
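Plugging in the entry numbers above (selector = entry*8, plus 3 for RPL 3), and assuming MSR_STAR's SYSRET_CS field is programmed to __USER32_CS as the kernel's syscall setup conventionally does:

	STAR.SYSRET_CS = 4*8 + 3 = 0x23		(__USER32_CS)
	32-bit return:	cs = 0x23		(__USER32_CS)
	either case:	ss = 0x23 + 8  = 0x2b	(__USER_DS)
	64-bit return:	cs = 0x23 + 16 = 0x33	(__USER_CS)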
 
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
-
+/* Needs two entries */
+#define GDT_ENTRY_TSS                  8
+/* Needs two entries */
+#define GDT_ENTRY_LDT                  10
 
-#else
-#include <asm/cache.h>
-
-#define GDT_ENTRY_KERNEL32_CS 1
-#define GDT_ENTRY_KERNEL_CS 2
-#define GDT_ENTRY_KERNEL_DS 3
+#define GDT_ENTRY_TLS_MIN              12
+#define GDT_ENTRY_TLS_MAX              14
 
-#define __KERNEL32_CS   (GDT_ENTRY_KERNEL32_CS * 8)
+/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU              15
 
 /*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ * Number of entries in the GDT table:
  */
-#define GDT_ENTRY_DEFAULT_USER32_CS 4
-#define GDT_ENTRY_DEFAULT_USER_DS 5
-#define GDT_ENTRY_DEFAULT_USER_CS 6
-#define __USER32_CS   (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
-#define __USER32_DS    __USER_DS
-
-#define GDT_ENTRY_TSS 8        /* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_PER_CPU 15   /* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG  (GDT_ENTRY_PER_CPU * 8 + 3)
+#define GDT_ENTRIES                    16
 
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define GDT_ENTRIES 16
+/*
+ * Segment selector values corresponding to the above entries:
+ *
+ * Note, selectors also need to have a correct RPL,
+ * expressed with the +3 value for user-space selectors:
+ */
+#define __KERNEL32_CS                  (GDT_ENTRY_KERNEL32_CS*8)
+#define __KERNEL_CS                    (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS                    (GDT_ENTRY_KERNEL_DS*8)
+#define __USER32_CS                    (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
+#define __USER_DS                      (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER32_DS                    __USER_DS
+#define __USER_CS                      (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __PER_CPU_SEG                  (GDT_ENTRY_PER_CPU*8 + 3)
+
+/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
+#define FS_TLS                         0
+#define GS_TLS                         1
+
+#define GS_TLS_SEL                     ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL                     ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #endif
 
-#define __KERNEL_CS    (GDT_ENTRY_KERNEL_CS*8)
-#define __KERNEL_DS    (GDT_ENTRY_KERNEL_DS*8)
-#define __USER_DS      (GDT_ENTRY_DEFAULT_USER_DS*8+3)
-#define __USER_CS      (GDT_ENTRY_DEFAULT_USER_CS*8+3)
 #ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl()  0
+# define get_kernel_rpl()              0
 #endif
 
-#define IDT_ENTRIES 256
-#define NUM_EXCEPTION_VECTORS 32
-/* Bitmask of exception vectors which push an error code on the stack */
-#define EXCEPTION_ERRCODE_MASK  0x00027d00
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+#define IDT_ENTRIES                    256
+#define NUM_EXCEPTION_VECTORS          32
+
+/* Bitmask of exception vectors which push an error code on the stack: */
+#define EXCEPTION_ERRCODE_MASK         0x00027d00
+
+#define GDT_SIZE                       (GDT_ENTRIES*8)
+#define GDT_ENTRY_TLS_ENTRIES          3
+#define TLS_SIZE                       (GDT_ENTRY_TLS_ENTRIES*8)
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
+
 extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
 #ifdef CONFIG_TRACING
-#define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handlers early_idt_handlers
 #endif
 
 /*
@@ -228,37 +260,30 @@ do {                                                                      \
 } while (0)
 
 /*
- * Save a segment register away
+ * Save a segment register away:
  */
 #define savesegment(seg, value)                                \
        asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
 /*
- * x86_32 user gs accessors.
+ * x86-32 user GS accessors:
  */
 #ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs)      (u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v)   loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk)      ((tsk)->thread.gs)
-#define lazy_save_gs(v)                savesegment(gs, (v))
-#define lazy_load_gs(v)                loadsegment(gs, (v))
-#else  /* X86_32_LAZY_GS */
-#define get_user_gs(regs)      (u16)((regs)->gs)
-#define set_user_gs(regs, v)   do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk)      (task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v)                do { } while (0)
-#define lazy_load_gs(v)                do { } while (0)
-#endif /* X86_32_LAZY_GS */
+# ifdef CONFIG_X86_32_LAZY_GS
+#  define get_user_gs(regs)            (u16)({ unsigned long v; savesegment(gs, v); v; })
+#  define set_user_gs(regs, v)         loadsegment(gs, (unsigned long)(v))
+#  define task_user_gs(tsk)            ((tsk)->thread.gs)
+#  define lazy_save_gs(v)              savesegment(gs, (v))
+#  define lazy_load_gs(v)              loadsegment(gs, (v))
+# else /* X86_32_LAZY_GS */
+#  define get_user_gs(regs)            (u16)((regs)->gs)
+#  define set_user_gs(regs, v)         do { (regs)->gs = (v); } while (0)
+#  define task_user_gs(tsk)            (task_pt_regs(tsk)->gs)
+#  define lazy_save_gs(v)              do { } while (0)
+#  define lazy_load_gs(v)              do { } while (0)
+# endif        /* X86_32_LAZY_GS */
 #endif /* X86_32 */
 
-static inline unsigned long get_limit(unsigned long segment)
-{
-       unsigned long __limit;
-       asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
-       return __limit + 1;
-}
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
index ff4e7b236e21d5393bd55c5f53493d44861c70c1..f69e06b283fb9ee03e8704847558aa577aecf221 100644 (file)
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
  */
 extern struct boot_params boot_params;
 
+static inline bool kaslr_enabled(void)
+{
+       return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
 /*
  * Do NOT EVER look at the BIOS memory size location.
  * It does not work on many machines.
index 9dfce4e0417d92adc623d32ff93f67109316b451..6fe6b182c9981dd891a9a5bc9a55b3e6591a6f9f 100644 (file)
@@ -57,9 +57,9 @@ struct sigcontext {
        unsigned long ip;
        unsigned long flags;
        unsigned short cs;
-       unsigned short gs;
-       unsigned short fs;
-       unsigned short __pad0;
+       unsigned short __pad2;  /* Was called gs, but was always zero. */
+       unsigned short __pad1;  /* Was called fs, but was always zero. */
+       unsigned short ss;
        unsigned long err;
        unsigned long trapno;
        unsigned long oldmask;
index 7a958164088c10a61aeed98f6353152ebad83ff5..89db46752a8f0e1c78c9403ef4b40a805b488999 100644 (file)
@@ -13,9 +13,7 @@
                         X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-                      unsigned long *pax);
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                     struct pt_regs *regs, unsigned long mask);
 
index 8d3120f4e27053b3fae6d6334fe7a3a6dee33e82..ba665ebd17bb8f22a39712dbf2a8c398b9e9207f 100644 (file)
 
 #ifdef CONFIG_X86_SMAP
 
-#define ASM_CLAC                                                       \
-       661: ASM_NOP3 ;                                                 \
-       .pushsection .altinstr_replacement, "ax" ;                      \
-       662: __ASM_CLAC ;                                               \
-       .popsection ;                                                   \
-       .pushsection .altinstructions, "a" ;                            \
-       altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;       \
-       .popsection
-
-#define ASM_STAC                                                       \
-       661: ASM_NOP3 ;                                                 \
-       .pushsection .altinstr_replacement, "ax" ;                      \
-       662: __ASM_STAC ;                                               \
-       .popsection ;                                                   \
-       .pushsection .altinstructions, "a" ;                            \
-       altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;       \
-       .popsection
+#define ASM_CLAC \
+       ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+
+#define ASM_STAC \
+       ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
 
 #else /* CONFIG_X86_SMAP */
 
 static __always_inline void clac(void)
 {
        /* Note: a barrier is implicit in alternative() */
-       alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+       alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
 }
 
 static __always_inline void stac(void)
 {
        /* Note: a barrier is implicit in alternative() */
-       alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+       alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
-       ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+       ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
 #define ASM_STAC \
-       ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+       ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
 
 #else /* CONFIG_X86_SMAP */
 
index 8cd1cc3bc8356ffef29e349f08b3de43aeb79506..81d02fc7dafa521e665adf7dce8f657d0036d391 100644 (file)
@@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
+void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
 void native_cpu_die(unsigned int cpu);
index 6a4b00fafb003cbcf4e2bdc62a244ee71dc75a45..aeb4666e0c0a770a7fbb8432b7d133b2dd9e764d 100644 (file)
@@ -4,6 +4,8 @@
 
 #ifdef __KERNEL__
 
+#include <asm/nops.h>
+
 static inline void native_clts(void)
 {
        asm volatile("clts");
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
                       "+m" (*(volatile char __force *)__p));
 }
 
+static inline void clwb(volatile void *__p)
+{
+       volatile struct { char x[64]; } *p = __p;
+
+       asm volatile(ALTERNATIVE_2(
+               ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+               ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
+               X86_FEATURE_CLFLUSHOPT,
+               ".byte 0x66, 0x0f, 0xae, 0x30",  /* clwb (%%rax) */
+               X86_FEATURE_CLWB)
+               : [p] "+m" (*p)
+               : [pax] "a" (p));
+}
+
+static inline void pcommit_sfence(void)
+{
+       alternative(ASM_NOP7,
+                   ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
+                   "sfence",
+                   X86_FEATURE_PCOMMIT);
+}
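A sketch of how these helpers would pair on a persistent-memory write path of this era (pmem_addr and value are hypothetical; PCOMMIT was a then-proposed instruction):

	*(volatile u64 *)pmem_addr = value;	/* store to persistent memory */
	clwb(pmem_addr);			/* write the dirty line back */
	pcommit_sfence();			/* fence, then commit to the persistence domain */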
+
 #define nop() asm volatile ("nop")
 
 
index 1d4e4f279a3281e094684ad29bd4b544d37af6ab..ea2dbe82cba3a74e06e269655cdfca8a67253e52 100644 (file)
 #include <asm/percpu.h>
 #include <asm/types.h>
 
+/*
+ * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
+ * reserve at the top of the kernel stack.  We do it because of a nasty
+ * 32-bit corner case.  On x86_32, the hardware stack frame is
+ * variable-length.  Except for vm86 mode, struct pt_regs assumes a
+ * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
+ * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
+ * does in at least one case:
+ *
+ * If we take an NMI early enough in SYSENTER, then we can end up with
+ * pt_regs that extends above sp0.  On the way out, in the espfix code,
+ * we can read the saved SS value, but that value will be above sp0.
+ * Without this offset, that can result in a page fault.  (We are
+ * careful that, in this case, the value we read doesn't matter.)
+ *
+ * In vm86 mode, the hardware frame is much longer still, but we neither
+ * access the extra members from NMI context, nor do we write such a
+ * frame at sp0 at all.
+ *
+ * x86_64 has a fixed-length stack frame.
+ */
+#ifdef CONFIG_X86_32
+# define TOP_OF_KERNEL_STACK_PADDING 8
+#else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+#endif
+
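Concretely, on a 32-bit kernel with THREAD_SIZE = 8 KiB, the top of the stack area ends up laid out as follows (this is the arrangement the task_pt_regs() rewrite elsewhere in this commit computes):

	task_stack_page(task) + THREAD_SIZE	<- true top of the allocation
	  [ TOP_OF_KERNEL_STACK_PADDING = 8 bytes ]
	  [ struct pt_regs ]			<- task_pt_regs(task)
	  [ ... usable kernel stack, growing down ... ]
	  [ struct thread_info ]		<- bottom of the allocation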
 /*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
@@ -145,7 +172,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN             (THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET    (5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 
 static inline struct thread_info *current_thread_info(void)
 {
-       struct thread_info *ti;
-       ti = (void *)(this_cpu_read_stable(kernel_stack) +
-                     KERNEL_STACK_OFFSET - THREAD_SIZE);
-       return ti;
+       return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
@@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
 
 #else /* !__ASSEMBLY__ */
 
-/* how to get the thread information struct from ASM */
+/* Load thread_info address into "reg" */
 #define GET_THREAD_INFO(reg) \
        _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-       _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+       _ASM_SUB $(THREAD_SIZE),reg ;
 
 /*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
+ * ASM operand which evaluates to a 'thread_info' address of
+ * the current task, if it is known that "reg" is exactly "off"
+ * bytes below the top of the stack currently.
+ *
+ * ( The kernel stack's size is known at build time, it is usually
+ *   2 or 4 pages, and the bottom of the kernel stack contains
+ *   the thread_info structure. So to access the thread_info very
+ *   quickly from assembly code we can calculate down from the
+ *   top of the kernel stack to the bottom, using constant,
+ *   build-time calculations only. )
+ *
+ * For example, to fetch the current thread_info->flags value into %eax
+ * on x86-64 defconfig kernels, in syscall entry code where RSP is
+ * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+ * stack:
+ *
+ *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+ *
+ * will translate to:
+ *
+ *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
+ *
+ * which is below the current RSP by almost 16K.
  */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
 
 #endif
 
@@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
 #endif
        return false;
 }
+
+/*
+ * Force syscall return via IRET by making it look as if there was
+ * some work pending. IRET is our most capable (but slowest) syscall
+ * return path, which is able to restore modified SS, CS and certain
+ * EFLAGS values that other (fast) syscall return instructions
+ * are not able to restore properly.
+ */
+#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
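The ptrace.h hunk earlier in this commit shows the intended pattern: modify saved state that the fast return paths cannot restore, then request the slow path. A hedged illustration (the ss write is a made-up example of such a modification):

	regs->ss = __USER_DS;	/* state the SYSRET/SYSEXIT paths can't restore */
	force_iret();		/* so make this return go through IRET */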
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
index 12a26b979bf163008ebd1dc5b7cdff3ca5876f0b..f2f9b39b274ab0c2f81ab6b388f78ba5c881ec5e 100644 (file)
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
 }
 
 unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+copy_user_handle_tail(char *to, char *from, unsigned len);
 
 #endif /* _ASM_X86_UACCESS_64_H */
index 225b0988043a0a78ac9092a9af7a265122c685cd..ab456dc233b51482c53c0232a97ed71cbc50a990 100644 (file)
@@ -15,6 +15,7 @@
 
 /* loadflags */
 #define LOADED_HIGH    (1<<0)
+#define KASLR_FLAG     (1<<1)
 #define QUIET_FLAG     (1<<5)
 #define KEEP_SEGMENTS  (1<<6)
 #define CAN_USE_HEAP   (1<<7)
index 7b0a55a8885115386f40a4207838e60ad66abc21..580aee3072e0684082b0f74e1358925efaf4f540 100644 (file)
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
index ac4b9aa4d9996b413eaf71ada8bb6b458e7a83a0..bc16115af39b9e66c048b5e0237180f505535b77 100644 (file)
@@ -41,13 +41,17 @@ struct pt_regs {
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
        unsigned long r15;
        unsigned long r14;
        unsigned long r13;
        unsigned long r12;
        unsigned long rbp;
        unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
        unsigned long r11;
        unsigned long r10;
        unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
        unsigned long rdx;
        unsigned long rsi;
        unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
        unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
        unsigned long rip;
        unsigned long cs;
        unsigned long eflags;
index d8b9f9081e86fb486c88c34b8359bdfec28dd5fe..16dc4e8a2cd34845042445915f9e9d74d90546c6 100644 (file)
@@ -177,9 +177,24 @@ struct sigcontext {
        __u64 rip;
        __u64 eflags;           /* RFLAGS */
        __u16 cs;
-       __u16 gs;
-       __u16 fs;
-       __u16 __pad0;
+
+       /*
+        * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+        * Linux saved and restored fs and gs in these slots.  This
+        * was counterproductive, as fsbase and gsbase were never
+        * saved, so arch_prctl was presumably unreliable.
+        *
+        * If these slots are ever needed for any other purpose, there
+        * is some risk that very old 64-bit binaries could get
+        * confused.  I doubt that many such binaries still work,
+        * though, since the same patch in 2.5.64 also removed the
+        * 64-bit set_thread_area syscall, so it appears that there is
+        * no TLS API that works in both pre- and post-2.5.64 kernels.
+        */
+       __u16 __pad2;           /* Was gs. */
+       __u16 __pad1;           /* Was fs. */
+
+       __u16 ss;
        __u64 err;
        __u64 trapno;
        __u64 oldmask;
index c5f1a1deb91a904e21d88c2762836dd98500e600..1fe92181ee9ef8b3cb061a6091a732b2b9fa7bf0 100644 (file)
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
+#define EXIT_REASON_RDTSCP              51
 #define EXIT_REASON_PREEMPTION_TIMER    52
 #define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
index cdb1b70ddad0f026cbe42800571d0869211bf6f5..c887cd944f0c18e849fda278ec50dbee6b731919 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32)  += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)   += mcount_64.o
 obj-y                  += syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_IA32_EMULATION)   += syscall_32.o
 obj-$(CONFIG_X86_VSYSCALL_EMULATION)   += vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)     += espfix_64.o
 obj-$(CONFIG_SYSFS)    += ksysfs.o
index 703130f469ecf71978b9d67bf0fb50da9b31cbc9..aef65319316065eab845f35141682c3550f18a22 100644 (file)
@@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, ...)                              \
-do {                                                   \
-       if (debug_alternative)                          \
-               printk(KERN_DEBUG fmt, ##__VA_ARGS__);  \
+#define DPRINTK(fmt, args...)                                          \
+do {                                                                   \
+       if (debug_alternative)                                          \
+               printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);   \
+} while (0)
+
+#define DUMP_BYTES(buf, len, fmt, args...)                             \
+do {                                                                   \
+       if (unlikely(debug_alternative)) {                              \
+               int j;                                                  \
+                                                                       \
+               if (!(len))                                             \
+                       break;                                          \
+                                                                       \
+               printk(KERN_DEBUG fmt, ##args);                         \
+               for (j = 0; j < (len) - 1; j++)                         \
+                       printk(KERN_CONT "%02hhx ", buf[j]);            \
+               printk(KERN_CONT "%02hhx\n", buf[j]);                   \
+       }                                                               \
 } while (0)
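To make the new hex-dump macro's behavior concrete, here is a standalone userspace sketch with the same loop structure (printk and debug_alternative replaced by printf and a plain flag):

#include <stdio.h>

static int debug_alternative = 1;

#define DUMP_BYTES(buf, len, fmt, args...)			\
do {								\
	if (debug_alternative) {				\
		int j;						\
								\
		if (!(len))					\
			break;					\
								\
		printf(fmt, ##args);				\
		for (j = 0; j < (len) - 1; j++)			\
			printf("%02hhx ", (buf)[j]);		\
		printf("%02hhx\n", (buf)[j]);			\
	}							\
} while (0)

int main(void)
{
	unsigned char insn[5] = { 0xe9, 0x10, 0x00, 0x00, 0x00 };

	/* prints: final_insn: e9 10 00 00 00 */
	DUMP_BYTES(insn, 5, "final_insn: ");
	return 0;
}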
 
 /*
@@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-/* Replace instructions with better alternatives for this CPU type.
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that asymmetric systems where
-   APs have less capabilities than the boot processor are not handled.
-   Tough. Make sure you disable such features by hand. */
+/*
+ * Are we looking at a near JMP with a 1- or 4-byte displacement?
+ */
+static inline bool is_jmp(const u8 opcode)
+{
+       return opcode == 0xeb || opcode == 0xe9;
+}
+
+static void __init_or_module
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+{
+       u8 *next_rip, *tgt_rip;
+       s32 n_dspl, o_dspl;
+       int repl_len;
+
+       if (a->replacementlen != 5)
+               return;
+
+       o_dspl = *(s32 *)(insnbuf + 1);
+
+       /* next_rip of the replacement JMP */
+       next_rip = repl_insn + a->replacementlen;
+       /* target rip of the replacement JMP */
+       tgt_rip  = next_rip + o_dspl;
+       n_dspl = tgt_rip - orig_insn;
+
+       DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+
+       if (tgt_rip - orig_insn >= 0) {
+               if (n_dspl - 2 <= 127)
+                       goto two_byte_jmp;
+               else
+                       goto five_byte_jmp;
+       /* negative offset */
+       } else {
+               if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
+                       goto two_byte_jmp;
+               else
+                       goto five_byte_jmp;
+       }
+
+two_byte_jmp:
+       n_dspl -= 2;
+
+       insnbuf[0] = 0xeb;
+       insnbuf[1] = (s8)n_dspl;
+       add_nops(insnbuf + 2, 3);
+
+       repl_len = 2;
+       goto done;
+
+five_byte_jmp:
+       n_dspl -= 5;
+
+       insnbuf[0] = 0xe9;
+       *(s32 *)&insnbuf[1] = n_dspl;
 
+       repl_len = 5;
+
+done:
+
+       DPRINTK("final displ: 0x%08x, JMP 0x%lx",
+               n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+}
+
+static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+{
+       if (instr[0] != 0x90)
+               return;
+
+       add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+
+       DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+                  instr, a->instrlen - a->padlen, a->padlen);
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have fewer capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
 void __init_or_module apply_alternatives(struct alt_instr *start,
                                         struct alt_instr *end)
 {
@@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
        u8 *instr, *replacement;
        u8 insnbuf[MAX_PATCH_LEN];
 
-       DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+       DPRINTK("alt table %p -> %p", start, end);
        /*
         * The scan order should be from start to end. A later scanned
-        * alternative code can overwrite a previous scanned alternative code.
+        * alternative code can overwrite previously scanned alternative code.
         * Some kernel functions (e.g. memcpy, memset, etc) use this order to
         * patch code.
         *
@@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
         * order.
         */
        for (a = start; a < end; a++) {
+               int insnbuf_sz = 0;
+
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
-               BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-               if (!boot_cpu_has(a->cpuid))
+               if (!boot_cpu_has(a->cpuid)) {
+                       if (a->padlen > 1)
+                               optimize_nops(a, instr);
+
                        continue;
+               }
+
+               DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+                       a->cpuid >> 5,
+                       a->cpuid & 0x1f,
+                       instr, a->instrlen,
+                       replacement, a->replacementlen, a->padlen);
+
+               DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+               DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
 
                memcpy(insnbuf, replacement, a->replacementlen);
+               insnbuf_sz = a->replacementlen;
 
                /* 0xe8 is a relative jump; fix the offset. */
-               if (*insnbuf == 0xe8 && a->replacementlen == 5)
-                   *(s32 *)(insnbuf + 1) += replacement - instr;
+               if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+                       *(s32 *)(insnbuf + 1) += replacement - instr;
+                       DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+                               *(s32 *)(insnbuf + 1),
+                               (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+               }
+
+               if (a->replacementlen && is_jmp(replacement[0]))
+                       recompute_jump(a, instr, replacement, insnbuf);
 
-               add_nops(insnbuf + a->replacementlen,
-                        a->instrlen - a->replacementlen);
+               if (a->instrlen > a->replacementlen) {
+                       add_nops(insnbuf + a->replacementlen,
+                                a->instrlen - a->replacementlen);
+                       insnbuf_sz += a->instrlen - a->replacementlen;
+               }
+               DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
 
-               text_poke_early(instr, insnbuf, a->instrlen);
+               text_poke_early(instr, insnbuf, insnbuf_sz);
        }
 }
 
 #ifdef CONFIG_SMP
-
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
                                  u8 *text, u8 *text_end)
 {
@@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
        smp->locks_end  = locks_end;
        smp->text       = text;
        smp->text_end   = text_end;
-       DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-               __func__, smp->locks, smp->locks_end,
+       DPRINTK("locks %p -> %p, text %p -> %p, name %s",
+               smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);
 
        list_add_tail(&smp->next, &smp_alt_modules);
@@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
 
        return 0;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PARAVIRT
 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
        if (likely(!bp_patching_in_progress))
                return 0;
 
-       if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+       if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
                return 0;
 
        /* set up the specified breakpoint handler */
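The displacement arithmetic in recompute_jump() above is easiest to follow with concrete numbers. Below is a standalone sketch with made-up addresses (all values are assumptions for illustration; the range check is a simplified signed form of the two_byte_jmp/five_byte_jmp tests):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t orig_insn = 0x1000;            /* instruction being patched */
	uint64_t repl_insn = 0x2000;            /* replacement JMP lives here */
	int32_t  o_dspl    = -4085;             /* rel32 stored in the replacement */

	uint64_t next_rip = repl_insn + 5;      /* RIP after the 5-byte JMP */
	uint64_t tgt_rip  = next_rip + o_dspl;  /* absolute jump target: 0x1010 */
	int32_t  n_dspl   = (int32_t)(tgt_rip - orig_insn);    /* 16 */

	if (n_dspl - 2 >= -128 && n_dspl - 2 <= 127)
		printf("2-byte: eb %02x\n", (uint8_t)(n_dspl - 2));  /* eb 0e */
	else
		printf("5-byte: e9 %08x\n", (uint32_t)(n_dspl - 5));
	return 0;
}

With these values the 5-byte JMP rel32 in the replacement area shrinks to a 2-byte JMP rel8 once it is re-targeted relative to the patch site, which is exactly the space optimization the kernel code performs.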
index ad3639ae1b9b50a71ff224faaf96e05302eccd9f..dcb52850a28fcbe00a4a25ddf47d6f4ccedf3c9c 100644 (file)
@@ -1084,67 +1084,6 @@ void lapic_shutdown(void)
        local_irq_restore(flags);
 }
 
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-       unsigned int reg0, reg1;
-
-       /*
-        * The version register is read-only in a real APIC.
-        */
-       reg0 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-       reg1 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-       /*
-        * The two version reads above should print the same
-        * numbers.  If the second one is different, then we
-        * poke at a non-APIC.
-        */
-       if (reg1 != reg0)
-               return 0;
-
-       /*
-        * Check if the version looks reasonable.
-        */
-       reg1 = GET_APIC_VERSION(reg0);
-       if (reg1 == 0x00 || reg1 == 0xff)
-               return 0;
-       reg1 = lapic_get_maxlvt();
-       if (reg1 < 0x02 || reg1 == 0xff)
-               return 0;
-
-       /*
-        * The ID register is read/write in a real APIC.
-        */
-       reg0 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-       apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
-       reg1 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-       apic_write(APIC_ID, reg0);
-       if (reg1 != (reg0 ^ apic->apic_id_mask))
-               return 0;
-
-       /*
-        * The next two are just to see if we have sane values.
-        * They're only really relevant if we're in Virtual Wire
-        * compatibility mode, but most boxes are these days.
-        */
-       reg0 = apic_read(APIC_LVT0);
-       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-       reg1 = apic_read(APIC_LVT1);
-       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-       return 1;
-}
-
 /**
  * sync_Arb_IDs - synchronize APIC bus arbitration IDs
  */
@@ -2283,7 +2222,6 @@ int __init APIC_init_uniprocessor(void)
                disable_ioapic_support();
 
        default_setup_apic_routing();
-       verify_local_APIC();
        apic_bsp_setup(true);
        return 0;
 }
index e658f21681c82e1ad8fa28bde084c7933939548b..d9d0bd2faaf42cf4b1e894628edb4139ee615780 100644 (file)
@@ -135,12 +135,12 @@ static void init_x2apic_ldr(void)
 
        per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
 
-       __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+       cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
        for_each_online_cpu(cpu) {
                if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
                        continue;
-               __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
-               __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
+               cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+               cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
        }
 }
 
@@ -195,7 +195,7 @@ static int x2apic_init_cpu_notifier(void)
 
        BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
 
-       __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
+       cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
        register_hotcpu_notifier(&x2apic_cpu_notifier);
        return 1;
 }
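For readers unfamiliar with the cpumask API: cpumask_set_cpu(cpu, mask) sets bit cpu in the mask (via the atomic set_bit() in the kernel), replacing the old __cpu_set() helper removed here. A userspace sketch of the semantics only:

#include <stdio.h>
#include <stdint.h>

/* Stand-in for cpumask_set_cpu(): set bit 'cpu' in a 64-CPU mask.
 * The kernel version operates on struct cpumask and is atomic. */
static void demo_cpumask_set_cpu(unsigned int cpu, uint64_t *mask)
{
	*mask |= UINT64_C(1) << cpu;
}

int main(void)
{
	uint64_t cluster = 0;

	demo_cpumask_set_cpu(3, &cluster);
	demo_cpumask_set_cpu(5, &cluster);
	printf("cluster mask: %#llx\n", (unsigned long long)cluster); /* 0x28 */
	return 0;
}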
index 8e9dcfd630e4b539e7936050b7bb1673660ae752..c8d92950bc041bcebcb6a2361ebeac0ce3065279 100644 (file)
@@ -144,33 +144,60 @@ static void __init uv_set_apicid_hibit(void)
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-       int pnodeid, is_uv1, is_uv2, is_uv3;
-
-       is_uv1 = !strcmp(oem_id, "SGI");
-       is_uv2 = !strcmp(oem_id, "SGI2");
-       is_uv3 = !strncmp(oem_id, "SGI3", 4);   /* there are varieties of UV3 */
-       if (is_uv1 || is_uv2 || is_uv3) {
-               uv_hub_info->hub_revision =
-                       (is_uv1 ? UV1_HUB_REVISION_BASE :
-                       (is_uv2 ? UV2_HUB_REVISION_BASE :
-                                 UV3_HUB_REVISION_BASE));
-               pnodeid = early_get_pnodeid();
-               early_get_apic_pnode_shift();
-               x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
-               x86_platform.nmi_init = uv_nmi_init;
-               if (!strcmp(oem_table_id, "UVL"))
-                       uv_system_type = UV_LEGACY_APIC;
-               else if (!strcmp(oem_table_id, "UVX"))
-                       uv_system_type = UV_X2APIC;
-               else if (!strcmp(oem_table_id, "UVH")) {
-                       __this_cpu_write(x2apic_extra_bits,
-                               pnodeid << uvh_apicid.s.pnode_shift);
-                       uv_system_type = UV_NON_UNIQUE_APIC;
-                       uv_set_apicid_hibit();
-                       return 1;
-               }
+       int pnodeid;
+       int uv_apic;
+
+       if (strncmp(oem_id, "SGI", 3) != 0)
+               return 0;
+
+       /*
+        * Determine UV arch type.
+        *   SGI: UV100/1000
+        *   SGI2: UV2000/3000
+        *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+        */
+       uv_hub_info->hub_revision =
+               !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+               !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
+               !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
+
+       if (uv_hub_info->hub_revision == 0)
+               goto badbios;
+
+       pnodeid = early_get_pnodeid();
+       early_get_apic_pnode_shift();
+       x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
+       x86_platform.nmi_init = uv_nmi_init;
+
+       if (!strcmp(oem_table_id, "UVX")) {             /* most common */
+               uv_system_type = UV_X2APIC;
+               uv_apic = 0;
+
+       } else if (!strcmp(oem_table_id, "UVH")) {      /* only UV1 systems */
+               uv_system_type = UV_NON_UNIQUE_APIC;
+               __this_cpu_write(x2apic_extra_bits,
+                       pnodeid << uvh_apicid.s.pnode_shift);
+               uv_set_apicid_hibit();
+               uv_apic = 1;
+
+       } else if (!strcmp(oem_table_id, "UVL")) {      /* only used for */
+               uv_system_type = UV_LEGACY_APIC;        /* very small systems */
+               uv_apic = 0;
+
+       } else {
+               goto badbios;
        }
-       return 0;
+
+       pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
+               oem_id, oem_table_id, uv_system_type,
+               uv_min_hub_revision_id, uv_apic);
+
+       return uv_apic;
+
+badbios:
+       pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
+       pr_err("Current BIOS not supported, update kernel and/or BIOS\n");
+       BUG();
 }
 
 enum uv_system_type get_uv_system_type(void)
@@ -854,10 +881,14 @@ void __init uv_system_init(void)
        unsigned long mmr_base, present, paddr;
        unsigned short pnode_mask;
        unsigned char n_lshift;
-       char *hub = (is_uv1_hub() ? "UV1" :
-                   (is_uv2_hub() ? "UV2" :
-                                   "UV3"));
+       char *hub = (is_uv1_hub() ? "UV100/1000" :
+                   (is_uv2_hub() ? "UV2000/3000" :
+                   (is_uv3_hub() ? "UV300" : NULL)));
 
+       if (!hub) {
+               pr_err("UV: Unknown/unsupported UV hub\n");
+               return;
+       }
        pr_info("UV: Found %s hub\n", hub);
        map_low_mmrs();
 
index 3b3b9d33ac1d2f0809db3b16230c840e98774b70..47703aed74cfb7d013b2d577488d68c7280bf8c6 100644 (file)
@@ -68,7 +68,7 @@ void foo(void)
 
        /* Offset from the sysenter stack to tss.sp0 */
        DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-                sizeof(struct tss_struct));
+              offsetofend(struct tss_struct, SYSENTER_stack));
 
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
        BLANK();
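offsetofend() gives the offset of the first byte past a member, i.e. for SYSENTER_stack it is the top of that per-TSS stack. A minimal re-derivation of the macro (the kernel defines an equivalent in its headers); the struct layout below is a made-up stand-in:

#include <stddef.h>
#include <stdio.h>

#define offsetofend(TYPE, MEMBER) \
	(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))

struct demo_tss {                   /* hypothetical layout */
	char other[16];
	char SYSENTER_stack[512];   /* stand-in sizes */
};

int main(void)
{
	/* 16 + 512 = 528: one byte past the end of SYSENTER_stack. */
	printf("%zu\n", offsetofend(struct demo_tss, SYSENTER_stack));
	return 0;
}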
index fdcbb4d27c9f80f35a587f4284fa2b958c44e774..5ce6f2da87639c7d373879e43120c9035ba590b9 100644 (file)
@@ -81,6 +81,7 @@ int main(void)
 #undef ENTRY
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+       OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
        BLANK();
 
        DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
index a220239cea65ca99b3c52ebbfaed6ce973f0dec1..fd470ebf924e574e827f1942b973075d3168a9b6 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
@@ -488,6 +489,9 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
 
                va_align.mask     = (upperbit - 1) & PAGE_MASK;
                va_align.flags    = ALIGN_VA_32 | ALIGN_VA_64;
+
+               /* A random value per boot for bit slice [12:upper_bit) */
+               va_align.bits = get_random_int() & va_align.mask;
        }
 }
 
@@ -711,6 +715,11 @@ static void init_amd(struct cpuinfo_x86 *c)
                set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
 
        rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+       /* 3DNow or LM implies PREFETCHW */
+       if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
+               if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+                       set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 }
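The masking in bsp_init_amd() keeps randomness only in the bit slice the comment names. A sketch with assumed values (upperbit and the RNG are stand-ins for the kernel's):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long page_mask = ~0xfffUL;     /* 4 KiB PAGE_MASK */
	unsigned long upperbit  = 1UL << 15;    /* assumed upper bound */
	unsigned long mask      = (upperbit - 1) & page_mask;   /* 0x7000 */
	unsigned long bits;

	srand(12345);                           /* stand-in for get_random_int() */
	bits = (unsigned long)rand() & mask;    /* keeps only bits [12:15) */

	printf("mask=%#lx bits=%#lx\n", mask, bits);
	return 0;
}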
 
 #ifdef CONFIG_X86_32
index 2346c95c6ab1945077fdde28c129342ffe09749d..3f70538012e2d6ac0949f45983565a20a6ca87a3 100644 (file)
@@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 }
 
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-/* May not be __init: called during resume */
-static void syscall32_cpu_init(void)
-{
-       /* Load these always in case some future AMD CPU supports
-          SYSENTER from compat mode too. */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
-       wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-#endif         /* CONFIG_IA32_EMULATION */
-#endif         /* CONFIG_X86_64 */
-
+/*
+ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
+ * on 32-bit kernels:
+ */
 #ifdef CONFIG_X86_32
 void enable_sep_cpu(void)
 {
-       int cpu = get_cpu();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct tss_struct *tss;
+       int cpu;
 
-       if (!boot_cpu_has(X86_FEATURE_SEP)) {
-               put_cpu();
-               return;
-       }
+       cpu = get_cpu();
+       tss = &per_cpu(cpu_tss, cpu);
+
+       if (!boot_cpu_has(X86_FEATURE_SEP))
+               goto out;
+
+       /*
+        * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+        * see the big comment in struct x86_hw_tss's definition.
+        */
 
        tss->x86_tss.ss1 = __KERNEL_CS;
-       tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
-       wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-       wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
-       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+
+       wrmsr(MSR_IA32_SYSENTER_ESP,
+             (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
+             0);
+
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+
+out:
        put_cpu();
 }
 #endif
@@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-       (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+       (unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
@@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
                     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
        &init_task;
@@ -1171,10 +1170,23 @@ void syscall_init(void)
         */
        wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
        wrmsrl(MSR_LSTAR, system_call);
-       wrmsrl(MSR_CSTAR, ignore_sysret);
 
 #ifdef CONFIG_IA32_EMULATION
-       syscall32_cpu_init();
+       wrmsrl(MSR_CSTAR, ia32_cstar_target);
+       /*
+        * This only works on Intel CPUs.
+        * On AMD CPUs these MSRs are 32-bit, so the CPU truncates
+        * MSR_IA32_SYSENTER_EIP. This does not cause SYSENTER to jump to the
+        * wrong location, because AMD doesn't allow SYSENTER in long mode
+        * (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+#else
+       wrmsrl(MSR_CSTAR, ignore_sysret);
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
 #endif
 
        /* Flags to clear on syscall */
@@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+       (unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
@@ -1307,7 +1328,7 @@ void cpu_init(void)
         */
        load_ucode_ap();
 
-       t = &per_cpu(init_tss, cpu);
+       t = &per_cpu(cpu_tss, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1391,7 +1412,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(init_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
        struct thread_struct *thread = &curr->thread;
 
        wait_for_master_cpu(cpu);
index 659643376dbf7d9d04b377e85ef04bf1e6bf1ada..edcb0e28c336d085d0ee1011793c98ba6f5a13ae 100644 (file)
@@ -7,16 +7,14 @@
  *     Andi Kleen / Andreas Herrmann   : CPUID4 emulation on AMD.
  */
 
-#include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/compiler.h>
+#include <linux/cacheinfo.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/sysfs.h>
 #include <linux/pci.h>
 
 #include <asm/processor.h>
-#include <linux/smp.h>
 #include <asm/amd_nb.h>
 #include <asm/smp.h>
 
@@ -116,10 +114,10 @@ static const struct _cache_table cache_table[] =
 
 
 enum _cache_type {
-       CACHE_TYPE_NULL = 0,
-       CACHE_TYPE_DATA = 1,
-       CACHE_TYPE_INST = 2,
-       CACHE_TYPE_UNIFIED = 3
+       CTYPE_NULL = 0,
+       CTYPE_DATA = 1,
+       CTYPE_INST = 2,
+       CTYPE_UNIFIED = 3
 };
 
 union _cpuid4_leaf_eax {
@@ -159,11 +157,6 @@ struct _cpuid4_info_regs {
        struct amd_northbridge *nb;
 };
 
-struct _cpuid4_info {
-       struct _cpuid4_info_regs base;
-       DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
-};
-
 unsigned short                 num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -220,6 +213,13 @@ static const unsigned short assocs[] = {
 static const unsigned char levels[] = { 1, 1, 2, 3 };
 static const unsigned char types[] = { 1, 2, 3, 3 };
 
+static const enum cache_type cache_type_map[] = {
+       [CTYPE_NULL] = CACHE_TYPE_NOCACHE,
+       [CTYPE_DATA] = CACHE_TYPE_DATA,
+       [CTYPE_INST] = CACHE_TYPE_INST,
+       [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
+};
+
 static void
 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
                     union _cpuid4_leaf_ebx *ebx,
@@ -291,14 +291,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
                (ebx->split.ways_of_associativity + 1) - 1;
 }
 
-struct _cache_attr {
-       struct attribute attr;
-       ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
-       ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
-                        unsigned int);
-};
-
 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
+
 /*
  * L3 cache descriptors
  */
@@ -325,20 +319,6 @@ static void amd_calc_l3_indices(struct amd_northbridge *nb)
        l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
 }
 
-static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
-{
-       int node;
-
-       /* only for L3, and not in virtualized environments */
-       if (index < 3)
-               return;
-
-       node = amd_get_nb_id(smp_processor_id());
-       this_leaf->nb = node_to_amd_nb(node);
-       if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
-               amd_calc_l3_indices(this_leaf->nb);
-}
-
 /*
  * check whether a slot used for disabling an L3 index is occupied.
  * @l3: L3 cache descriptor
@@ -359,15 +339,13 @@ int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
        return -1;
 }
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
                                  unsigned int slot)
 {
        int index;
+       struct amd_northbridge *nb = this_leaf->priv;
 
-       if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-               return -EINVAL;
-
-       index = amd_get_l3_disable_slot(this_leaf->base.nb, slot);
+       index = amd_get_l3_disable_slot(nb, slot);
        if (index >= 0)
                return sprintf(buf, "%d\n", index);
 
@@ -376,9 +354,10 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
 
 #define SHOW_CACHE_DISABLE(slot)                                       \
 static ssize_t                                                         \
-show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf,   \
-                         unsigned int cpu)                             \
+cache_disable_##slot##_show(struct device *dev,                                \
+                           struct device_attribute *attr, char *buf)   \
 {                                                                      \
+       struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
        return show_cache_disable(this_leaf, buf, slot);                \
 }
 SHOW_CACHE_DISABLE(0)
@@ -446,25 +425,23 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot,
        return 0;
 }
 
-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-                                 const char *buf, size_t count,
-                                 unsigned int slot)
+static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
+                                  const char *buf, size_t count,
+                                  unsigned int slot)
 {
        unsigned long val = 0;
        int cpu, err = 0;
+       struct amd_northbridge *nb = this_leaf->priv;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-               return -EINVAL;
-
-       cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+       cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
        if (kstrtoul(buf, 10, &val) < 0)
                return -EINVAL;
 
-       err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
+       err = amd_set_l3_disable_slot(nb, cpu, slot, val);
        if (err) {
                if (err == -EEXIST)
                        pr_warning("L3 slot %d in use/index already disabled!\n",
@@ -476,41 +453,36 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
 
 #define STORE_CACHE_DISABLE(slot)                                      \
 static ssize_t                                                         \
-store_cache_disable_##slot(struct _cpuid4_info *this_leaf,             \
-                          const char *buf, size_t count,               \
-                          unsigned int cpu)                            \
+cache_disable_##slot##_store(struct device *dev,                       \
+                            struct device_attribute *attr,             \
+                            const char *buf, size_t count)             \
 {                                                                      \
+       struct cacheinfo *this_leaf = dev_get_drvdata(dev);             \
        return store_cache_disable(this_leaf, buf, count, slot);        \
 }
 STORE_CACHE_DISABLE(0)
 STORE_CACHE_DISABLE(1)
 
-static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
-               show_cache_disable_0, store_cache_disable_0);
-static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
-               show_cache_disable_1, store_cache_disable_1);
-
-static ssize_t
-show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
+static ssize_t subcaches_show(struct device *dev,
+                             struct device_attribute *attr, char *buf)
 {
-       if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-               return -EINVAL;
+       struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+       int cpu = cpumask_first(&this_leaf->shared_cpu_map);
 
        return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
 }
 
-static ssize_t
-store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
-               unsigned int cpu)
+static ssize_t subcaches_store(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count)
 {
+       struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+       int cpu = cpumask_first(&this_leaf->shared_cpu_map);
        unsigned long val;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-               return -EINVAL;
-
        if (kstrtoul(buf, 16, &val) < 0)
                return -EINVAL;
 
@@ -520,9 +492,92 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
        return count;
 }
 
-static struct _cache_attr subcaches =
-       __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
+static DEVICE_ATTR_RW(cache_disable_0);
+static DEVICE_ATTR_RW(cache_disable_1);
+static DEVICE_ATTR_RW(subcaches);
+
+static umode_t
+cache_private_attrs_is_visible(struct kobject *kobj,
+                              struct attribute *attr, int unused)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct cacheinfo *this_leaf = dev_get_drvdata(dev);
+       umode_t mode = attr->mode;
+
+       if (!this_leaf->priv)
+               return 0;
+
+       if ((attr == &dev_attr_subcaches.attr) &&
+           amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               return mode;
+
+       if ((attr == &dev_attr_cache_disable_0.attr ||
+            attr == &dev_attr_cache_disable_1.attr) &&
+           amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+               return mode;
+
+       return 0;
+}
+
+static struct attribute_group cache_private_group = {
+       .is_visible = cache_private_attrs_is_visible,
+};
+
+static void init_amd_l3_attrs(void)
+{
+       int n = 1;
+       static struct attribute **amd_l3_attrs;
+
+       if (amd_l3_attrs) /* already initialized */
+               return;
+
+       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+               n += 2;
+       if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               n += 1;
+
+       amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
+       if (!amd_l3_attrs)
+               return;
+
+       n = 0;
+       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+               amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
+               amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
+       }
+       if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
+               amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
 
+       cache_private_group.attrs = amd_l3_attrs;
+}
+
+const struct attribute_group *
+cache_get_priv_group(struct cacheinfo *this_leaf)
+{
+       struct amd_northbridge *nb = this_leaf->priv;
+
+       if (this_leaf->level < 3 || !nb)
+               return NULL;
+
+       if (nb && nb->l3_cache.indices)
+               init_amd_l3_attrs();
+
+       return &cache_private_group;
+}
+
+static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
+{
+       int node;
+
+       /* only for L3, and not in virtualized environments */
+       if (index < 3)
+               return;
+
+       node = amd_get_nb_id(smp_processor_id());
+       this_leaf->nb = node_to_amd_nb(node);
+       if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
+               amd_calc_l3_indices(this_leaf->nb);
+}
 #else
 #define amd_init_l3_cache(x, y)
 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
@@ -546,7 +601,7 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
                cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
        }
 
-       if (eax.split.type == CACHE_TYPE_NULL)
+       if (eax.split.type == CTYPE_NULL)
                return -EIO; /* better error ? */
 
        this_leaf->eax = eax;
@@ -575,7 +630,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
                /* Do cpuid(op) loop to find out num_cache_leaves */
                cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
                cache_eax.full = eax;
-       } while (cache_eax.split.type != CACHE_TYPE_NULL);
+       } while (cache_eax.split.type != CTYPE_NULL);
        return i;
 }
 
@@ -626,9 +681,9 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
                        switch (this_leaf.eax.split.level) {
                        case 1:
-                               if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
+                               if (this_leaf.eax.split.type == CTYPE_DATA)
                                        new_l1d = this_leaf.size/1024;
-                               else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
+                               else if (this_leaf.eax.split.type == CTYPE_INST)
                                        new_l1i = this_leaf.size/1024;
                                break;
                        case 2:
@@ -747,55 +802,52 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
        return l2;
 }
 
-#ifdef CONFIG_SYSFS
-
-/* pointer to _cpuid4_info array (for each cache leaf) */
-static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
-#define CPUID4_INFO_IDX(x, y)  (&((per_cpu(ici_cpuid4_info, x))[y]))
-
-#ifdef CONFIG_SMP
-
-static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
+static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
+                                   struct _cpuid4_info_regs *base)
 {
-       struct _cpuid4_info *this_leaf;
+       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+       struct cacheinfo *this_leaf;
        int i, sibling;
 
        if (cpu_has_topoext) {
                unsigned int apicid, nshared, first, last;
 
-               if (!per_cpu(ici_cpuid4_info, cpu))
-                       return 0;
-
-               this_leaf = CPUID4_INFO_IDX(cpu, index);
-               nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
+               this_leaf = this_cpu_ci->info_list + index;
+               nshared = base->eax.split.num_threads_sharing + 1;
                apicid = cpu_data(cpu).apicid;
                first = apicid - (apicid % nshared);
                last = first + nshared - 1;
 
                for_each_online_cpu(i) {
+                       this_cpu_ci = get_cpu_cacheinfo(i);
+                       if (!this_cpu_ci->info_list)
+                               continue;
+
                        apicid = cpu_data(i).apicid;
                        if ((apicid < first) || (apicid > last))
                                continue;
-                       if (!per_cpu(ici_cpuid4_info, i))
-                               continue;
-                       this_leaf = CPUID4_INFO_IDX(i, index);
+
+                       this_leaf = this_cpu_ci->info_list + index;
 
                        for_each_online_cpu(sibling) {
                                apicid = cpu_data(sibling).apicid;
                                if ((apicid < first) || (apicid > last))
                                        continue;
-                               set_bit(sibling, this_leaf->shared_cpu_map);
+                               cpumask_set_cpu(sibling,
+                                               &this_leaf->shared_cpu_map);
                        }
                }
        } else if (index == 3) {
                for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
-                       if (!per_cpu(ici_cpuid4_info, i))
+                       this_cpu_ci = get_cpu_cacheinfo(i);
+                       if (!this_cpu_ci->info_list)
                                continue;
-                       this_leaf = CPUID4_INFO_IDX(i, index);
+                       this_leaf = this_cpu_ci->info_list + index;
                        for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
                                if (!cpu_online(sibling))
                                        continue;
-                               set_bit(sibling, this_leaf->shared_cpu_map);
+                               cpumask_set_cpu(sibling,
+                                               &this_leaf->shared_cpu_map);
                        }
                }
        } else
@@ -804,457 +856,86 @@ static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
        return 1;
 }
 
-static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
+static void __cache_cpumap_setup(unsigned int cpu, int index,
+                                struct _cpuid4_info_regs *base)
 {
-       struct _cpuid4_info *this_leaf, *sibling_leaf;
+       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+       struct cacheinfo *this_leaf, *sibling_leaf;
        unsigned long num_threads_sharing;
        int index_msb, i;
        struct cpuinfo_x86 *c = &cpu_data(cpu);
 
        if (c->x86_vendor == X86_VENDOR_AMD) {
-               if (cache_shared_amd_cpu_map_setup(cpu, index))
+               if (__cache_amd_cpumap_setup(cpu, index, base))
                        return;
        }
 
-       this_leaf = CPUID4_INFO_IDX(cpu, index);
-       num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing;
+       this_leaf = this_cpu_ci->info_list + index;
+       num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
 
+       cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
        if (num_threads_sharing == 1)
-               cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
-       else {
-               index_msb = get_count_order(num_threads_sharing);
-
-               for_each_online_cpu(i) {
-                       if (cpu_data(i).apicid >> index_msb ==
-                           c->apicid >> index_msb) {
-                               cpumask_set_cpu(i,
-                                       to_cpumask(this_leaf->shared_cpu_map));
-                               if (i != cpu && per_cpu(ici_cpuid4_info, i))  {
-                                       sibling_leaf =
-                                               CPUID4_INFO_IDX(i, index);
-                                       cpumask_set_cpu(cpu, to_cpumask(
-                                               sibling_leaf->shared_cpu_map));
-                               }
-                       }
-               }
-       }
-}
-static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
-       struct _cpuid4_info     *this_leaf, *sibling_leaf;
-       int sibling;
-
-       this_leaf = CPUID4_INFO_IDX(cpu, index);
-       for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
-               sibling_leaf = CPUID4_INFO_IDX(sibling, index);
-               cpumask_clear_cpu(cpu,
-                                 to_cpumask(sibling_leaf->shared_cpu_map));
-       }
-}
-#else
-static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
-{
-}
-
-static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
-{
-}
-#endif
-
-static void free_cache_attributes(unsigned int cpu)
-{
-       int i;
-
-       for (i = 0; i < num_cache_leaves; i++)
-               cache_remove_shared_cpu_map(cpu, i);
-
-       kfree(per_cpu(ici_cpuid4_info, cpu));
-       per_cpu(ici_cpuid4_info, cpu) = NULL;
-}
-
-static void get_cpu_leaves(void *_retval)
-{
-       int j, *retval = _retval, cpu = smp_processor_id();
+               return;
 
-       /* Do cpuid and store the results */
-       for (j = 0; j < num_cache_leaves; j++) {
-               struct _cpuid4_info *this_leaf = CPUID4_INFO_IDX(cpu, j);
+       index_msb = get_count_order(num_threads_sharing);
 
-               *retval = cpuid4_cache_lookup_regs(j, &this_leaf->base);
-               if (unlikely(*retval < 0)) {
-                       int i;
+       for_each_online_cpu(i)
+               if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
+                       struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 
-                       for (i = 0; i < j; i++)
-                               cache_remove_shared_cpu_map(cpu, i);
-                       break;
+                       if (i == cpu || !sib_cpu_ci->info_list)
+                               continue; /* skip self or CPUs without cacheinfo */
+                       sibling_leaf = sib_cpu_ci->info_list + index;
+                       cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+                       cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
                }
-               cache_shared_cpu_map_setup(cpu, j);
-       }
 }
 
-static int detect_cache_attributes(unsigned int cpu)
+static void ci_leaf_init(struct cacheinfo *this_leaf,
+                        struct _cpuid4_info_regs *base)
 {
-       int                     retval;
-
-       if (num_cache_leaves == 0)
-               return -ENOENT;
-
-       per_cpu(ici_cpuid4_info, cpu) = kzalloc(
-           sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
-       if (per_cpu(ici_cpuid4_info, cpu) == NULL)
-               return -ENOMEM;
-
-       smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
-       if (retval) {
-               kfree(per_cpu(ici_cpuid4_info, cpu));
-               per_cpu(ici_cpuid4_info, cpu) = NULL;
-       }
-
-       return retval;
+       this_leaf->level = base->eax.split.level;
+       this_leaf->type = cache_type_map[base->eax.split.type];
+       this_leaf->coherency_line_size =
+                               base->ebx.split.coherency_line_size + 1;
+       this_leaf->ways_of_associativity =
+                               base->ebx.split.ways_of_associativity + 1;
+       this_leaf->size = base->size;
+       this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
+       this_leaf->physical_line_partition =
+                               base->ebx.split.physical_line_partition + 1;
+       this_leaf->priv = base->nb;
 }
 
-#include <linux/kobject.h>
-#include <linux/sysfs.h>
-#include <linux/cpu.h>
-
-/* pointer to kobject for cpuX/cache */
-static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
-
-struct _index_kobject {
-       struct kobject kobj;
-       unsigned int cpu;
-       unsigned short index;
-};
-
-/* pointer to array of kobjects for cpuX/cache/indexY */
-static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
-#define INDEX_KOBJECT_PTR(x, y)                (&((per_cpu(ici_index_kobject, x))[y]))
-
-#define show_one_plus(file_name, object, val)                          \
-static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
-                               unsigned int cpu)                       \
-{                                                                      \
-       return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
-}
-
-show_one_plus(level, base.eax.split.level, 0);
-show_one_plus(coherency_line_size, base.ebx.split.coherency_line_size, 1);
-show_one_plus(physical_line_partition, base.ebx.split.physical_line_partition, 1);
-show_one_plus(ways_of_associativity, base.ebx.split.ways_of_associativity, 1);
-show_one_plus(number_of_sets, base.ecx.split.number_of_sets, 1);
-
-static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
-                        unsigned int cpu)
-{
-       return sprintf(buf, "%luK\n", this_leaf->base.size / 1024);
-}
-
-static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
-                                       int type, char *buf)
-{
-       const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-       int ret;
-
-       if (type)
-               ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl",
-                               cpumask_pr_args(mask));
-       else
-               ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb",
-                               cpumask_pr_args(mask));
-       buf[ret++] = '\n';
-       buf[ret] = '\0';
-       return ret;
-}
-
-static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
-                                         unsigned int cpu)
+static int __init_cache_level(unsigned int cpu)
 {
-       return show_shared_cpu_map_func(leaf, 0, buf);
-}
-
-static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
-                                          unsigned int cpu)
-{
-       return show_shared_cpu_map_func(leaf, 1, buf);
-}
+       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
-static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
-                        unsigned int cpu)
-{
-       switch (this_leaf->base.eax.split.type) {
-       case CACHE_TYPE_DATA:
-               return sprintf(buf, "Data\n");
-       case CACHE_TYPE_INST:
-               return sprintf(buf, "Instruction\n");
-       case CACHE_TYPE_UNIFIED:
-               return sprintf(buf, "Unified\n");
-       default:
-               return sprintf(buf, "Unknown\n");
-       }
-}
-
-#define to_object(k)   container_of(k, struct _index_kobject, kobj)
-#define to_attr(a)     container_of(a, struct _cache_attr, attr)
-
-#define define_one_ro(_name) \
-static struct _cache_attr _name = \
-       __ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(level);
-define_one_ro(type);
-define_one_ro(coherency_line_size);
-define_one_ro(physical_line_partition);
-define_one_ro(ways_of_associativity);
-define_one_ro(number_of_sets);
-define_one_ro(size);
-define_one_ro(shared_cpu_map);
-define_one_ro(shared_cpu_list);
-
-static struct attribute *default_attrs[] = {
-       &type.attr,
-       &level.attr,
-       &coherency_line_size.attr,
-       &physical_line_partition.attr,
-       &ways_of_associativity.attr,
-       &number_of_sets.attr,
-       &size.attr,
-       &shared_cpu_map.attr,
-       &shared_cpu_list.attr,
-       NULL
-};
-
-#ifdef CONFIG_AMD_NB
-static struct attribute **amd_l3_attrs(void)
-{
-       static struct attribute **attrs;
-       int n;
-
-       if (attrs)
-               return attrs;
-
-       n = ARRAY_SIZE(default_attrs);
-
-       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
-               n += 2;
-
-       if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-               n += 1;
-
-       attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
-       if (attrs == NULL)
-               return attrs = default_attrs;
-
-       for (n = 0; default_attrs[n]; n++)
-               attrs[n] = default_attrs[n];
-
-       if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
-               attrs[n++] = &cache_disable_0.attr;
-               attrs[n++] = &cache_disable_1.attr;
-       }
-
-       if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
-               attrs[n++] = &subcaches.attr;
-
-       return attrs;
-}
-#endif
-
-static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
-{
-       struct _cache_attr *fattr = to_attr(attr);
-       struct _index_kobject *this_leaf = to_object(kobj);
-       ssize_t ret;
-
-       ret = fattr->show ?
-               fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-                       buf, this_leaf->cpu) :
-               0;
-       return ret;
-}
-
-static ssize_t store(struct kobject *kobj, struct attribute *attr,
-                    const char *buf, size_t count)
-{
-       struct _cache_attr *fattr = to_attr(attr);
-       struct _index_kobject *this_leaf = to_object(kobj);
-       ssize_t ret;
-
-       ret = fattr->store ?
-               fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
-                       buf, count, this_leaf->cpu) :
-               0;
-       return ret;
-}
-
-static const struct sysfs_ops sysfs_ops = {
-       .show   = show,
-       .store  = store,
-};
-
-static struct kobj_type ktype_cache = {
-       .sysfs_ops      = &sysfs_ops,
-       .default_attrs  = default_attrs,
-};
-
-static struct kobj_type ktype_percpu_entry = {
-       .sysfs_ops      = &sysfs_ops,
-};
-
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
-{
-       kfree(per_cpu(ici_cache_kobject, cpu));
-       kfree(per_cpu(ici_index_kobject, cpu));
-       per_cpu(ici_cache_kobject, cpu) = NULL;
-       per_cpu(ici_index_kobject, cpu) = NULL;
-       free_cache_attributes(cpu);
-}
-
-static int cpuid4_cache_sysfs_init(unsigned int cpu)
-{
-       int err;
-
-       if (num_cache_leaves == 0)
+       if (!num_cache_leaves)
                return -ENOENT;
-
-       err = detect_cache_attributes(cpu);
-       if (err)
-               return err;
-
-       /* Allocate all required memory */
-       per_cpu(ici_cache_kobject, cpu) =
-               kzalloc(sizeof(struct kobject), GFP_KERNEL);
-       if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
-               goto err_out;
-
-       per_cpu(ici_index_kobject, cpu) = kzalloc(
-           sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
-       if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
-               goto err_out;
-
+       if (!this_cpu_ci)
+               return -EINVAL;
+       this_cpu_ci->num_levels = 3;
+       this_cpu_ci->num_leaves = num_cache_leaves;
        return 0;
-
-err_out:
-       cpuid4_cache_sysfs_exit(cpu);
-       return -ENOMEM;
 }
 
-static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
-
-/* Add/Remove cache interface for CPU device */
-static int cache_add_dev(struct device *dev)
+static int __populate_cache_leaves(unsigned int cpu)
 {
-       unsigned int cpu = dev->id;
-       unsigned long i, j;
-       struct _index_kobject *this_object;
-       struct _cpuid4_info   *this_leaf;
-       int retval;
-
-       retval = cpuid4_cache_sysfs_init(cpu);
-       if (unlikely(retval < 0))
-               return retval;
-
-       retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
-                                     &ktype_percpu_entry,
-                                     &dev->kobj, "%s", "cache");
-       if (retval < 0) {
-               cpuid4_cache_sysfs_exit(cpu);
-               return retval;
-       }
+       unsigned int idx, ret;
+       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+       struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+       struct _cpuid4_info_regs id4_regs = {};
 
-       for (i = 0; i < num_cache_leaves; i++) {
-               this_object = INDEX_KOBJECT_PTR(cpu, i);
-               this_object->cpu = cpu;
-               this_object->index = i;
-
-               this_leaf = CPUID4_INFO_IDX(cpu, i);
-
-               ktype_cache.default_attrs = default_attrs;
-#ifdef CONFIG_AMD_NB
-               if (this_leaf->base.nb)
-                       ktype_cache.default_attrs = amd_l3_attrs();
-#endif
-               retval = kobject_init_and_add(&(this_object->kobj),
-                                             &ktype_cache,
-                                             per_cpu(ici_cache_kobject, cpu),
-                                             "index%1lu", i);
-               if (unlikely(retval)) {
-                       for (j = 0; j < i; j++)
-                               kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
-                       kobject_put(per_cpu(ici_cache_kobject, cpu));
-                       cpuid4_cache_sysfs_exit(cpu);
-                       return retval;
-               }
-               kobject_uevent(&(this_object->kobj), KOBJ_ADD);
+       for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
+               ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
+               if (ret)
+                       return ret;
+               ci_leaf_init(this_leaf++, &id4_regs);
+               __cache_cpumap_setup(cpu, idx, &id4_regs);
        }
-       cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
-
-       kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
        return 0;
 }
 
-static void cache_remove_dev(struct device *dev)
-{
-       unsigned int cpu = dev->id;
-       unsigned long i;
-
-       if (per_cpu(ici_cpuid4_info, cpu) == NULL)
-               return;
-       if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
-               return;
-       cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
-
-       for (i = 0; i < num_cache_leaves; i++)
-               kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
-       kobject_put(per_cpu(ici_cache_kobject, cpu));
-       cpuid4_cache_sysfs_exit(cpu);
-}
-
-static int cacheinfo_cpu_callback(struct notifier_block *nfb,
-                                 unsigned long action, void *hcpu)
-{
-       unsigned int cpu = (unsigned long)hcpu;
-       struct device *dev;
-
-       dev = get_cpu_device(cpu);
-       switch (action) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               cache_add_dev(dev);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               cache_remove_dev(dev);
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block cacheinfo_cpu_notifier = {
-       .notifier_call = cacheinfo_cpu_callback,
-};
-
-static int __init cache_sysfs_init(void)
-{
-       int i, err = 0;
-
-       if (num_cache_leaves == 0)
-               return 0;
-
-       cpu_notifier_register_begin();
-       for_each_online_cpu(i) {
-               struct device *dev = get_cpu_device(i);
-
-               err = cache_add_dev(dev);
-               if (err)
-                       goto out;
-       }
-       __register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
-out:
-       cpu_notifier_register_done();
-       return err;
-}
-
-device_initcall(cache_sysfs_init);
-
-#endif
+DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
+DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
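The fields ci_leaf_init() copies out of the CPUID(4) registers are all stored minus one and multiply out to the cache size. A standalone check with assumed register values:

#include <stdio.h>

int main(void)
{
	unsigned ways_minus1       = 7;     /* EBX ways_of_associativity */
	unsigned partitions_minus1 = 0;     /* EBX physical_line_partition */
	unsigned line_size_minus1  = 63;    /* EBX coherency_line_size */
	unsigned sets_minus1       = 63;    /* ECX number_of_sets */
	unsigned long size;

	size = (unsigned long)(ways_minus1 + 1) * (partitions_minus1 + 1) *
	       (line_size_minus1 + 1) * (sets_minus1 + 1);

	/* 8 ways * 1 partition * 64 B lines * 64 sets = 32 KiB */
	printf("cache size: %lu KiB\n", size / 1024);
	return 0;
}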
index 10b46906767fd4857389570322757fa89bbc8e6c..fe32074b865b1686ba9e0e2cdc3ccb5ebe99ddfe 100644 (file)
@@ -14,6 +14,7 @@ enum severity_level {
 };
 
 #define ATTR_LEN               16
+#define INITIAL_CHECK_INTERVAL (5 * 60) /* 5 minutes */
 
 /* One object for each MCE bank, shared by all CPUs */
 struct mce_bank {
@@ -23,20 +24,20 @@ struct mce_bank {
        char                    attrname[ATTR_LEN];     /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
 extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
-unsigned long mce_intel_adjust_timer(unsigned long interval);
-void mce_intel_cmci_poll(void);
+unsigned long cmci_intel_adjust_timer(unsigned long interval);
+bool mce_intel_cmci_poll(void);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 #else
-# define mce_intel_adjust_timer mce_adjust_timer_default
-static inline void mce_intel_cmci_poll(void) { }
+# define cmci_intel_adjust_timer mce_adjust_timer_default
+static inline bool mce_intel_cmci_poll(void) { return false; }
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 #endif
index 8bb433043a7f6877beaa8b38ccb8ec9684069212..9c682c222071db1960ba848c24c76567306d0542 100644 (file)
@@ -186,7 +186,61 @@ static int error_context(struct mce *m)
        return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
+/*
+ * See the AMD Error Scope Hierarchy table in a newer BKDG; for example,
+ * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features".
+ */
+static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+{
+       enum context ctx = error_context(m);
+
+       /* Processor Context Corrupt, no need to fumble too much, die! */
+       if (m->status & MCI_STATUS_PCC)
+               return MCE_PANIC_SEVERITY;
+
+       if (m->status & MCI_STATUS_UC) {
+
+               /*
+                * On older systems where the overflow_recov flag is not
+                * present, we should simply panic if an error overflow occurs.
+                * If the overflow_recov flag is present and set, then software
+                * can try to at least kill the current process to prolong
+                * system operation.
+                */
+               if (mce_flags.overflow_recov) {
+                       /* software can try to contain */
+                       if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
+                               return MCE_PANIC_SEVERITY;
+
+                       /* kill current process */
+                       return MCE_AR_SEVERITY;
+               } else {
+                       /* at least one error was not logged */
+                       if (m->status & MCI_STATUS_OVER)
+                               return MCE_PANIC_SEVERITY;
+               }
+
+               /*
+                * For any other case, return MCE_UC_SEVERITY so that we log the
+                * error and exit #MC handler.
+                */
+               return MCE_UC_SEVERITY;
+       }
+
+       /*
+        * deferred error: poll handler catches these and adds to mce_ring so
+        * memory-failure can take recovery actions.
+        */
+       if (m->status & MCI_STATUS_DEFERRED)
+               return MCE_DEFERRED_SEVERITY;
+
+       /*
+        * corrected error: poll handler catches these and passes responsibility
+        * of decoding the error to EDAC
+        */
+       return MCE_KEEP_SEVERITY;
+}
+
+static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
        enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
        enum context ctx = error_context(m);
@@ -216,6 +270,16 @@ int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
        }
 }
 
+/* Default to mce_severity_intel */
+int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+                   mce_severity_intel;
+
+void __init mcheck_vendor_init_severity(void)
+{
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+               mce_severity = mce_severity_amd;
+}
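
With mce_severity now a function pointer, the vendor-specific grader is picked once at boot: the pointer defaults to mce_severity_intel() and mcheck_vendor_init_severity() repoints it to mce_severity_amd() on AMD hardware. A compact user-space model of that dispatch plus the AMD decision ladder above (the struct fields and enum values are illustrative stand-ins for the MCi_STATUS bits and MCE_*_SEVERITY grades):

    #include <stdio.h>
    #include <stdbool.h>

    enum severity { SEV_KEEP, SEV_DEFERRED, SEV_UC, SEV_AR, SEV_PANIC };

    struct fake_mce {
        bool pcc, uc, over, deferred, ripv, in_kernel;
    };

    static bool overflow_recov;     /* models mce_flags.overflow_recov */

    static enum severity severity_amd(const struct fake_mce *m)
    {
        if (m->pcc)                         /* Processor Context Corrupt */
            return SEV_PANIC;

        if (m->uc) {
            if (overflow_recov) {
                /* cannot return to the interrupted kernel context */
                if (!m->ripv && m->in_kernel)
                    return SEV_PANIC;
                return SEV_AR;              /* kill the current process */
            }
            if (m->over)                    /* an error was lost */
                return SEV_PANIC;
            return SEV_UC;                  /* log it and leave #MC */
        }

        if (m->deferred)
            return SEV_DEFERRED;            /* picked up by the poller */

        return SEV_KEEP;                    /* corrected; EDAC decodes it */
    }

    static enum severity severity_intel(const struct fake_mce *m)
    {
        (void)m;
        return SEV_KEEP;    /* stand-in for the original table walk */
    }

    /* Defaults to the Intel grader; vendor init repoints it, as above. */
    static enum severity (*severity)(const struct fake_mce *) = severity_intel;

    int main(void)
    {
        const struct fake_mce m = { .uc = true, .over = true };

        severity = severity_amd;    /* models mcheck_vendor_init_severity() */
        printf("severity = %d (4 == panic)\n", severity(&m));
        return 0;
    }
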
+
 #ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
index 3c036cb4a370cad399cac636f97840b40c62bd4d..e535533d5ab89313ba51937ad8dd5740413f119e 100644 (file)
@@ -60,11 +60,12 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
 #define CREATE_TRACE_POINTS
 #include <trace/events/mce.h>
 
-#define SPINUNIT 100   /* 100ns */
+#define SPINUNIT               100     /* 100ns */
 
 DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 struct mce_bank *mce_banks __read_mostly;
+struct mce_vendor_flags mce_flags __read_mostly;
 
 struct mca_config mca_cfg __read_mostly = {
        .bootlog  = -1,
@@ -89,9 +90,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int                     cpu_missing;
 
-/* CMCI storm detection filter */
-static DEFINE_PER_CPU(unsigned long, mce_polled_error);
-
 /*
  * MCA banks polled by the periodic polling timer for corrected events.
  * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
@@ -622,8 +620,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * is already totally confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
+       bool error_logged = false;
        struct mce m;
        int severity;
        int i;
@@ -646,7 +645,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                if (!(m.status & MCI_STATUS_VAL))
                        continue;
 
-               this_cpu_write(mce_polled_error, 1);
+
                /*
                 * Uncorrected or signalled events are handled by the exception
                 * handler when it is enabled, so don't process those here.
@@ -679,8 +678,10 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
                 * Don't get the IP here because it's unlikely to
                 * have anything to do with the actual error location.
                 */
-               if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
+               if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) {
+                       error_logged = true;
                        mce_log(&m);
+               }
 
                /*
                 * Clear state for this bank.
@@ -694,6 +695,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
         */
 
        sync_core();
+
+       return error_logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -813,7 +816,7 @@ static void mce_reign(void)
         * other CPUs.
         */
        if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-               mce_panic("Fatal Machine check", m, msg);
+               mce_panic("Fatal machine check", m, msg);
 
        /*
         * For UC somewhere we let the CPU who detects it handle it.
@@ -826,7 +829,7 @@ static void mce_reign(void)
         * source or one CPU is hung. Panic.
         */
        if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
-               mce_panic("Machine check from unknown source", NULL, NULL);
+               mce_panic("Fatal machine check from unknown source", NULL, NULL);
 
        /*
         * Now clear all the mces_seen so that they don't reappear on
@@ -1258,7 +1261,7 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static unsigned long check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = INITIAL_CHECK_INTERVAL;
 
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
@@ -1268,49 +1271,57 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
        return interval;
 }
 
-static unsigned long (*mce_adjust_timer)(unsigned long interval) =
-       mce_adjust_timer_default;
+static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
-static int cmc_error_seen(void)
+static void __restart_timer(struct timer_list *t, unsigned long interval)
 {
-       unsigned long *v = this_cpu_ptr(&mce_polled_error);
+       unsigned long when = jiffies + interval;
+       unsigned long flags;
+
+       local_irq_save(flags);
 
-       return test_and_clear_bit(0, v);
+       if (timer_pending(t)) {
+               if (time_before(when, t->expires))
+                       mod_timer_pinned(t, when);
+       } else {
+               t->expires = round_jiffies(when);
+               add_timer_on(t, smp_processor_id());
+       }
+
+       local_irq_restore(flags);
 }
 
 static void mce_timer_fn(unsigned long data)
 {
        struct timer_list *t = this_cpu_ptr(&mce_timer);
+       int cpu = smp_processor_id();
        unsigned long iv;
-       int notify;
 
-       WARN_ON(smp_processor_id() != data);
+       WARN_ON(cpu != data);
+
+       iv = __this_cpu_read(mce_next_interval);
 
        if (mce_available(this_cpu_ptr(&cpu_info))) {
-               machine_check_poll(MCP_TIMESTAMP,
-                               this_cpu_ptr(&mce_poll_banks));
-               mce_intel_cmci_poll();
+               machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+
+               if (mce_intel_cmci_poll()) {
+                       iv = mce_adjust_timer(iv);
+                       goto done;
+               }
        }
 
        /*
-        * Alert userspace if needed.  If we logged an MCE, reduce the
-        * polling interval, otherwise increase the polling interval.
+        * Alert userspace if needed. If we logged an MCE, reduce the polling
+        * interval, otherwise increase the polling interval.
         */
-       iv = __this_cpu_read(mce_next_interval);
-       notify = mce_notify_irq();
-       notify |= cmc_error_seen();
-       if (notify) {
+       if (mce_notify_irq())
                iv = max(iv / 2, (unsigned long) HZ/100);
-       } else {
+       else
                iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
-               iv = mce_adjust_timer(iv);
-       }
+
+done:
        __this_cpu_write(mce_next_interval, iv);
-       /* Might have become 0 after CMCI storm subsided */
-       if (iv) {
-               t->expires = jiffies + iv;
-               add_timer_on(t, smp_processor_id());
-       }
+       __restart_timer(t, iv);
 }
 
 /*
@@ -1319,16 +1330,10 @@ static void mce_timer_fn(unsigned long data)
 void mce_timer_kick(unsigned long interval)
 {
        struct timer_list *t = this_cpu_ptr(&mce_timer);
-       unsigned long when = jiffies + interval;
        unsigned long iv = __this_cpu_read(mce_next_interval);
 
-       if (timer_pending(t)) {
-               if (time_before(when, t->expires))
-                       mod_timer_pinned(t, when);
-       } else {
-               t->expires = round_jiffies(when);
-               add_timer_on(t, smp_processor_id());
-       }
+       __restart_timer(t, interval);
+
        if (interval < iv)
                __this_cpu_write(mce_next_interval, interval);
 }
@@ -1525,45 +1530,46 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
                 * Various K7s with broken bank 0 around. Always disable
                 * by default.
                 */
-                if (c->x86 == 6 && cfg->banks > 0)
+               if (c->x86 == 6 && cfg->banks > 0)
                        mce_banks[0].ctl = 0;
 
-                /*
-                 * Turn off MC4_MISC thresholding banks on those models since
-                 * they're not supported there.
-                 */
-                if (c->x86 == 0x15 &&
-                    (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
-                        int i;
-                        u64 val, hwcr;
-                        bool need_toggle;
-                        u32 msrs[] = {
+               /*
+                * overflow_recov is supported for F15h Models 00h-0fh
+                * even though we don't have a CPUID bit for it.
+                */
+               if (c->x86 == 0x15 && c->x86_model <= 0xf)
+                       mce_flags.overflow_recov = 1;
+
+               /*
+                * Turn off MC4_MISC thresholding banks on those models since
+                * they're not supported there.
+                */
+               if (c->x86 == 0x15 &&
+                   (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
+                       int i;
+                       u64 hwcr;
+                       bool need_toggle;
+                       u32 msrs[] = {
                                0x00000413, /* MC4_MISC0 */
                                0xc0000408, /* MC4_MISC1 */
-                        };
+                       };
 
-                        rdmsrl(MSR_K7_HWCR, hwcr);
+                       rdmsrl(MSR_K7_HWCR, hwcr);
 
-                        /* McStatusWrEn has to be set */
-                        need_toggle = !(hwcr & BIT(18));
+                       /* McStatusWrEn has to be set */
+                       need_toggle = !(hwcr & BIT(18));
 
-                        if (need_toggle)
-                                wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+                       if (need_toggle)
+                               wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
 
-                        for (i = 0; i < ARRAY_SIZE(msrs); i++) {
-                                rdmsrl(msrs[i], val);
+                       /* Clear CntP bit safely */
+                       for (i = 0; i < ARRAY_SIZE(msrs); i++)
+                               msr_clear_bit(msrs[i], 62);
 
-                                /* CntP bit set? */
-                                if (val & BIT_64(62)) {
-                                       val &= ~BIT_64(62);
-                                       wrmsrl(msrs[i], val);
-                                }
-                        }
-
-                        /* restore old settings */
-                        if (need_toggle)
-                                wrmsrl(MSR_K7_HWCR, hwcr);
-                }
+                       /* restore old settings */
+                       if (need_toggle)
+                               wrmsrl(MSR_K7_HWCR, hwcr);
+               }
        }
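
The re-indented block above also replaces the open-coded rdmsrl/test/wrmsrl sequence with msr_clear_bit(msrs[i], 62). A standalone model of what that does to the CntP bit, with a plain u64 standing in for the MSR (assumption: like the kernel helper, it only writes back when the bit was actually set):

    #include <stdio.h>
    #include <stdint.h>

    #define BIT_64(n)   (UINT64_C(1) << (n))

    /* Model of msr_clear_bit(msr, 62): clear bit 62, write back on change. */
    static int clear_cntp(uint64_t *msr)
    {
        if (!(*msr & BIT_64(62)))
            return 0;               /* already clear: no write needed */
        *msr &= ~BIT_64(62);
        return 1;                   /* value written back */
    }

    int main(void)
    {
        uint64_t mc4_misc0 = BIT_64(62) | 0x5;  /* CntP plus some low bits */

        printf("changed=%d value=%#llx\n", clear_cntp(&mc4_misc0),
               (unsigned long long)mc4_misc0);
        return 0;
    }
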
 
        if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1629,10 +1635,11 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
        switch (c->x86_vendor) {
        case X86_VENDOR_INTEL:
                mce_intel_feature_init(c);
-               mce_adjust_timer = mce_intel_adjust_timer;
+               mce_adjust_timer = cmci_intel_adjust_timer;
                break;
        case X86_VENDOR_AMD:
                mce_amd_feature_init(c);
+               mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1;
                break;
        default:
                break;
@@ -2017,6 +2024,7 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
        mcheck_intel_therm_init();
+       mcheck_vendor_init_severity();
 
        return 0;
 }
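
The reworked timer path folds CMCI polling into the backoff decision and re-arms through __restart_timer(): the interval is halved (floored at HZ/100) whenever mce_notify_irq() indicates a logged event, and doubled (capped at check_interval * HZ) when the poll was quiet. A small model of that walk, assuming HZ = 1000 and the 5-minute cap from INITIAL_CHECK_INTERVAL:

    #include <stdio.h>
    #include <stdbool.h>

    #define HZ                      1000UL
    #define INITIAL_CHECK_INTERVAL  (5 * 60)    /* seconds */

    static unsigned long next_interval(unsigned long iv, bool logged)
    {
        unsigned long min_iv = HZ / 100;                /* 10ms at HZ=1000 */
        unsigned long max_iv = INITIAL_CHECK_INTERVAL * HZ;

        if (logged)
            return iv / 2 > min_iv ? iv / 2 : min_iv;
        return iv * 2 < max_iv ? iv * 2 : max_iv;
    }

    int main(void)
    {
        unsigned long iv = INITIAL_CHECK_INTERVAL * HZ;
        int i;

        for (i = 0; i < 5; i++) {       /* a burst of logged events... */
            iv = next_interval(iv, true);
            printf("event: iv = %lu jiffies\n", iv);
        }
        for (i = 0; i < 3; i++) {       /* ...then quiet polls back off */
            iv = next_interval(iv, false);
            printf("quiet: iv = %lu jiffies\n", iv);
        }
        return 0;
    }
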
index f1c3769bbd6433344968bf5f78ad223170ef6878..55ad9b37cae853ce0d50f193dc7eb82a49207fee 100644 (file)
@@ -79,7 +79,7 @@ static inline bool is_shared_bank(int bank)
        return (bank == 4);
 }
 
-static const char * const bank4_names(struct threshold_block *b)
+static const char *bank4_names(const struct threshold_block *b)
 {
        switch (b->address) {
        /* MSR4_MISC0 */
@@ -250,6 +250,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
                        if (!b.interrupt_capable)
                                goto init;
 
+                       b.interrupt_enable = 1;
                        new     = (high & MASK_LVTOFF_HI) >> 20;
                        offset  = setup_APIC_mce(offset, new);
 
@@ -322,6 +323,8 @@ static void amd_threshold_interrupt(void)
 log:
        mce_setup(&m);
        rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+       if (!(m.status & MCI_STATUS_VAL))
+               return;
        m.misc = ((u64)high << 32) | low;
        m.bank = bank;
        mce_log(&m);
@@ -497,10 +500,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
        b->interrupt_capable    = lvt_interrupt_supported(bank, high);
        b->threshold_limit      = THRESHOLD_MAX;
 
-       if (b->interrupt_capable)
+       if (b->interrupt_capable) {
                threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
-       else
+               b->interrupt_enable = 1;
+       } else {
                threshold_ktype.default_attrs[2] = NULL;
+       }
 
        INIT_LIST_HEAD(&b->miscj);
 
index b3c97bafc1238fedd30f1ec0a3bab2ba7ee05cb8..b4a41cf030edab7dbfae7dc67560ba63eabc2309 100644 (file)
  */
 static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
 
+/*
+ * CMCI storm detection backoff counter
+ *
+ * During a storm, we reset this counter to INITIAL_CHECK_INTERVAL if we've
+ * encountered an error; if not, we decrement it by one. We signal the end of
+ * the CMCI storm when it reaches 0.
+ */
+static DEFINE_PER_CPU(int, cmci_backoff_cnt);
+
 /*
  * cmci_discover_lock protects against parallel discovery attempts
  * which could race against each other.
@@ -46,7 +55,7 @@ static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
 #define CMCI_THRESHOLD         1
 #define CMCI_POLL_INTERVAL     (30 * HZ)
-#define CMCI_STORM_INTERVAL    (1 * HZ)
+#define CMCI_STORM_INTERVAL    (HZ)
 #define CMCI_STORM_THRESHOLD   15
 
 static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
@@ -82,11 +91,21 @@ static int cmci_supported(int *banks)
        return !!(cap & MCG_CMCI_P);
 }
 
-void mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(void)
 {
        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
-               return;
-       machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+               return false;
+
+       /*
+        * Reset the counter if we've logged an error in the last poll
+        * during the storm.
+        */
+       if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+               this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+       else
+               this_cpu_dec(cmci_backoff_cnt);
+
+       return true;
 }
 
 void mce_intel_hcpu_update(unsigned long cpu)
@@ -97,31 +116,32 @@ void mce_intel_hcpu_update(unsigned long cpu)
        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
-unsigned long mce_intel_adjust_timer(unsigned long interval)
+unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
-       int r;
-
-       if (interval < CMCI_POLL_INTERVAL)
-               return interval;
+       if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
+           (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
+               mce_notify_irq();
+               return CMCI_STORM_INTERVAL;
+       }
 
        switch (__this_cpu_read(cmci_storm_state)) {
        case CMCI_STORM_ACTIVE:
+
                /*
                 * We switch back to interrupt mode once the poll timer has
-                * silenced itself. That means no events recorded and the
-                * timer interval is back to our poll interval.
+                * silenced itself. That means no events recorded and the timer
+                * interval is back to our poll interval.
                 */
                __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
-               r = atomic_sub_return(1, &cmci_storm_on_cpus);
-               if (r == 0)
+               if (!atomic_sub_return(1, &cmci_storm_on_cpus))
                        pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+
                /* FALLTHROUGH */
 
        case CMCI_STORM_SUBSIDED:
                /*
-                * We wait for all cpus to go back to SUBSIDED
-                * state. When that happens we switch back to
-                * interrupt mode.
+                * We wait for all CPUs to go back to SUBSIDED state. When that
+                * happens we switch back to interrupt mode.
                 */
                if (!atomic_read(&cmci_storm_on_cpus)) {
                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
@@ -130,10 +150,8 @@ unsigned long mce_intel_adjust_timer(unsigned long interval)
                }
                return CMCI_POLL_INTERVAL;
        default:
-               /*
-                * We have shiny weather. Let the poll do whatever it
-                * thinks.
-                */
+
+               /* We have shiny weather. Let the poll do whatever it thinks. */
                return interval;
        }
 }
@@ -178,7 +196,8 @@ static bool cmci_storm_detect(void)
        cmci_storm_disable_banks();
        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
        r = atomic_add_return(1, &cmci_storm_on_cpus);
-       mce_timer_kick(CMCI_POLL_INTERVAL);
+       mce_timer_kick(CMCI_STORM_INTERVAL);
+       this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
 
        if (r == 1)
                pr_notice("CMCI storm detected: switching to poll mode\n");
@@ -195,6 +214,7 @@ static void intel_threshold_interrupt(void)
 {
        if (cmci_storm_detect())
                return;
+
        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
        mce_notify_irq();
 }
@@ -286,6 +306,7 @@ void cmci_recheck(void)
 
        if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;
+
        local_irq_save(flags);
        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
        local_irq_restore(flags);
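
The new cmci_backoff_cnt turns storm exit into a hysteresis decision: while the storm state is ACTIVE and the counter is positive, cmci_intel_adjust_timer() keeps returning CMCI_STORM_INTERVAL; each poll that logs an error resets the counter in mce_intel_cmci_poll(), each quiet poll decrements it. A single-CPU simulation of the counter (the cross-CPU subside accounting in the switch statement above is omitted):

    #include <stdio.h>
    #include <stdbool.h>

    #define INITIAL_CHECK_INTERVAL  (5 * 60)

    static int backoff = INITIAL_CHECK_INTERVAL;

    /* One poll tick during a storm: true while we must stay in poll mode. */
    static bool storm_poll(bool error_logged)
    {
        if (error_logged)
            backoff = INITIAL_CHECK_INTERVAL;   /* storm still live */
        else
            backoff--;
        return backoff > 0;
    }

    int main(void)
    {
        int ticks = 0;

        storm_poll(true);               /* one more burst of errors */
        while (storm_poll(false))       /* then silence... */
            ticks++;
        printf("storm declared over after %d quiet polls\n", ticks + 1);
        return 0;
    }
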
index bfbbe6195e2da7e798323d014cfc5ae2d406d0f3..12829c3ced3c549c982a3072f825c2ed1b1c7758 100644 (file)
@@ -21,7 +21,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/firmware.h>
-#include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
index d45df4bd16abec23e9725132ab2f9e649237f564..a413a69cbd744f2e2873434ee20b66e86fb466dd 100644 (file)
 #include <asm/processor.h>
 #include <asm/cmdline.h>
 
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
-
-#define CPUID_IS(a, b, c, ebx, ecx, edx)       \
-               (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
-
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_vendor() to get vendor id for AP.
- *
- * x86_vendor() gets vendor information directly through cpuid.
- */
-static int x86_vendor(void)
-{
-       u32 eax = 0x00000000;
-       u32 ebx, ecx = 0, edx;
-
-       native_cpuid(&eax, &ebx, &ecx, &edx);
-
-       if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
-               return X86_VENDOR_INTEL;
-
-       if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
-               return X86_VENDOR_AMD;
-
-       return X86_VENDOR_UNKNOWN;
-}
-
-static int x86_family(void)
-{
-       u32 eax = 0x00000001;
-       u32 ebx, ecx = 0, edx;
-       int x86;
-
-       native_cpuid(&eax, &ebx, &ecx, &edx);
-
-       x86 = (eax >> 8) & 0xf;
-       if (x86 == 15)
-               x86 += (eax >> 20) & 0xff;
-
-       return x86;
-}
-
 static bool __init check_loader_disabled_bsp(void)
 {
 #ifdef CONFIG_X86_32
@@ -96,7 +45,7 @@ static bool __init check_loader_disabled_bsp(void)
 
 void __init load_ucode_bsp(void)
 {
-       int vendor, x86;
+       int vendor, family;
 
        if (check_loader_disabled_bsp())
                return;
@@ -105,15 +54,15 @@ void __init load_ucode_bsp(void)
                return;
 
        vendor = x86_vendor();
-       x86 = x86_family();
+       family = x86_family();
 
        switch (vendor) {
        case X86_VENDOR_INTEL:
-               if (x86 >= 6)
+               if (family >= 6)
                        load_ucode_intel_bsp();
                break;
        case X86_VENDOR_AMD:
-               if (x86 >= 0x10)
+               if (family >= 0x10)
                        load_ucode_amd_bsp();
                break;
        default:
@@ -132,7 +81,7 @@ static bool check_loader_disabled_ap(void)
 
 void load_ucode_ap(void)
 {
-       int vendor, x86;
+       int vendor, family;
 
        if (check_loader_disabled_ap())
                return;
@@ -141,15 +90,15 @@ void load_ucode_ap(void)
                return;
 
        vendor = x86_vendor();
-       x86 = x86_family();
+       family = x86_family();
 
        switch (vendor) {
        case X86_VENDOR_INTEL:
-               if (x86 >= 6)
+               if (family >= 6)
                        load_ucode_intel_ap();
                break;
        case X86_VENDOR_AMD:
-               if (x86 >= 0x10)
+               if (family >= 0x10)
                        load_ucode_amd_ap();
                break;
        default:
@@ -179,18 +128,18 @@ int __init save_microcode_in_initrd(void)
 
 void reload_early_microcode(void)
 {
-       int vendor, x86;
+       int vendor, family;
 
        vendor = x86_vendor();
-       x86 = x86_family();
+       family = x86_family();
 
        switch (vendor) {
        case X86_VENDOR_INTEL:
-               if (x86 >= 6)
+               if (family >= 6)
                        reload_ucode_intel();
                break;
        case X86_VENDOR_AMD:
-               if (x86 >= 0x10)
+               if (family >= 0x10)
                        reload_ucode_amd();
                break;
        default:
index 746e7fd08aad7082747ee1d9dca80570f1a00e3b..a41beadb3db9a396e5b74795e62a49648b367870 100644 (file)
@@ -124,7 +124,7 @@ static int get_matching_mc(struct microcode_intel *mc_intel, int cpu)
        cpf = cpu_sig.pf;
        crev = cpu_sig.rev;
 
-       return get_matching_microcode(csig, cpf, mc_intel, crev);
+       return get_matching_microcode(csig, cpf, crev, mc_intel);
 }
 
 static int apply_microcode_intel(int cpu)
@@ -226,7 +226,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
 
                csig = uci->cpu_sig.sig;
                cpf = uci->cpu_sig.pf;
-               if (get_matching_microcode(csig, cpf, mc, new_rev)) {
+               if (get_matching_microcode(csig, cpf, new_rev, mc)) {
                        vfree(new_mc);
                        new_rev = mc_header.rev;
                        new_mc  = mc;
index 420eb933189ca487110607475ddbf33be8e8267b..2f49ab4ac0ae137d7ab0b851cf4b9e751d58922c 100644 (file)
  *     as published by the Free Software Foundation; either version
  *     2 of the License, or (at your option) any later version.
  */
+
+/*
+ * This needs to be before all headers so that pr_debug in printk.h doesn't turn
+ * printk calls into no_printk().
+ *
+ *#define DEBUG
+ */
+
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -28,6 +36,9 @@
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
 
+#undef pr_fmt
+#define pr_fmt(fmt)    "microcode: " fmt
+
 static unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT];
 static struct mc_saved_data {
        unsigned int mc_saved_count;
@@ -35,50 +46,45 @@ static struct mc_saved_data {
 } mc_saved_data;
 
 static enum ucode_state
-generic_load_microcode_early(struct microcode_intel **mc_saved_p,
-                            unsigned int mc_saved_count,
-                            struct ucode_cpu_info *uci)
+load_microcode_early(struct microcode_intel **saved,
+                    unsigned int num_saved, struct ucode_cpu_info *uci)
 {
        struct microcode_intel *ucode_ptr, *new_mc = NULL;
-       int new_rev = uci->cpu_sig.rev;
-       enum ucode_state state = UCODE_OK;
-       unsigned int mc_size;
-       struct microcode_header_intel *mc_header;
-       unsigned int csig = uci->cpu_sig.sig;
-       unsigned int cpf = uci->cpu_sig.pf;
-       int i;
+       struct microcode_header_intel *mc_hdr;
+       int new_rev, ret, i;
 
-       for (i = 0; i < mc_saved_count; i++) {
-               ucode_ptr = mc_saved_p[i];
+       new_rev = uci->cpu_sig.rev;
 
-               mc_header = (struct microcode_header_intel *)ucode_ptr;
-               mc_size = get_totalsize(mc_header);
-               if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) {
-                       new_rev = mc_header->rev;
-                       new_mc  = ucode_ptr;
-               }
-       }
+       for (i = 0; i < num_saved; i++) {
+               ucode_ptr = saved[i];
+               mc_hdr    = (struct microcode_header_intel *)ucode_ptr;
 
-       if (!new_mc) {
-               state = UCODE_NFOUND;
-               goto out;
+               ret = get_matching_microcode(uci->cpu_sig.sig,
+                                            uci->cpu_sig.pf,
+                                            new_rev,
+                                            ucode_ptr);
+               if (!ret)
+                       continue;
+
+               new_rev = mc_hdr->rev;
+               new_mc  = ucode_ptr;
        }
 
+       if (!new_mc)
+               return UCODE_NFOUND;
+
        uci->mc = (struct microcode_intel *)new_mc;
-out:
-       return state;
+       return UCODE_OK;
 }
 
-static void
-microcode_pointer(struct microcode_intel **mc_saved,
-                 unsigned long *mc_saved_in_initrd,
-                 unsigned long initrd_start, int mc_saved_count)
+static inline void
+copy_initrd_ptrs(struct microcode_intel **mc_saved, unsigned long *initrd,
+                 unsigned long off, int num_saved)
 {
        int i;
 
-       for (i = 0; i < mc_saved_count; i++)
-               mc_saved[i] = (struct microcode_intel *)
-                             (mc_saved_in_initrd[i] + initrd_start);
+       for (i = 0; i < num_saved; i++)
+               mc_saved[i] = (struct microcode_intel *)(initrd[i] + off);
 }
 
 #ifdef CONFIG_X86_32
@@ -102,55 +108,27 @@ microcode_phys(struct microcode_intel **mc_saved_tmp,
 #endif
 
 static enum ucode_state
-load_microcode(struct mc_saved_data *mc_saved_data,
-              unsigned long *mc_saved_in_initrd,
-              unsigned long initrd_start,
-              struct ucode_cpu_info *uci)
+load_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+              unsigned long initrd_start, struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT];
        unsigned int count = mc_saved_data->mc_saved_count;
 
        if (!mc_saved_data->mc_saved) {
-               microcode_pointer(mc_saved_tmp, mc_saved_in_initrd,
-                                 initrd_start, count);
+               copy_initrd_ptrs(mc_saved_tmp, initrd, initrd_start, count);
 
-               return generic_load_microcode_early(mc_saved_tmp, count, uci);
+               return load_microcode_early(mc_saved_tmp, count, uci);
        } else {
 #ifdef CONFIG_X86_32
                microcode_phys(mc_saved_tmp, mc_saved_data);
-               return generic_load_microcode_early(mc_saved_tmp, count, uci);
+               return load_microcode_early(mc_saved_tmp, count, uci);
 #else
-               return generic_load_microcode_early(mc_saved_data->mc_saved,
+               return load_microcode_early(mc_saved_data->mc_saved,
                                                    count, uci);
 #endif
        }
 }
 
-static u8 get_x86_family(unsigned long sig)
-{
-       u8 x86;
-
-       x86 = (sig >> 8) & 0xf;
-
-       if (x86 == 0xf)
-               x86 += (sig >> 20) & 0xff;
-
-       return x86;
-}
-
-static u8 get_x86_model(unsigned long sig)
-{
-       u8 x86, x86_model;
-
-       x86 = get_x86_family(sig);
-       x86_model = (sig >> 4) & 0xf;
-
-       if (x86 == 0x6 || x86 == 0xf)
-               x86_model += ((sig >> 16) & 0xf) << 4;
-
-       return x86_model;
-}
-
 /*
  * Given a CPU signature and a microcode patch, this function checks whether
  * the patch's family and model match those of the CPU.
@@ -159,42 +137,40 @@ static enum ucode_state
 matching_model_microcode(struct microcode_header_intel *mc_header,
                        unsigned long sig)
 {
-       u8 x86, x86_model;
-       u8 x86_ucode, x86_model_ucode;
+       unsigned int fam, model;
+       unsigned int fam_ucode, model_ucode;
        struct extended_sigtable *ext_header;
        unsigned long total_size = get_totalsize(mc_header);
        unsigned long data_size = get_datasize(mc_header);
        int ext_sigcount, i;
        struct extended_signature *ext_sig;
 
-       x86 = get_x86_family(sig);
-       x86_model = get_x86_model(sig);
+       fam   = __x86_family(sig);
+       model = x86_model(sig);
 
-       x86_ucode = get_x86_family(mc_header->sig);
-       x86_model_ucode = get_x86_model(mc_header->sig);
+       fam_ucode   = __x86_family(mc_header->sig);
+       model_ucode = x86_model(mc_header->sig);
 
-       if (x86 == x86_ucode && x86_model == x86_model_ucode)
+       if (fam == fam_ucode && model == model_ucode)
                return UCODE_OK;
 
        /* Look for ext. headers: */
        if (total_size <= data_size + MC_HEADER_SIZE)
                return UCODE_NFOUND;
 
-       ext_header = (struct extended_sigtable *)
-                    mc_header + data_size + MC_HEADER_SIZE;
+       ext_header   = (void *) mc_header + data_size + MC_HEADER_SIZE;
+       ext_sig      = (void *)ext_header + EXT_HEADER_SIZE;
        ext_sigcount = ext_header->count;
-       ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 
        for (i = 0; i < ext_sigcount; i++) {
-               x86_ucode = get_x86_family(ext_sig->sig);
-               x86_model_ucode = get_x86_model(ext_sig->sig);
+               fam_ucode   = __x86_family(ext_sig->sig);
+               model_ucode = x86_model(ext_sig->sig);
 
-               if (x86 == x86_ucode && x86_model == x86_model_ucode)
+               if (fam == fam_ucode && model == model_ucode)
                        return UCODE_OK;
 
                ext_sig++;
        }
-
        return UCODE_NFOUND;
 }
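
The deleted get_x86_family()/get_x86_model() helpers are replaced by shared __x86_family()/x86_model() ones (this patch assumes they live in a common header, which is not shown in this diff). Their arithmetic, reconstructed from the removed bodies above, with a worked example:

    #include <stdio.h>

    static unsigned int sig_family(unsigned int sig)
    {
        unsigned int fam = (sig >> 8) & 0xf;

        if (fam == 0xf)
            fam += (sig >> 20) & 0xff;          /* extended family */
        return fam;
    }

    static unsigned int sig_model(unsigned int sig)
    {
        unsigned int fam = sig_family(sig);
        unsigned int model = (sig >> 4) & 0xf;

        if (fam == 0x6 || fam == 0xf)
            model += ((sig >> 16) & 0xf) << 4;  /* extended model */
        return model;
    }

    int main(void)
    {
        unsigned int sig = 0x306A9;     /* e.g. an Ivy Bridge signature */

        printf("family=0x%x model=0x%x\n", sig_family(sig), sig_model(sig));
        /* prints: family=0x6 model=0x3a */
        return 0;
    }
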
 
@@ -204,7 +180,7 @@ save_microcode(struct mc_saved_data *mc_saved_data,
               unsigned int mc_saved_count)
 {
        int i, j;
-       struct microcode_intel **mc_saved_p;
+       struct microcode_intel **saved_ptr;
        int ret;
 
        if (!mc_saved_count)
@@ -213,39 +189,45 @@ save_microcode(struct mc_saved_data *mc_saved_data,
        /*
         * Copy new microcode data.
         */
-       mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *),
-                            GFP_KERNEL);
-       if (!mc_saved_p)
+       saved_ptr = kcalloc(mc_saved_count, sizeof(struct microcode_intel *), GFP_KERNEL);
+       if (!saved_ptr)
                return -ENOMEM;
 
        for (i = 0; i < mc_saved_count; i++) {
-               struct microcode_intel *mc = mc_saved_src[i];
-               struct microcode_header_intel *mc_header = &mc->hdr;
-               unsigned long mc_size = get_totalsize(mc_header);
-               mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL);
-               if (!mc_saved_p[i]) {
-                       ret = -ENOMEM;
-                       goto err;
-               }
+               struct microcode_header_intel *mc_hdr;
+               struct microcode_intel *mc;
+               unsigned long size;
+
                if (!mc_saved_src[i]) {
                        ret = -EINVAL;
                        goto err;
                }
-               memcpy(mc_saved_p[i], mc, mc_size);
+
+               mc     = mc_saved_src[i];
+               mc_hdr = &mc->hdr;
+               size   = get_totalsize(mc_hdr);
+
+               saved_ptr[i] = kmalloc(size, GFP_KERNEL);
+               if (!saved_ptr[i]) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               memcpy(saved_ptr[i], mc, size);
        }
 
        /*
         * Point to newly saved microcode.
         */
-       mc_saved_data->mc_saved = mc_saved_p;
+       mc_saved_data->mc_saved = saved_ptr;
        mc_saved_data->mc_saved_count = mc_saved_count;
 
        return 0;
 
 err:
        for (j = 0; j <= i; j++)
-               kfree(mc_saved_p[j]);
-       kfree(mc_saved_p);
+               kfree(saved_ptr[j]);
+       kfree(saved_ptr);
 
        return ret;
 }
@@ -257,48 +239,45 @@ err:
  * - or if it is a newly discovered microcode patch.
  *
  * The microcode patch should match the CPU's model.
+ *
+ * Returns: The updated number @num_saved of saved microcode patches.
  */
-static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr,
-                    unsigned int *mc_saved_count_p)
+static unsigned int _save_mc(struct microcode_intel **mc_saved,
+                            u8 *ucode_ptr, unsigned int num_saved)
 {
-       int i;
-       int found = 0;
-       unsigned int mc_saved_count = *mc_saved_count_p;
-       struct microcode_header_intel *mc_header;
+       struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
+       unsigned int sig, pf, new_rev;
+       int found = 0, i;
+
+       mc_hdr = (struct microcode_header_intel *)ucode_ptr;
+
+       for (i = 0; i < num_saved; i++) {
+               mc_saved_hdr = (struct microcode_header_intel *)mc_saved[i];
+               sig          = mc_saved_hdr->sig;
+               pf           = mc_saved_hdr->pf;
+               new_rev      = mc_hdr->rev;
+
+               if (!get_matching_sig(sig, pf, new_rev, ucode_ptr))
+                       continue;
+
+               found = 1;
+
+               if (!revision_is_newer(mc_hdr, new_rev))
+                       continue;
 
-       mc_header = (struct microcode_header_intel *)ucode_ptr;
-       for (i = 0; i < mc_saved_count; i++) {
-               unsigned int sig, pf;
-               unsigned int new_rev;
-               struct microcode_header_intel *mc_saved_header =
-                            (struct microcode_header_intel *)mc_saved[i];
-               sig = mc_saved_header->sig;
-               pf = mc_saved_header->pf;
-               new_rev = mc_header->rev;
-
-               if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) {
-                       found = 1;
-                       if (update_match_revision(mc_header, new_rev)) {
-                               /*
-                                * Found an older ucode saved before.
-                                * Replace the older one with this newer
-                                * one.
-                                */
-                               mc_saved[i] =
-                                       (struct microcode_intel *)ucode_ptr;
-                               break;
-                       }
-               }
-       }
-       if (i >= mc_saved_count && !found)
                /*
-                * This ucode is first time discovered in ucode file.
-                * Save it to memory.
+                * Found an older ucode saved earlier. Replace it with
+                * this newer one.
                 */
-               mc_saved[mc_saved_count++] =
-                                (struct microcode_intel *)ucode_ptr;
+               mc_saved[i] = (struct microcode_intel *)ucode_ptr;
+               break;
+       }
+
+       /* Newly detected microcode, save it to memory. */
+       if (i >= num_saved && !found)
+               mc_saved[num_saved++] = (struct microcode_intel *)ucode_ptr;
 
-       *mc_saved_count_p = mc_saved_count;
+       return num_saved;
 }
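
_save_mc() now returns the updated count instead of writing it through a pointer. Its core is a replace-or-append pass over the saved-patch array, replacing an entry only when the incoming revision is newer. A simplified user-space model keyed on (signature, revision) pairs (the real code also matches processor flags via get_matching_sig()):

    #include <stdio.h>

    struct patch { unsigned int sig, rev; };

    /* Replace a matching-signature entry if the new revision is newer,
     * append when the signature is new; returns the updated count. */
    static unsigned int save_patch(struct patch *saved, unsigned int n,
                                   struct patch new)
    {
        unsigned int i;

        for (i = 0; i < n; i++) {
            if (saved[i].sig != new.sig)
                continue;
            if (new.rev > saved[i].rev)
                saved[i] = new;     /* newer: replace in place */
            return n;               /* known sig: never append */
        }
        saved[n++] = new;           /* first sighting: append */
        return n;
    }

    int main(void)
    {
        struct patch saved[8];
        unsigned int n = 0;

        n = save_patch(saved, n, (struct patch){ 0x306A9, 0x12 });
        n = save_patch(saved, n, (struct patch){ 0x306A9, 0x17 }); /* replaces */
        n = save_patch(saved, n, (struct patch){ 0x406E3, 0x01 }); /* appends */
        printf("n=%u rev[0]=%#x\n", n, saved[0].rev);   /* n=2 rev[0]=0x17 */
        return 0;
    }
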
 
 /*
@@ -346,7 +325,7 @@ get_matching_model_microcode(int cpu, unsigned long start,
                        continue;
                }
 
-               _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count);
+               mc_saved_count = _save_mc(mc_saved_tmp, ucode_ptr, mc_saved_count);
 
                ucode_ptr += mc_size;
        }
@@ -372,7 +351,7 @@ out:
 static int collect_cpu_info_early(struct ucode_cpu_info *uci)
 {
        unsigned int val[2];
-       u8 x86, x86_model;
+       unsigned int family, model;
        struct cpu_signature csig;
        unsigned int eax, ebx, ecx, edx;
 
@@ -387,10 +366,10 @@ static int collect_cpu_info_early(struct ucode_cpu_info *uci)
        native_cpuid(&eax, &ebx, &ecx, &edx);
        csig.sig = eax;
 
-       x86 = get_x86_family(csig.sig);
-       x86_model = get_x86_model(csig.sig);
+       family = __x86_family(csig.sig);
+       model  = x86_model(csig.sig);
 
-       if ((x86_model >= 5) || (x86 > 6)) {
+       if ((model >= 5) || (family > 6)) {
                /* get processor flags from MSR 0x17 */
                native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
                csig.pf = 1 << ((val[1] >> 18) & 7);
@@ -429,8 +408,7 @@ static void __ref show_saved_mc(void)
        sig = uci.cpu_sig.sig;
        pf = uci.cpu_sig.pf;
        rev = uci.cpu_sig.rev;
-       pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n",
-                smp_processor_id(), sig, pf, rev);
+       pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
 
        for (i = 0; i < mc_saved_data.mc_saved_count; i++) {
                struct microcode_header_intel *mc_saved_header;
@@ -457,8 +435,7 @@ static void __ref show_saved_mc(void)
                if (total_size <= data_size + MC_HEADER_SIZE)
                        continue;
 
-               ext_header = (struct extended_sigtable *)
-                            mc_saved_header + data_size + MC_HEADER_SIZE;
+               ext_header = (void *) mc_saved_header + data_size + MC_HEADER_SIZE;
                ext_sigcount = ext_header->count;
                ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
 
@@ -515,8 +492,7 @@ int save_mc_for_early(u8 *mc)
         * Save the microcode patch mc in the mc_saved_tmp structure if it's a newer
         * version.
         */
-
-       _save_mc(mc_saved_tmp, mc, &mc_saved_count);
+       mc_saved_count = _save_mc(mc_saved_tmp, mc, mc_saved_count);
 
        /*
         * Save mc_saved_tmp in the global mc_saved_data.
@@ -548,12 +524,10 @@ EXPORT_SYMBOL_GPL(save_mc_for_early);
 
 static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin";
 static __init enum ucode_state
-scan_microcode(unsigned long start, unsigned long end,
-               struct mc_saved_data *mc_saved_data,
-               unsigned long *mc_saved_in_initrd,
-               struct ucode_cpu_info *uci)
+scan_microcode(struct mc_saved_data *mc_saved_data, unsigned long *initrd,
+              unsigned long start, unsigned long size,
+              struct ucode_cpu_info *uci)
 {
-       unsigned int size = end - start + 1;
        struct cpio_data cd;
        long offset = 0;
 #ifdef CONFIG_X86_32
@@ -569,10 +543,8 @@ scan_microcode(unsigned long start, unsigned long end,
        if (!cd.data)
                return UCODE_ERROR;
 
-
        return get_matching_model_microcode(0, start, cd.data, cd.size,
-                                           mc_saved_data, mc_saved_in_initrd,
-                                           uci);
+                                           mc_saved_data, initrd, uci);
 }
 
 /*
@@ -704,7 +676,7 @@ int __init save_microcode_in_initrd_intel(void)
        if (count == 0)
                return ret;
 
-       microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
+       copy_initrd_ptrs(mc_saved, mc_saved_in_initrd, initrd_start, count);
        ret = save_microcode(&mc_saved_data, mc_saved, count);
        if (ret)
                pr_err("Cannot save microcode patches from initrd.\n");
@@ -716,52 +688,44 @@ int __init save_microcode_in_initrd_intel(void)
 
 static void __init
 _load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data,
-                     unsigned long *mc_saved_in_initrd,
-                     unsigned long initrd_start_early,
-                     unsigned long initrd_end_early,
-                     struct ucode_cpu_info *uci)
+                     unsigned long *initrd,
+                     unsigned long start, unsigned long size)
 {
+       struct ucode_cpu_info uci;
        enum ucode_state ret;
 
-       collect_cpu_info_early(uci);
-       scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data,
-                      mc_saved_in_initrd, uci);
+       collect_cpu_info_early(&uci);
 
-       ret = load_microcode(mc_saved_data, mc_saved_in_initrd,
-                            initrd_start_early, uci);
+       ret = scan_microcode(mc_saved_data, initrd, start, size, &uci);
+       if (ret != UCODE_OK)
+               return;
 
-       if (ret == UCODE_OK)
-               apply_microcode_early(uci, true);
+       ret = load_microcode(mc_saved_data, initrd, start, &uci);
+       if (ret != UCODE_OK)
+               return;
+
+       apply_microcode_early(&uci, true);
 }
 
-void __init
-load_ucode_intel_bsp(void)
+void __init load_ucode_intel_bsp(void)
 {
-       u64 ramdisk_image, ramdisk_size;
-       unsigned long initrd_start_early, initrd_end_early;
-       struct ucode_cpu_info uci;
+       u64 start, size;
 #ifdef CONFIG_X86_32
-       struct boot_params *boot_params_p;
+       struct boot_params *p;
 
-       boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
-       ramdisk_image = boot_params_p->hdr.ramdisk_image;
-       ramdisk_size  = boot_params_p->hdr.ramdisk_size;
-       initrd_start_early = ramdisk_image;
-       initrd_end_early = initrd_start_early + ramdisk_size;
+       p       = (struct boot_params *)__pa_nodebug(&boot_params);
+       start   = p->hdr.ramdisk_image;
+       size    = p->hdr.ramdisk_size;
 
        _load_ucode_intel_bsp(
-               (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
-               (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
-               initrd_start_early, initrd_end_early, &uci);
+                       (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+                       (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
+                       start, size);
 #else
-       ramdisk_image = boot_params.hdr.ramdisk_image;
-       ramdisk_size  = boot_params.hdr.ramdisk_size;
-       initrd_start_early = ramdisk_image + PAGE_OFFSET;
-       initrd_end_early = initrd_start_early + ramdisk_size;
-
-       _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd,
-                             initrd_start_early, initrd_end_early,
-                             &uci);
+       start   = boot_params.hdr.ramdisk_image + PAGE_OFFSET;
+       size    = boot_params.hdr.ramdisk_size;
+
+       _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, start, size);
 #endif
 }
 
@@ -771,6 +735,7 @@ void load_ucode_intel_ap(void)
        struct ucode_cpu_info uci;
        unsigned long *mc_saved_in_initrd_p;
        unsigned long initrd_start_addr;
+       enum ucode_state ret;
 #ifdef CONFIG_X86_32
        unsigned long *initrd_start_p;
 
@@ -793,8 +758,12 @@ void load_ucode_intel_ap(void)
                return;
 
        collect_cpu_info_early(&uci);
-       load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
-                      initrd_start_addr, &uci);
+       ret = load_microcode(mc_saved_data_p, mc_saved_in_initrd_p,
+                            initrd_start_addr, &uci);
+
+       if (ret != UCODE_OK)
+               return;
+
        apply_microcode_early(&uci, true);
 }
 
@@ -808,8 +777,8 @@ void reload_ucode_intel(void)
 
        collect_cpu_info_early(&uci);
 
-       ret = generic_load_microcode_early(mc_saved_data.mc_saved,
-                                          mc_saved_data.mc_saved_count, &uci);
+       ret = load_microcode_early(mc_saved_data.mc_saved,
+                                  mc_saved_data.mc_saved_count, &uci);
        if (ret != UCODE_OK)
                return;
 
index ce69320d017907aa2d8e25c4fe7ecef47c725005..cd47a510a3f174233300d8763705b6f200faf9f4 100644 (file)
@@ -38,12 +38,6 @@ update_match_cpu(unsigned int csig, unsigned int cpf,
        return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1;
 }
 
-int
-update_match_revision(struct microcode_header_intel *mc_header, int rev)
-{
-       return (mc_header->rev <= rev) ? 0 : 1;
-}
-
 int microcode_sanity_check(void *mc, int print_err)
 {
        unsigned long total_size, data_size, ext_table_size;
@@ -128,10 +122,9 @@ int microcode_sanity_check(void *mc, int print_err)
 EXPORT_SYMBOL_GPL(microcode_sanity_check);
 
 /*
- * return 0 - no update found
- * return 1 - found update
+ * Returns 1 if an update has been found, 0 otherwise.
  */
-int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
+int get_matching_sig(unsigned int csig, int cpf, int rev, void *mc)
 {
        struct microcode_header_intel *mc_header = mc;
        struct extended_sigtable *ext_header;
@@ -159,16 +152,15 @@ int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev)
 }
 
 /*
- * return 0 - no update found
- * return 1 - found update
+ * Returns 1 if an update has been found, 0 otherwise.
  */
-int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev)
+int get_matching_microcode(unsigned int csig, int cpf, int rev, void *mc)
 {
-       struct microcode_header_intel *mc_header = mc;
+       struct microcode_header_intel *mc_hdr = mc;
 
-       if (!update_match_revision(mc_header, rev))
+       if (!revision_is_newer(mc_hdr, rev))
                return 0;
 
-       return get_matching_sig(csig, cpf, mc, rev);
+       return get_matching_sig(csig, cpf, rev, mc);
 }
 EXPORT_SYMBOL_GPL(get_matching_microcode);
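
revision_is_newer() is the better-named successor of the update_match_revision() helper deleted above; going by the removed body, it is the same strictly-newer test. A minimal sketch:

    #include <stdio.h>

    struct microcode_header { unsigned int rev; };

    /* Equivalent of the removed update_match_revision(): 1 iff the patch
     * carries a strictly newer revision than the CPU already runs. */
    static int revision_is_newer(struct microcode_header *hdr, int rev)
    {
        return hdr->rev <= (unsigned int)rev ? 0 : 1;
    }

    int main(void)
    {
        struct microcode_header hdr = { .rev = 0x17 };

        printf("%d %d\n", revision_is_newer(&hdr, 0x12),    /* 1: update  */
                          revision_is_newer(&hdr, 0x17));   /* 0: same rev */
        return 0;
    }
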
index 36d99a337b49f56398ca29d900638ddcedee277b..3f20710a5b23b7f7456e99bb7f654ce703bb6abf 100644 (file)
@@ -6,7 +6,7 @@
 IN=$1
 OUT=$2
 
-function dump_array()
+dump_array()
 {
        ARRAY=$1
        SIZE=$2
index b71a7f86d68aca8ba6dd864e9c8cf15e2b3d28ba..e2888a3ad1e3c3ffd0f2584f34c66f5de0608e19 100644 (file)
@@ -2146,6 +2146,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
  */
 static unsigned long code_segment_base(struct pt_regs *regs)
 {
+       /*
+        * For IA32 we look at the GDT/LDT segment base to convert the
+        * effective IP to a linear address.
+        */
+
+#ifdef CONFIG_X86_32
        /*
         * If we are in VM86 mode, add the segment offset to convert to a
         * linear address.
@@ -2153,18 +2159,12 @@ static unsigned long code_segment_base(struct pt_regs *regs)
        if (regs->flags & X86_VM_MASK)
                return 0x10 * regs->cs;
 
-       /*
-        * For IA32 we look at the GDT/LDT segment base to convert the
-        * effective IP to a linear address.
-        */
-#ifdef CONFIG_X86_32
        if (user_mode(regs) && regs->cs != __USER_CS)
                return get_segment_base(regs->cs);
 #else
-       if (test_thread_flag(TIF_IA32)) {
-               if (user_mode(regs) && regs->cs != __USER32_CS)
-                       return get_segment_base(regs->cs);
-       }
+       if (user_mode(regs) && !user_64bit_mode(regs) &&
+           regs->cs != __USER32_CS)
+               return get_segment_base(regs->cs);
 #endif
        return 0;
 }
index aceb2f90c7166afcfa844cd7da58bfe6358efaa6..c76d3e37c6e1dc99a7083f05a2f79b7cd82968e1 100644 (file)
@@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 #ifdef CONFIG_X86_32
        struct pt_regs fixed_regs;
 
-       if (!user_mode_vm(regs)) {
+       if (!user_mode(regs)) {
                crash_fixup_ss_esp(&fixed_regs, regs);
                regs = &fixed_regs;
        }
index 3d3503351242b7c6d8dcc0d32350e36e63da1939..6367a780cc8ca891b9513d2e4688717c8d5d3207 100644 (file)
@@ -286,13 +286,13 @@ static void __init x86_flattree_get_config(void)
        initial_boot_params = dt = early_memremap(initial_dtb, map_len);
        size = of_get_flat_dt_size();
        if (map_len < size) {
-               early_iounmap(dt, map_len);
+               early_memunmap(dt, map_len);
                initial_boot_params = dt = early_memremap(initial_dtb, size);
                map_len = size;
        }
 
        unflatten_and_copy_device_tree();
-       early_iounmap(dt, map_len);
+       early_memunmap(dt, map_len);
 }
 #else
 static inline void x86_flattree_get_config(void) { }
index cf3df1d8d039e5d1689417e04c56bcb7057ae754..9c30acfadae24757cca11f00513d089e488da78d 100644 (file)
@@ -25,10 +25,12 @@ unsigned int code_bytes = 64;
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
-static void printk_stack_address(unsigned long address, int reliable)
+static void printk_stack_address(unsigned long address, int reliable,
+               void *data)
 {
-       pr_cont(" [<%p>] %s%pB\n",
-               (void *)address, reliable ? "" : "? ", (void *)address);
+       printk("%s [<%p>] %s%pB\n",
+               (char *)data, (void *)address, reliable ? "" : "? ",
+               (void *)address);
 }
 
 void printk_address(unsigned long address)
@@ -155,8 +157,7 @@ static int print_trace_stack(void *data, char *name)
 static void print_trace_address(void *data, unsigned long addr, int reliable)
 {
        touch_nmi_watchdog();
-       printk(data);
-       printk_stack_address(addr, reliable);
+       printk_stack_address(addr, reliable, data);
 }
 
 static const struct stacktrace_ops print_trace_ops = {
@@ -278,7 +279,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
        print_modules();
        show_regs(regs);
 #ifdef CONFIG_X86_32
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                sp = regs->sp;
                ss = regs->ss & 0xffff;
        } else {
@@ -307,7 +308,7 @@ void die(const char *str, struct pt_regs *regs, long err)
        unsigned long flags = oops_begin();
        int sig = SIGSEGV;
 
-       if (!user_mode_vm(regs))
+       if (!user_mode(regs))
                report_bug(regs->ip, regs);
 
        if (__die(str, regs, err))
index 5abd4cd4230c69f3ff4730e97a1297be40013c44..464ffd69b92e9ef376b9c534aec3c12973d6ad7a 100644 (file)
@@ -108,9 +108,12 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
        for (i = 0; i < kstack_depth_to_print; i++) {
                if (kstack_end(stack))
                        break;
-               if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-                       pr_cont("\n");
-               pr_cont(" %08lx", *stack++);
+               if ((i % STACKSLOTS_PER_LINE) == 0) {
+                       if (i != 0)
+                               pr_cont("\n");
+                       printk("%s %08lx", log_lvl, *stack++);
+               } else
+                       pr_cont(" %08lx", *stack++);
                touch_nmi_watchdog();
        }
        pr_cont("\n");
@@ -123,13 +126,13 @@ void show_regs(struct pt_regs *regs)
        int i;
 
        show_regs_print_info(KERN_EMERG);
-       __show_regs(regs, !user_mode_vm(regs));
+       __show_regs(regs, !user_mode(regs));
 
        /*
         * When in-kernel, we also print out the stack and code at the
         * time of the fault..
         */
-       if (!user_mode_vm(regs)) {
+       if (!user_mode(regs)) {
                unsigned int code_prologue = code_bytes * 43 / 64;
                unsigned int code_len = code_bytes;
                unsigned char c;
index ff86f19b575849fca7e20a4086e09f798ae8291d..5f1c6266eb3028579f5036161dc240387dcb124f 100644 (file)
@@ -280,12 +280,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
                                pr_cont(" <EOI> ");
                        }
                } else {
-               if (((long) stack & (THREAD_SIZE-1)) == 0)
+               if (kstack_end(stack))
                        break;
                }
-               if (i && ((i % STACKSLOTS_PER_LINE) == 0))
-                       pr_cont("\n");
-               pr_cont(" %016lx", *stack++);
+               if ((i % STACKSLOTS_PER_LINE) == 0) {
+                       if (i != 0)
+                               pr_cont("\n");
+                       printk("%s %016lx", log_lvl, *stack++);
+               } else
+                       pr_cont(" %016lx", *stack++);
                touch_nmi_watchdog();
        }
        preempt_enable();
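
Both the 32-bit and 64-bit dumpers above now emit the log level at the start of every wrapped line via printk("%s ...", log_lvl, ...) instead of relying on pr_cont(), so a multi-line stack dump keeps its severity after each break. A printf model of the line-wrapping rule (the slot count of 8 and the "<4>" prefix are assumptions for illustration):

    #include <stdio.h>

    #define STACKSLOTS_PER_LINE     8

    int main(void)
    {
        const char *log_lvl = "<4>";    /* stand-in for KERN_WARNING */
        unsigned long stack[20];
        int i;

        for (i = 0; i < 20; i++)
            stack[i] = 0x1000 + i;

        for (i = 0; i < 20; i++) {
            if ((i % STACKSLOTS_PER_LINE) == 0) {
                if (i != 0)
                    printf("\n");               /* pr_cont("\n") */
                printf("%s %08lx", log_lvl, stack[i]);
            } else {
                printf(" %08lx", stack[i]);     /* pr_cont(...) */
            }
        }
        printf("\n");
        return 0;
    }
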
index 46201deee923fffd686244546dc7039dbaab8b2b..7d46bb2603346b41dfb924eee28975cd76e704f3 100644 (file)
@@ -661,7 +661,7 @@ void __init parse_e820_ext(u64 phys_addr, u32 data_len)
        extmap = (struct e820entry *)(sdata->data);
        __append_e820_map(extmap, entries);
        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-       early_iounmap(sdata, data_len);
+       early_memunmap(sdata, data_len);
        printk(KERN_INFO "e820: extended physical RAM map:\n");
        e820_print_map("extended");
 }
index a62536a1be889019081696b2e7df908b0a4ef9fb..49ff55ef9b26cba11af5d9b36c694926ea3e7f6b 100644 (file)
@@ -95,20 +95,6 @@ static unsigned long early_serial_base = 0x3f8;  /* ttyS0 */
 #define DLL             0       /*  Divisor Latch Low         */
 #define DLH             1       /*  Divisor latch High        */
 
-static void mem32_serial_out(unsigned long addr, int offset, int value)
-{
-       uint32_t *vaddr = (uint32_t *)addr;
-       /* shift implied by pointer type */
-       writel(value, vaddr + offset);
-}
-
-static unsigned int mem32_serial_in(unsigned long addr, int offset)
-{
-       uint32_t *vaddr = (uint32_t *)addr;
-       /* shift implied by pointer type */
-       return readl(vaddr + offset);
-}
-
 static unsigned int io_serial_in(unsigned long addr, int offset)
 {
        return inb(addr + offset);
@@ -205,6 +191,20 @@ static __init void early_serial_init(char *s)
 }
 
 #ifdef CONFIG_PCI
+static void mem32_serial_out(unsigned long addr, int offset, int value)
+{
+       u32 *vaddr = (u32 *)addr;
+       /* shift implied by pointer type */
+       writel(value, vaddr + offset);
+}
+
+static unsigned int mem32_serial_in(unsigned long addr, int offset)
+{
+       u32 *vaddr = (u32 *)addr;
+       /* shift implied by pointer type */
+       return readl(vaddr + offset);
+}
+
 /*
  * early_pci_serial_init()
  *
@@ -217,8 +217,8 @@ static __init void early_pci_serial_init(char *s)
        unsigned divisor;
        unsigned long baud = DEFAULT_BAUD;
        u8 bus, slot, func;
-       uint32_t classcode, bar0;
-       uint16_t cmdreg;
+       u32 classcode, bar0;
+       u16 cmdreg;
        char *e;
 
 
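
Besides moving mem32_serial_in()/mem32_serial_out() under CONFIG_PCI (their only caller) and switching to kernel u32/u16 types, the hunks above keep the "shift implied by pointer type" comments, which are worth spelling out: because vaddr is a u32 *, vaddr + offset advances by offset * 4 bytes, matching the 32-bit register stride of these memory-mapped UARTs. A tiny stand-alone illustration (ordinary C, no MMIO accessors):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t regs[8];       /* stand-in for a 32-bit-stride MMIO window */
        uint32_t *vaddr = regs;

        /* vaddr + 3 advances by 3 * sizeof(uint32_t) bytes -- the "shift
         * implied by pointer type" the comments above rely on: */
        printf("%td\n", (char *)(vaddr + 3) - (char *)vaddr);   /* prints 12 */
        return 0;
}
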
index 31e2d5bf3e38887ca06402bff6c647b9aa9a3c5c..1c309763e32197d255b72e3f976aa948ab67f431 100644 (file)
@@ -395,10 +395,13 @@ sysenter_past_esp:
        /*CFI_REL_OFFSET cs, 0*/
        /*
         * Push current_thread_info()->sysenter_return to the stack.
-        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
-        * pushed above; +8 corresponds to copy_thread's esp0 setting.
+        * A tiny bit of offset fixup is necessary: TI_sysenter_return
+        * is relative to thread_info, which is at the bottom of the
+        * kernel stack page.  4*4 means the 4 words pushed above;
+        * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
+        * and THREAD_SIZE takes us to the bottom.
         */
-       pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
+       pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
        CFI_REL_OFFSET eip, 0
 
        pushl_cfi %eax
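
The rewritten comment above can be checked with plain arithmetic: %esp sits TOP_OF_KERNEL_STACK_PADDING plus the four pushed words below the top of the stack page, and thread_info sits THREAD_SIZE below the top, so adding the operand lands exactly on thread_info's sysenter_return field. A sketch with made-up example constants (the real values are config-dependent):

#include <stdio.h>

int main(void)
{
        /* Hypothetical example constants, purely to make the offsets concrete. */
        unsigned long top_of_stack = 0xc0002000UL;
        unsigned long THREAD_SIZE = 8192, PADDING = 8, TI_sysenter_return = 56;

        /* %esp after the four 4-byte pushes at sysenter entry: */
        unsigned long esp = top_of_stack - PADDING - 4 * 4;

        /* The pushl operand: TI_sysenter_return - THREAD_SIZE + PADDING + 4*4 */
        unsigned long target = esp + TI_sysenter_return - THREAD_SIZE + PADDING + 4 * 4;

        /* It lands on thread_info (bottom of the stack page) + TI_sysenter_return: */
        printf("%d\n", target == top_of_stack - THREAD_SIZE + TI_sysenter_return);
        return 0;
}
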
@@ -432,7 +435,7 @@ sysenter_after_call:
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testl $_TIF_ALLWORK_MASK, %ecx
-       jne sysexit_audit
+       jnz sysexit_audit
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
        movl PT_EIP(%esp), %edx
@@ -460,7 +463,7 @@ sysenter_audit:
 
 sysexit_audit:
        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-       jne syscall_exit_work
+       jnz syscall_exit_work
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_ANY)
        movl %eax,%edx          /* second arg, syscall return value */
@@ -472,7 +475,7 @@ sysexit_audit:
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-       jne syscall_exit_work
+       jnz syscall_exit_work
        movl PT_EAX(%esp),%eax  /* reload syscall return value */
        jmp sysenter_exit
 #endif
@@ -510,7 +513,7 @@ syscall_exit:
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testl $_TIF_ALLWORK_MASK, %ecx  # current->work
-       jne syscall_exit_work
+       jnz syscall_exit_work
 
 restore_all:
        TRACE_IRQS_IRET
@@ -612,7 +615,7 @@ work_notifysig:                             # deal with pending signals and
 #ifdef CONFIG_VM86
        testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
        movl %esp, %eax
-       jne work_notifysig_v86          # returning to kernel-space or
+       jnz work_notifysig_v86          # returning to kernel-space or
                                        # vm86-space
 1:
 #else
@@ -720,43 +723,22 @@ END(sysenter_badsys)
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-       .p2align 5
-       .p2align CONFIG_X86_L1_CACHE_SHIFT
+       .align 8
 ENTRY(irq_entries_start)
        RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-       .balign 32
-  .rept        7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+       pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
+    vector=vector+1
+       jmp     common_interrupt
        CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:     pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-       jmp 2f
-      .endif
-      .previous
-       .long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:     jmp common_interrupt
-.endr
+       .align  8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
        pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
        /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661:   pushl_cfi $do_general_protection
-662:
-.section .altinstructions,"a"
-       altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
-.previous
-.section .altinstr_replacement,"ax"
-663:   pushl $do_simd_coprocessor_error
-664:
-.previous
+       ALTERNATIVE "pushl_cfi $do_general_protection", \
+                   "pushl $do_simd_coprocessor_error", \
+                   X86_FEATURE_XMM
 #else
        pushl_cfi $do_simd_coprocessor_error
 #endif
@@ -1240,20 +1216,13 @@ error_code:
        /*CFI_REL_OFFSET es, 0*/
        pushl_cfi %ds
        /*CFI_REL_OFFSET ds, 0*/
-       pushl_cfi %eax
-       CFI_REL_OFFSET eax, 0
-       pushl_cfi %ebp
-       CFI_REL_OFFSET ebp, 0
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %edx
-       CFI_REL_OFFSET edx, 0
-       pushl_cfi %ecx
-       CFI_REL_OFFSET ecx, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg eax
+       pushl_cfi_reg ebp
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
+       pushl_cfi_reg edx
+       pushl_cfi_reg ecx
+       pushl_cfi_reg ebx
        cld
        movl $(__KERNEL_PERCPU), %ecx
        movl %ecx, %fs
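
A payoff of the irq_entries_start rework above (repeated below for the 64-bit entry code) is that the old pointer table in .init.rodata becomes unnecessary: with exactly one stub per 8-byte block, the stub for any vector is reachable by address arithmetic alone. A hedged C sketch of such a lookup (names and the stub count are illustrative, not the kernel's actual IDT-setup code):

#include <stdio.h>

enum { FIRST_EXTERNAL_VECTOR = 0x20, STUB_SIZE = 8, NR_STUBS = 224 };
/* NR_STUBS is an illustrative count, standing in for
 * FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR. */

/* Stand-in for the irq_entries_start symbol emitted by the assembly. */
static const unsigned char irq_entries_start[NR_STUBS * STUB_SIZE];

/* One stub per 8-byte block means a vector's entry point is just
 * base + 8 * (vector - first); no pointer table is needed. */
static const void *irq_stub_for_vector(unsigned int vector)
{
        return irq_entries_start + STUB_SIZE * (vector - FIRST_EXTERNAL_VECTOR);
}

int main(void)
{
        /* The stub for vector 0x21 starts 8 bytes past the base: */
        printf("%td\n", (const unsigned char *)irq_stub_for_vector(0x21)
                        - irq_entries_start);
        return 0;
}
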
index f0095a76c18211813d711bfa52b82c916190f42d..c7b238494b31f267b9c4fc3f4a74d519fb7b3aa5 100644 (file)
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
  * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  * - idtentry - Define exception entry points.
  */
        .section .entry.text, "ax"
 
 
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
        swapgs
@@ -82,9 +65,9 @@ ENDPROC(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ
 #ifdef CONFIG_TRACE_IRQFLAGS
-       bt   $9,EFLAGS-\offset(%rsp)    /* interrupts off? */
+       bt   $9,EFLAGS(%rsp)    /* interrupts off? */
        jnc  1f
        TRACE_IRQS_ON
 1:
@@ -116,8 +99,8 @@ ENDPROC(native_usergs_sysret64)
        call debug_stack_reset
 .endm
 
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
-       bt   $9,EFLAGS-\offset(%rsp)    /* interrupts off? */
+.macro TRACE_IRQS_IRETQ_DEBUG
+       bt   $9,EFLAGS(%rsp)    /* interrupts off? */
        jnc  1f
        TRACE_IRQS_ON_DEBUG
 1:
@@ -130,34 +113,7 @@ ENDPROC(native_usergs_sysret64)
 #endif
 
 /*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
-       /* %rsp:at FRAMEEND */
-       .macro FIXUP_TOP_OF_STACK tmp offset=0
-       movq PER_CPU_VAR(old_rsp),\tmp
-       movq \tmp,RSP+\offset(%rsp)
-       movq $__USER_DS,SS+\offset(%rsp)
-       movq $__USER_CS,CS+\offset(%rsp)
-       movq RIP+\offset(%rsp),\tmp  /* get rip */
-       movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
-       movq R11+\offset(%rsp),\tmp  /* get eflags */
-       movq \tmp,EFLAGS+\offset(%rsp)
-       .endm
-
-       .macro RESTORE_TOP_OF_STACK tmp offset=0
-       movq RSP+\offset(%rsp),\tmp
-       movq \tmp,PER_CPU_VAR(old_rsp)
-       movq EFLAGS+\offset(%rsp),\tmp
-       movq \tmp,R11+\offset(%rsp)
-       .endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
        .macro EMPTY_FRAME start=1 offset=0
        .if \start
@@ -173,12 +129,12 @@ ENDPROC(native_usergs_sysret64)
  * initial frame state for interrupts (and exceptions without error code)
  */
        .macro INTR_FRAME start=1 offset=0
-       EMPTY_FRAME \start, SS+8+\offset-RIP
-       /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
-       CFI_REL_OFFSET rsp, RSP+\offset-RIP
-       /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
-       /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
-       CFI_REL_OFFSET rip, RIP+\offset-RIP
+       EMPTY_FRAME \start, 5*8+\offset
+       /*CFI_REL_OFFSET ss, 4*8+\offset*/
+       CFI_REL_OFFSET rsp, 3*8+\offset
+       /*CFI_REL_OFFSET rflags, 2*8+\offset*/
+       /*CFI_REL_OFFSET cs, 1*8+\offset*/
+       CFI_REL_OFFSET rip, 0*8+\offset
        .endm
 
 /*
@@ -186,30 +142,23 @@ ENDPROC(native_usergs_sysret64)
  * with vector already pushed)
  */
        .macro XCPT_FRAME start=1 offset=0
-       INTR_FRAME \start, RIP+\offset-ORIG_RAX
-       .endm
-
-/*
- * frame that enables calling into C.
- */
-       .macro PARTIAL_FRAME start=1 offset=0
-       XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
-       CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
-       CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
-       CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
-       CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
-       CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
-       CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
-       CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
-       CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
-       CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+       INTR_FRAME \start, 1*8+\offset
        .endm
 
 /*
  * frame that enables passing a complete pt_regs to a C function.
  */
        .macro DEFAULT_FRAME start=1 offset=0
-       PARTIAL_FRAME \start, R11+\offset-R15
+       XCPT_FRAME \start, ORIG_RAX+\offset
+       CFI_REL_OFFSET rdi, RDI+\offset
+       CFI_REL_OFFSET rsi, RSI+\offset
+       CFI_REL_OFFSET rdx, RDX+\offset
+       CFI_REL_OFFSET rcx, RCX+\offset
+       CFI_REL_OFFSET rax, RAX+\offset
+       CFI_REL_OFFSET r8, R8+\offset
+       CFI_REL_OFFSET r9, R9+\offset
+       CFI_REL_OFFSET r10, R10+\offset
+       CFI_REL_OFFSET r11, R11+\offset
        CFI_REL_OFFSET rbx, RBX+\offset
        CFI_REL_OFFSET rbp, RBP+\offset
        CFI_REL_OFFSET r12, R12+\offset
@@ -218,105 +167,30 @@ ENDPROC(native_usergs_sysret64)
        CFI_REL_OFFSET r15, R15+\offset
        .endm
 
-ENTRY(save_paranoid)
-       XCPT_FRAME 1 RDI+8
-       cld
-       movq %rdi, RDI+8(%rsp)
-       movq %rsi, RSI+8(%rsp)
-       movq_cfi rdx, RDX+8
-       movq_cfi rcx, RCX+8
-       movq_cfi rax, RAX+8
-       movq %r8, R8+8(%rsp)
-       movq %r9, R9+8(%rsp)
-       movq %r10, R10+8(%rsp)
-       movq %r11, R11+8(%rsp)
-       movq_cfi rbx, RBX+8
-       movq %rbp, RBP+8(%rsp)
-       movq %r12, R12+8(%rsp)
-       movq %r13, R13+8(%rsp)
-       movq %r14, R14+8(%rsp)
-       movq %r15, R15+8(%rsp)
-       movl $1,%ebx
-       movl $MSR_GS_BASE,%ecx
-       rdmsr
-       testl %edx,%edx
-       js 1f   /* negative -> in kernel */
-       SWAPGS
-       xorl %ebx,%ebx
-1:     ret
-       CFI_ENDPROC
-END(save_paranoid)
-
 /*
- * A newly forked process directly context switches into this address.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-       DEFAULT_FRAME
-
-       LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-       pushq_cfi $0x0002
-       popfq_cfi                               # reset kernel eflags
-
-       call schedule_tail                      # rdi: 'prev' task parameter
-
-       GET_THREAD_INFO(%rcx)
-
-       RESTORE_REST
-
-       testl $3, CS-ARGOFFSET(%rsp)            # from kernel_thread?
-       jz   1f
-
-       /*
-        * By the time we get here, we have no idea whether our pt_regs,
-        * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-        * the slow path, or one of the ia32entry paths.
-        * Use int_ret_from_sys_call to return, since it can safely handle
-        * all of the above.
-        */
-       jmp  int_ret_from_sys_call
-
-1:
-       subq $REST_SKIP, %rsp   # leave space for volatiles
-       CFI_ADJUST_CFA_OFFSET   REST_SKIP
-       movq %rbp, %rdi
-       call *%rbx
-       movl $0, RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
-       CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
  *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.  However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * Registers on entry:
  * rax  system call number
+ * rcx  return address
+ * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
  * rdx  arg2
- * r10  arg3   (--> moved to rcx for C)
+ * r10  arg3 (needs to be moved to rcx to conform to C ABI)
  * r8   arg4
  * r9   arg5
- * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
- * Interrupts are off on entry.
  * Only called from user space.
  *
- * XXX if we had a free scratch register we could save the RSP into the stack frame
- *      and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -324,9 +198,15 @@ END(ret_from_fork)
 ENTRY(system_call)
        CFI_STARTPROC   simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
+       CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
+
+       /*
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        SWAPGS_UNSAFE_STACK
        /*
         * A hypervisor implementation might want to use a label
@@ -335,18 +215,38 @@ ENTRY(system_call)
         */
 GLOBAL(system_call_after_swapgs)
 
-       movq    %rsp,PER_CPU_VAR(old_rsp)
+       movq    %rsp,PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(kernel_stack),%rsp
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi $__USER_DS                    /* pt_regs->ss */
+       pushq_cfi PER_CPU_VAR(rsp_scratch)      /* pt_regs->sp */
        /*
-        * No need to follow this irqs off/on section - it's straight
-        * and short:
+        * Re-enable interrupts.
+        * We use 'rsp_scratch' as a scratch space, hence the irq-off block above
+        * must execute atomically in the face of possible interrupt-driven
+        * task preemption. We must enable interrupts only after we're done
+        * with using rsp_scratch:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       SAVE_ARGS 8, 0, rax_enosys=1
-       movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-       movq  %rcx,RIP-ARGOFFSET(%rsp)
-       CFI_REL_OFFSET rip,RIP-ARGOFFSET
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       pushq_cfi       %r11                    /* pt_regs->flags */
+       pushq_cfi       $__USER_CS              /* pt_regs->cs */
+       pushq_cfi       %rcx                    /* pt_regs->ip */
+       CFI_REL_OFFSET rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi       $-ENOSYS                /* pt_regs->ax */
+       pushq_cfi_reg   r8                      /* pt_regs->r8 */
+       pushq_cfi_reg   r9                      /* pt_regs->r9 */
+       pushq_cfi_reg   r10                     /* pt_regs->r10 */
+       pushq_cfi_reg   r11                     /* pt_regs->r11 */
+       sub     $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 6*8
+
+       testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
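
The push sequence above hand-builds a struct pt_regs from its highest field (ss) downward, leaving a 6*8-byte hole for the fields the fast path does not save. As a reading aid, here is the resulting stack image expressed as a C struct in the usual x86-64 pt_regs field order, lowest address first (a sketch for orientation, not the kernel's definition):

/* Reading aid only -- not the kernel's struct pt_regs declaration. */
struct ptregs_sketch {
        unsigned long r15, r14, r13, r12, bp, bx;  /* the "sub $(6*8)" hole   */
        unsigned long r11, r10, r9, r8;            /* pushq_cfi_reg r11..r8   */
        unsigned long ax;                          /* pre-loaded with -ENOSYS */
        unsigned long cx, dx, si, di;              /* pushed syscall args     */
        unsigned long orig_ax;                     /* syscall number (rax)    */
        unsigned long ip, cs, flags, sp, ss;       /* from rcx/r11/constants  */
};
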
@@ -355,18 +255,21 @@ system_call_fastpath:
        andl $__SYSCALL_MASK,%eax
        cmpl $__NR_syscall_max,%eax
 #endif
-       ja ret_from_sys_call  /* and return regs->ax */
+       ja      1f      /* return -ENOSYS (already in pt_regs->ax) */
        movq %r10,%rcx
-       call *sys_call_table(,%rax,8)  # XXX:    rip relative
-       movq %rax,RAX-ARGOFFSET(%rsp)
+       call *sys_call_table(,%rax,8)
+       movq %rax,RAX(%rsp)
+1:
 /*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
+ * Syscall return path ending with SYSRET (fast path).
+ * Has incompletely filled pt_regs.
  */
-ret_from_sys_call:
        LOCKDEP_SYS_EXIT
+       /*
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        DISABLE_INTERRUPTS(CLBR_NONE)
-       TRACE_IRQS_OFF
 
        /*
         * We must check ti flags with interrupts (or at least preemption)
@@ -376,72 +279,73 @@ ret_from_sys_call:
         * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
         * very bad.
         */
-       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       jnz int_ret_from_sys_call_fixup /* Go the the slow path */
+       testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+       jnz int_ret_from_sys_call_irqs_off      /* Go to the slow path */
 
        CFI_REMEMBER_STATE
-       /*
-        * sysretq will re-enable interrupts:
-        */
-       TRACE_IRQS_ON
-       movq RIP-ARGOFFSET(%rsp),%rcx
+
+       RESTORE_C_REGS_EXCEPT_RCX_R11
+       movq    RIP(%rsp),%rcx
        CFI_REGISTER    rip,rcx
-       RESTORE_ARGS 1,-ARG_SKIP,0
+       movq    EFLAGS(%rsp),%r11
        /*CFI_REGISTER  rflags,r11*/
-       movq    PER_CPU_VAR(old_rsp), %rsp
+       movq    RSP(%rsp),%rsp
+       /*
+        * 64bit SYSRET restores rip from rcx,
+        * rflags from r11 (but RF and VM bits are forced to 0),
+        * cs and ss are loaded from MSRs.
+        * Restoration of rflags re-enables interrupts.
+        */
        USERGS_SYSRET64
 
        CFI_RESTORE_STATE
 
-int_ret_from_sys_call_fixup:
-       FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-       jmp int_ret_from_sys_call_irqs_off
-
-       /* Do syscall tracing */
+       /* Do syscall entry tracing */
 tracesys:
-       leaq -REST_SKIP(%rsp), %rdi
-       movq $AUDIT_ARCH_X86_64, %rsi
+       movq %rsp, %rdi
+       movl $AUDIT_ARCH_X86_64, %esi
        call syscall_trace_enter_phase1
        test %rax, %rax
        jnz tracesys_phase2             /* if needed, run the slow path */
-       LOAD_ARGS 0                     /* else restore clobbered regs */
+       RESTORE_C_REGS_EXCEPT_RAX       /* else restore clobbered regs */
+       movq ORIG_RAX(%rsp), %rax
        jmp system_call_fastpath        /*      and return to the fast path */
 
 tracesys_phase2:
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %rdi
+       SAVE_EXTRA_REGS
        movq %rsp, %rdi
-       movq $AUDIT_ARCH_X86_64, %rsi
+       movl $AUDIT_ARCH_X86_64, %esi
        movq %rax,%rdx
        call syscall_trace_enter_phase2
 
        /*
-        * Reload arg registers from stack in case ptrace changed them.
+        * Reload registers from stack in case ptrace changed them.
         * We don't reload %rax because syscall_trace_entry_phase2() returned
         * the value it wants us to use in the table lookup.
         */
-       LOAD_ARGS ARGOFFSET, 1
-       RESTORE_REST
+       RESTORE_C_REGS_EXCEPT_RAX
+       RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
        cmpq $__NR_syscall_max,%rax
 #else
        andl $__SYSCALL_MASK,%eax
        cmpl $__NR_syscall_max,%eax
 #endif
-       ja   int_ret_from_sys_call      /* RAX(%rsp) is already set */
+       ja      1f      /* return -ENOSYS (already in pt_regs->ax) */
        movq %r10,%rcx  /* fixup for C */
        call *sys_call_table(,%rax,8)
-       movq %rax,RAX-ARGOFFSET(%rsp)
-       /* Use IRET because user could have changed frame */
+       movq %rax,RAX(%rsp)
+1:
+       /* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
        DISABLE_INTERRUPTS(CLBR_NONE)
+int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
        TRACE_IRQS_OFF
-int_ret_from_sys_call_irqs_off:
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
 GLOBAL(int_with_check)
@@ -450,8 +354,8 @@ GLOBAL(int_with_check)
        movl TI_flags(%rcx),%edx
        andl %edi,%edx
        jnz   int_careful
-       andl    $~TS_COMPAT,TI_status(%rcx)
-       jmp   retint_swapgs
+       andl    $~TS_COMPAT,TI_status(%rcx)
+       jmp     syscall_return
 
        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
@@ -468,12 +372,11 @@ int_careful:
        TRACE_IRQS_OFF
        jmp int_with_check
 
-       /* handle signals and tracing -- both require a full stack frame */
+       /* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
-       SAVE_REST
+       SAVE_EXTRA_REGS
        /* Check for syscall exit trace */
        testl $_TIF_WORK_SYSCALL_EXIT,%edx
        jz int_signal
@@ -492,86 +395,192 @@ int_signal:
        call do_notify_resume
 1:     movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-       RESTORE_REST
+       RESTORE_EXTRA_REGS
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        jmp int_with_check
+
+syscall_return:
+       /* The IRETQ could re-enable interrupts: */
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_IRETQ
+
+       /*
+        * Try to use SYSRET instead of IRET if we're returning to
+        * a completely clean 64-bit userspace context.
+        */
+       movq RCX(%rsp),%rcx
+       cmpq %rcx,RIP(%rsp)             /* RCX == RIP */
+       jne opportunistic_sysret_failed
+
+       /*
+        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+        * in kernel space.  This essentially lets the user take over
+        * the kernel, since userspace controls RSP.  It's not worth
+        * testing for canonicalness exactly -- this check detects any
+        * of the 17 high bits set, which is true for non-canonical
+        * or kernel addresses.  (This will pessimize vsyscall=native.
+        * Big deal.)
+        *
+        * If virtual addresses ever become wider, this will need
+        * to be updated to remain correct on both old and new CPUs.
+        */
+       .ifne __VIRTUAL_MASK_SHIFT - 47
+       .error "virtual address width changed -- SYSRET checks need update"
+       .endif
+       shr $__VIRTUAL_MASK_SHIFT, %rcx
+       jnz opportunistic_sysret_failed
+
+       cmpq $__USER_CS,CS(%rsp)        /* CS must match SYSRET */
+       jne opportunistic_sysret_failed
+
+       movq R11(%rsp),%r11
+       cmpq %r11,EFLAGS(%rsp)          /* R11 == RFLAGS */
+       jne opportunistic_sysret_failed
+
+       /*
+        * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
+        * restoring TF results in a trap from userspace immediately after
+        * SYSRET.  This would cause an infinite loop whenever #DB happens
+        * with register state that satisfies the opportunistic SYSRET
+        * conditions.  For example, single-stepping this user code:
+        *
+        *           movq $stuck_here,%rcx
+        *           pushfq
+        *           popq %r11
+        *   stuck_here:
+        *
+        * would never get past 'stuck_here'.
+        */
+       testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+       jnz opportunistic_sysret_failed
+
+       /* nothing to check for RSP */
+
+       cmpq $__USER_DS,SS(%rsp)        /* SS must match SYSRET */
+       jne opportunistic_sysret_failed
+
+       /*
+        * We win!  This label is here just for ease of understanding
+        * perf profiles.  Nothing jumps here.
+        */
+syscall_return_via_sysret:
+       CFI_REMEMBER_STATE
+       /* r11 is already restored (see code above) */
+       RESTORE_C_REGS_EXCEPT_R11
+       movq RSP(%rsp),%rsp
+       USERGS_SYSRET64
+       CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
+       SWAPGS
+       jmp     restore_c_regs_and_iret
        CFI_ENDPROC
 END(system_call)
 
+
        .macro FORK_LIKE func
 ENTRY(stub_\func)
        CFI_STARTPROC
-       popq    %r11                    /* save return address */
-       PARTIAL_FRAME 0
-       SAVE_REST
-       pushq   %r11                    /* put it back on stack */
-       FIXUP_TOP_OF_STACK %r11, 8
-       DEFAULT_FRAME 0 8               /* offset 8: return address */
-       call sys_\func
-       RESTORE_TOP_OF_STACK %r11, 8
-       ret $REST_SKIP          /* pop extended registers */
+       DEFAULT_FRAME 0, 8              /* offset 8: return address */
+       SAVE_EXTRA_REGS 8
+       jmp sys_\func
        CFI_ENDPROC
 END(stub_\func)
        .endm
 
-       .macro FIXED_FRAME label,func
-ENTRY(\label)
-       CFI_STARTPROC
-       PARTIAL_FRAME 0 8               /* offset 8: return address */
-       FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
-       call \func
-       RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
-       ret
-       CFI_ENDPROC
-END(\label)
-       .endm
-
        FORK_LIKE  clone
        FORK_LIKE  fork
        FORK_LIKE  vfork
-       FIXED_FRAME stub_iopl, sys_iopl
 
 ENTRY(stub_execve)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call sys_execve
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
+       DEFAULT_FRAME 0, 8
+       call    sys_execve
+return_from_execve:
+       testl   %eax, %eax
+       jz      1f
+       /* exec failed, can use fast SYSRET code path in this case */
+       ret
+1:
+       /* must use IRET code path (pt_regs->cs may have changed) */
+       addq    $8, %rsp
+       CFI_ADJUST_CFA_OFFSET -8
+       ZERO_EXTRA_REGS
+       movq    %rax,RAX(%rsp)
+       jmp     int_ret_from_sys_call
        CFI_ENDPROC
 END(stub_execve)
-
-ENTRY(stub_execveat)
+/*
+ * Remaining execve stubs are only 7 bytes long.
+ * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
+ */
+       .align  8
+GLOBAL(stub_execveat)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call sys_execveat
-       RESTORE_TOP_OF_STACK %r11
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
+       DEFAULT_FRAME 0, 8
+       call    sys_execveat
+       jmp     return_from_execve
        CFI_ENDPROC
 END(stub_execveat)
 
+#ifdef CONFIG_X86_X32_ABI
+       .align  8
+GLOBAL(stub_x32_execve)
+       CFI_STARTPROC
+       DEFAULT_FRAME 0, 8
+       call    compat_sys_execve
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub_x32_execve)
+       .align  8
+GLOBAL(stub_x32_execveat)
+       CFI_STARTPROC
+       DEFAULT_FRAME 0, 8
+       call    compat_sys_execveat
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub_x32_execveat)
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
+       .align  8
+GLOBAL(stub32_execve)
+       CFI_STARTPROC
+       call    compat_sys_execve
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub32_execve)
+       .align  8
+GLOBAL(stub32_execveat)
+       CFI_STARTPROC
+       call    compat_sys_execveat
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub32_execveat)
+#endif
+
 /*
  * sigreturn is special because it needs to restore all registers on return.
  * This cannot be done with SYSRET, so use the IRET return path instead.
  */
 ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
+       DEFAULT_FRAME 0, 8
+       /*
+        * SAVE_EXTRA_REGS result is not normally needed:
+        * sigreturn overwrites all pt_regs->GPREGS.
+        * But sigreturn can fail (!), and there is no easy way to detect that.
+        * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
+        * we SAVE_EXTRA_REGS here.
+        */
+       SAVE_EXTRA_REGS 8
        call sys_rt_sigreturn
-       movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-       RESTORE_REST
+return_from_stub:
+       addq    $8, %rsp
+       CFI_ADJUST_CFA_OFFSET -8
+       RESTORE_EXTRA_REGS
+       movq %rax,RAX(%rsp)
        jmp int_ret_from_sys_call
        CFI_ENDPROC
 END(stub_rt_sigreturn)
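
The opportunistic-SYSRET checks in the hunk above hinge on one trick: shifting RCX right by __VIRTUAL_MASK_SHIFT catches every address with any of the 17 high bits set, i.e. every non-canonical or kernel address, in a single instruction. A small C illustration, assuming the 48-bit virtual-address layout the comment assumes (__VIRTUAL_MASK_SHIFT == 47):

#include <stdint.h>
#include <stdio.h>

#define VIRTUAL_MASK_SHIFT 47   /* 48-bit virtual addresses, as assumed above */

/* Mirrors "shr $__VIRTUAL_MASK_SHIFT, %rcx; jnz failed": any of the top
 * 17 bits set means non-canonical or kernel, so SYSRET is not safe. */
static int sysret_rip_ok(uint64_t rip)
{
        return (rip >> VIRTUAL_MASK_SHIFT) == 0;
}

int main(void)
{
        printf("%d\n", sysret_rip_ok(0x00007fffffffe000ULL)); /* user: 1 */
        printf("%d\n", sysret_rip_ok(0xffff880000000000ULL)); /* kernel: 0 */
        printf("%d\n", sysret_rip_ok(0x0000800000000000ULL)); /* non-canonical: 0 */
        return 0;
}
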
@@ -579,86 +588,70 @@ END(stub_rt_sigreturn)
 #ifdef CONFIG_X86_X32_ABI
 ENTRY(stub_x32_rt_sigreturn)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
+       DEFAULT_FRAME 0, 8
+       SAVE_EXTRA_REGS 8
        call sys32_x32_rt_sigreturn
-       movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-       RESTORE_REST
-       jmp int_ret_from_sys_call
+       jmp  return_from_stub
        CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
+#endif
 
-ENTRY(stub_x32_execve)
-       CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call compat_sys_execve
-       RESTORE_TOP_OF_STACK %r11
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
-       CFI_ENDPROC
-END(stub_x32_execve)
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+       DEFAULT_FRAME
 
-ENTRY(stub_x32_execveat)
-       CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call compat_sys_execveat
-       RESTORE_TOP_OF_STACK %r11
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
+       LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+       pushq_cfi $0x0002
+       popfq_cfi                               # reset kernel eflags
+
+       call schedule_tail                      # rdi: 'prev' task parameter
+
+       RESTORE_EXTRA_REGS
+
+       testl $3,CS(%rsp)                       # from kernel_thread?
+
+       /*
+        * By the time we get here, we have no idea whether our pt_regs,
+        * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+        * the slow path, or one of the ia32entry paths.
+        * Use IRET code path to return, since it can safely handle
+        * all of the above.
+        */
+       jnz     int_ret_from_sys_call
+
+       /* We came from kernel_thread */
+       /* nb: we depend on RESTORE_EXTRA_REGS above */
+       movq %rbp, %rdi
+       call *%rbx
+       movl $0, RAX(%rsp)
+       RESTORE_EXTRA_REGS
        jmp int_ret_from_sys_call
        CFI_ENDPROC
-END(stub_x32_execveat)
-
-#endif
+END(ret_from_fork)
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-       .section .init.rodata,"a"
-ENTRY(interrupt)
-       .section .entry.text
-       .p2align 5
-       .p2align CONFIG_X86_L1_CACHE_SHIFT
+       .align 8
 ENTRY(irq_entries_start)
        INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-       .balign 32
-  .rept        7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+       pushq_cfi $(~vector+0x80)       /* Note: always in signed byte range */
+    vector=vector+1
+       jmp     common_interrupt
        CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:     pushq_cfi $(~vector+0x80)       /* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-       jmp 2f
-      .endif
-      .previous
-       .quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:     jmp common_interrupt
-.endr
+       .align  8
+    .endr
        CFI_ENDPROC
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * Interrupt entry/exit.
  *
@@ -669,47 +662,45 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
-       /* reserve pt_regs for scratch regs and rbp */
-       subq $ORIG_RAX-RBP, %rsp
-       CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
        cld
-       /* start from rbp in pt_regs and jump over */
-       movq_cfi rdi, (RDI-RBP)
-       movq_cfi rsi, (RSI-RBP)
-       movq_cfi rdx, (RDX-RBP)
-       movq_cfi rcx, (RCX-RBP)
-       movq_cfi rax, (RAX-RBP)
-       movq_cfi  r8,  (R8-RBP)
-       movq_cfi  r9,  (R9-RBP)
-       movq_cfi r10, (R10-RBP)
-       movq_cfi r11, (R11-RBP)
-
-       /* Save rbp so that we can unwind from get_irq_regs() */
-       movq_cfi rbp, 0
-
-       /* Save previous stack value */
-       movq %rsp, %rsi
+       /*
+        * Since nothing in interrupt handling code touches r12...r15 members
+        * of "struct pt_regs", and since interrupts can nest, we can save
+        * four stack slots and simultaneously provide
+        * an unwind-friendly stack layout by saving "truncated" pt_regs
+        * exactly up to rbp slot, without these members.
+        */
+       ALLOC_PT_GPREGS_ON_STACK -RBP
+       SAVE_C_REGS -RBP
+       /* this goes to 0(%rsp) for unwinder, not for saving the value: */
+       SAVE_EXTRA_REGS_RBP -RBP
 
-       leaq -RBP(%rsp),%rdi    /* arg1 for handler */
-       testl $3, CS-RBP(%rsi)
+       leaq -RBP(%rsp),%rdi    /* arg1 for \func (pointer to pt_regs) */
+
+       testl $3, CS-RBP(%rsp)
        je 1f
        SWAPGS
+1:
        /*
+        * Save previous stack pointer, optionally switch to interrupt stack.
         * irq_count is used to check if a CPU is already on an interrupt stack
         * or not. While this is essentially redundant with preempt_count it is
         * a little cheaper to use a separate counter in the PDA (short of
         * moving irq_enter into assembly, which would be too much work)
         */
-1:     incl PER_CPU_VAR(irq_count)
+       movq %rsp, %rsi
+       incl PER_CPU_VAR(irq_count)
        cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
        CFI_DEF_CFA_REGISTER    rsi
-
-       /* Store previous stack value */
        pushq %rsi
+       /*
+        * For debugger:
+        * "CFA (Current Frame Address) is the value on stack + offset"
+        */
        CFI_ESCAPE      0x0f /* DW_CFA_def_cfa_expression */, 6, \
-                       0x77 /* DW_OP_breg7 */, 0, \
+                       0x77 /* DW_OP_breg7 (rsp) */, 0, \
                        0x06 /* DW_OP_deref */, \
-                       0x08 /* DW_OP_const1u */, SS+8-RBP, \
+                       0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
                        0x22 /* DW_OP_plus */
        /* We entered an interrupt context - irqs are off: */
        TRACE_IRQS_OFF
@@ -727,7 +718,7 @@ common_interrupt:
        ASM_CLAC
        addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
        interrupt do_IRQ
-       /* 0(%rsp): old_rsp-ARGOFFSET */
+       /* 0(%rsp): old RSP */
 ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
@@ -735,19 +726,18 @@ ret_from_intr:
 
        /* Restore saved previous stack */
        popq %rsi
-       CFI_DEF_CFA rsi,SS+8-RBP        /* reg/off reset after def_cfa_expr */
-       leaq ARGOFFSET-RBP(%rsi), %rsp
+       CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
+       /* return code expects complete pt_regs - adjust rsp accordingly: */
+       leaq -RBP(%rsi),%rsp
        CFI_DEF_CFA_REGISTER    rsp
-       CFI_ADJUST_CFA_OFFSET   RBP-ARGOFFSET
+       CFI_ADJUST_CFA_OFFSET   RBP
 
-exit_intr:
-       GET_THREAD_INFO(%rcx)
-       testl $3,CS-ARGOFFSET(%rsp)
+       testl $3,CS(%rsp)
        je retint_kernel
-
        /* Interrupt came from user space */
+
+       GET_THREAD_INFO(%rcx)
        /*
-        * Has a correct top of stack, but a partial stack frame
         * %rcx: thread info. Interrupts off.
         */
 retint_with_reschedule:
@@ -766,84 +756,34 @@ retint_swapgs:            /* return to user-space */
        DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_IRETQ
 
-       /*
-        * Try to use SYSRET instead of IRET if we're returning to
-        * a completely clean 64-bit userspace context.
-        */
-       movq (RCX-R11)(%rsp), %rcx
-       cmpq %rcx,(RIP-R11)(%rsp)               /* RCX == RIP */
-       jne opportunistic_sysret_failed
-
-       /*
-        * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
-        * in kernel space.  This essentially lets the user take over
-        * the kernel, since userspace controls RSP.  It's not worth
-        * testing for canonicalness exactly -- this check detects any
-        * of the 17 high bits set, which is true for non-canonical
-        * or kernel addresses.  (This will pessimize vsyscall=native.
-        * Big deal.)
-        *
-        * If virtual addresses ever become wider, this will need
-        * to be updated to remain correct on both old and new CPUs.
-        */
-       .ifne __VIRTUAL_MASK_SHIFT - 47
-       .error "virtual address width changed -- sysret checks need update"
-       .endif
-       shr $__VIRTUAL_MASK_SHIFT, %rcx
-       jnz opportunistic_sysret_failed
-
-       cmpq $__USER_CS,(CS-R11)(%rsp)          /* CS must match SYSRET */
-       jne opportunistic_sysret_failed
-
-       movq (R11-ARGOFFSET)(%rsp), %r11
-       cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)      /* R11 == RFLAGS */
-       jne opportunistic_sysret_failed
-
-       /*
-        * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-        * restoring TF results in a trap from userspace immediately after
-        * SYSRET.  This would cause an infinite loop whenever #DB happens
-        * with register state that satisfies the opportunistic SYSRET
-        * conditions.  For example, single-stepping this user code:
-        *
-        *           movq $stuck_here,%rcx
-        *           pushfq
-        *           popq %r11
-        *   stuck_here:
-        *
-        * would never get past 'stuck_here'.
-        */
-       testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-       jnz opportunistic_sysret_failed
-
-       /* nothing to check for RSP */
-
-       cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)    /* SS must match SYSRET */
-       jne opportunistic_sysret_failed
-
-       /*
-        * We win!  This label is here just for ease of understanding
-        * perf profiles.  Nothing jumps here.
-        */
-irq_return_via_sysret:
-       CFI_REMEMBER_STATE
-       RESTORE_ARGS 1,8,1
-       movq (RSP-RIP)(%rsp),%rsp
-       USERGS_SYSRET64
-       CFI_RESTORE_STATE
-
-opportunistic_sysret_failed:
        SWAPGS
-       jmp restore_args
+       jmp     restore_c_regs_and_iret
 
-retint_restore_args:   /* return to kernel space */
-       DISABLE_INTERRUPTS(CLBR_ANY)
+/* Returning to kernel space */
+retint_kernel:
+#ifdef CONFIG_PREEMPT
+       /* Interrupts are off */
+       /* Check if we need preemption */
+       bt      $9,EFLAGS(%rsp) /* interrupts were off? */
+       jnc     1f
+0:     cmpl    $0,PER_CPU_VAR(__preempt_count)
+       jnz     1f
+       call    preempt_schedule_irq
+       jmp     0b
+1:
+#endif
        /*
         * The iretq could re-enable interrupts:
         */
        TRACE_IRQS_IRETQ
-restore_args:
-       RESTORE_ARGS 1,8,1
+
+/*
+ * At this label, code paths which return to kernel and to user,
+ * which come from interrupts/exception and from syscalls, merge.
+ */
+restore_c_regs_and_iret:
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
        INTERRUPT_RETURN
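
For reference, the new retint_kernel above folds the old ENTRY(retint_kernel) and exit_intr into one place: preemption is attempted only if the interrupted kernel context had interrupts enabled (EFLAGS bit 9), and the preempt count is re-tested after every reschedule. A loose C restatement with stubbed-out helpers (illustrative only; the real ones are the per-CPU __preempt_count and the scheduler's irq-return entry point):

#include <stdio.h>

#define X86_EFLAGS_IF (1UL << 9)

static int pc = 0;      /* stand-in for PER_CPU_VAR(__preempt_count) */
static int  preempt_count_is_zero(void) { return pc == 0; }
static void preempt_schedule_irq(void)  { puts("reschedule"); pc = 1; }

/* Bail out if the interrupted context had interrupts disabled; otherwise
 * reschedule and re-test the preempt count (the 0:/1: loop above). */
static void retint_kernel_sketch(unsigned long saved_rflags)
{
        if (!(saved_rflags & X86_EFLAGS_IF))
                return;
        while (preempt_count_is_zero())
                preempt_schedule_irq();
}

int main(void)
{
        retint_kernel_sketch(X86_EFLAGS_IF);    /* prints "reschedule" once */
        return 0;
}
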
@@ -914,28 +854,17 @@ retint_signal:
        jz    retint_swapgs
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
-       SAVE_REST
+       SAVE_EXTRA_REGS
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
-       RESTORE_REST
+       RESTORE_EXTRA_REGS
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)
        jmp retint_with_reschedule
 
-#ifdef CONFIG_PREEMPT
-       /* Returning to kernel space. Check if we need preemption */
-       /* rcx:  threadinfo. interrupts off. */
-ENTRY(retint_kernel)
-       cmpl $0,PER_CPU_VAR(__preempt_count)
-       jnz  retint_restore_args
-       bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
-       jnc  retint_restore_args
-       call preempt_schedule_irq
-       jmp exit_intr
-#endif
        CFI_ENDPROC
 END(common_interrupt)
 
@@ -1024,7 +953,7 @@ apicinterrupt IRQ_WORK_VECTOR \
 /*
  * Exception entry points.
  */
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -1046,8 +975,7 @@ ENTRY(\sym)
        pushq_cfi $-1                   /* ORIG_RAX: no syscall to restart */
        .endif
 
-       subq $ORIG_RAX-R15, %rsp
-       CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+       ALLOC_PT_GPREGS_ON_STACK
 
        .if \paranoid
        .if \paranoid == 1
@@ -1055,10 +983,11 @@ ENTRY(\sym)
        testl $3, CS(%rsp)              /* If coming from userspace, switch */
        jnz 1f                          /* stacks. */
        .endif
-       call save_paranoid
+       call paranoid_entry
        .else
        call error_entry
        .endif
+       /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
        DEFAULT_FRAME 0
 
@@ -1080,19 +1009,20 @@ ENTRY(\sym)
        .endif
 
        .if \shift_ist != -1
-       subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+       subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
        .endif
 
        call \do_sym
 
        .if \shift_ist != -1
-       addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+       addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
        .endif
 
+       /* these procedures expect "no swapgs" flag in ebx */
        .if \paranoid
-       jmp paranoid_exit               /* %ebx: no swapgs flag */
+       jmp paranoid_exit
        .else
-       jmp error_exit                  /* %ebx: no swapgs flag */
+       jmp error_exit
        .endif
 
        .if \paranoid == 1
@@ -1296,7 +1226,9 @@ ENTRY(xen_failsafe_callback)
        addq $0x30,%rsp
        CFI_ADJUST_CFA_OFFSET -0x30
        pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-       SAVE_ALL
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
        jmp error_exit
        CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1328,59 +1260,66 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
-       /*
-        * "Paranoid" exit path from exception stack.  This is invoked
-        * only on return from non-NMI IST interrupts that came
-        * from kernel space.
-        *
-        * We may be returning to very strange contexts (e.g. very early
-        * in syscall entry), so checking for preemption here would
-        * be complicated.  Fortunately, we there's no good reason
-        * to try to handle preemption here.
-        */
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+       XCPT_FRAME 1 15*8
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
+       movl $1,%ebx
+       movl $MSR_GS_BASE,%ecx
+       rdmsr
+       testl %edx,%edx
+       js 1f   /* negative -> in kernel */
+       SWAPGS
+       xorl %ebx,%ebx
+1:     ret
+       CFI_ENDPROC
+END(paranoid_entry)
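
paranoid_entry above decides whether SWAPGS will be needed on exit from the sign of MSR_GS_BASE: kernel GS bases live in the upper half of the address space, so a negative high word in the MSR means a kernel GS is already loaded. A hedged C rendering of that test (the MSR value is passed in as a plain argument; there is no real MSR access here):

#include <stdint.h>
#include <stdio.h>

/* rdmsr leaves the high half of MSR_GS_BASE in %edx; "js 1f" then skips
 * SWAPGS when that half is negative, i.e. bit 63 of the MSR is set. */
static int need_swapgs(uint64_t gs_base)
{
        return (int32_t)(gs_base >> 32) >= 0;   /* non-negative: user gs base */
}

int main(void)
{
        printf("%d\n", need_swapgs(0xffff880000000000ULL));  /* kernel base: 0 */
        printf("%d\n", need_swapgs(0x00007f1234560000ULL));  /* user base:   1 */
        return 0;
}
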
 
-       /* ebx: no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(paranoid_exit)
        DEFAULT_FRAME
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF_DEBUG
        testl %ebx,%ebx                         /* swapgs needed? */
-       jnz paranoid_restore
-       TRACE_IRQS_IRETQ 0
+       jnz paranoid_exit_no_swapgs
+       TRACE_IRQS_IRETQ
        SWAPGS_UNSAFE_STACK
-       RESTORE_ALL 8
-       INTERRUPT_RETURN
-paranoid_restore:
-       TRACE_IRQS_IRETQ_DEBUG 0
-       RESTORE_ALL 8
+       jmp paranoid_exit_restore
+paranoid_exit_no_swapgs:
+       TRACE_IRQS_IRETQ_DEBUG
+paranoid_exit_restore:
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
        INTERRUPT_RETURN
        CFI_ENDPROC
 END(paranoid_exit)
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(error_entry)
-       XCPT_FRAME
-       CFI_ADJUST_CFA_OFFSET 15*8
-       /* oldrax contains error code */
+       XCPT_FRAME 1 15*8
        cld
-       movq %rdi, RDI+8(%rsp)
-       movq %rsi, RSI+8(%rsp)
-       movq %rdx, RDX+8(%rsp)
-       movq %rcx, RCX+8(%rsp)
-       movq %rax, RAX+8(%rsp)
-       movq  %r8,  R8+8(%rsp)
-       movq  %r9,  R9+8(%rsp)
-       movq %r10, R10+8(%rsp)
-       movq %r11, R11+8(%rsp)
-       movq_cfi rbx, RBX+8
-       movq %rbp, RBP+8(%rsp)
-       movq %r12, R12+8(%rsp)
-       movq %r13, R13+8(%rsp)
-       movq %r14, R14+8(%rsp)
-       movq %r15, R15+8(%rsp)
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
        xorl %ebx,%ebx
        testl $3,CS+8(%rsp)
        je error_kernelspace
@@ -1390,12 +1329,12 @@ error_sti:
        TRACE_IRQS_OFF
        ret
 
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+       /*
+        * There are two places in the kernel that can potentially fault with
+        * usergs. Handle them here.  B stepping K8s sometimes report a
+        * truncated RIP for IRET exceptions returning to compat mode. Check
+        * for these here too.
+        */
 error_kernelspace:
        CFI_REL_OFFSET rcx, RCX+8
        incl %ebx
@@ -1425,11 +1364,11 @@ error_bad_iret:
 END(error_entry)
 
 
-/* ebx:        no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(error_exit)
        DEFAULT_FRAME
        movl %ebx,%eax
-       RESTORE_REST
+       RESTORE_EXTRA_REGS
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)
@@ -1444,19 +1383,7 @@ ENTRY(error_exit)
        CFI_ENDPROC
 END(error_exit)
 
-/*
- * Test if a given stack is an NMI stack or not.
- */
-       .macro test_in_nmi reg stack nmi_ret normal_ret
-       cmpq %\reg, \stack
-       ja \normal_ret
-       subq $EXCEPTION_STKSZ, %\reg
-       cmpq %\reg, \stack
-       jb \normal_ret
-       jmp \nmi_ret
-       .endm
-
-       /* runs on exception stack */
+/* Runs on exception stack */
 ENTRY(nmi)
        INTR_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1492,7 +1419,7 @@ ENTRY(nmi)
         * NMI.
         */
 
-       /* Use %rdx as out temp variable throughout */
+       /* Use %rdx as our temp variable throughout */
        pushq_cfi %rdx
        CFI_REL_OFFSET rdx, 0
 
@@ -1517,8 +1444,17 @@ ENTRY(nmi)
         * We check the variable because the first NMI could be in a
         * breakpoint routine using a breakpoint stack.
         */
-       lea 6*8(%rsp), %rdx
-       test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+       lea     6*8(%rsp), %rdx
+       /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
+       cmpq    %rdx, 4*8(%rsp)
+       /* If the stack pointer is above the NMI stack, this is a normal NMI */
+       ja      first_nmi
+       subq    $EXCEPTION_STKSZ, %rdx
+       cmpq    %rdx, 4*8(%rsp)
+       /* If it is below the NMI stack, it is a normal NMI */
+       jb      first_nmi
+       /* Ah, it is within the NMI stack, treat it as nested */
+
        CFI_REMEMBER_STATE
 
 nested_nmi:
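
The open-coded sequence that replaces the old test_in_nmi macro is a plain interval test on the interrupted stack pointer. In C terms (the EXCEPTION_STKSZ value is illustrative; the comparison logic mirrors the cmpq/ja/cmpq/jb above):

#include <stdio.h>

#define EXCEPTION_STKSZ 4096UL  /* illustrative; the real size is config-dependent */

/* Nested iff the interrupted stack pointer lies within
 * [nmi_stack_top - EXCEPTION_STKSZ, nmi_stack_top]. */
static int nmi_is_nested(unsigned long prev_sp, unsigned long nmi_stack_top)
{
        if (prev_sp > nmi_stack_top)                      /* above the NMI stack */
                return 0;
        if (prev_sp < nmi_stack_top - EXCEPTION_STKSZ)    /* below the NMI stack */
                return 0;
        return 1;                                         /* within: nested NMI */
}

int main(void)
{
        printf("%d %d\n", nmi_is_nested(0x9000, 0x8000),   /* above:  0 */
                          nmi_is_nested(0x7800, 0x8000));  /* within: 1 */
        return 0;
}
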
@@ -1611,7 +1547,7 @@ first_nmi:
        .rept 5
        pushq_cfi 11*8(%rsp)
        .endr
-       CFI_DEF_CFA_OFFSET SS+8-RIP
+       CFI_DEF_CFA_OFFSET 5*8
 
        /* Everything up to here is safe from nested NMIs */
 
@@ -1639,7 +1575,7 @@ repeat_nmi:
        pushq_cfi -6*8(%rsp)
        .endr
        subq $(5*8), %rsp
-       CFI_DEF_CFA_OFFSET SS+8-RIP
+       CFI_DEF_CFA_OFFSET 5*8
 end_repeat_nmi:
 
        /*
@@ -1648,16 +1584,16 @@ end_repeat_nmi:
         * so that we repeat another NMI.
         */
        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
-       subq $ORIG_RAX-R15, %rsp
-       CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+       ALLOC_PT_GPREGS_ON_STACK
+
        /*
-        * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+        * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
         * as we should not be calling schedule in NMI context.
         * Even with normal interrupts enabled. An NMI should not be
         * setting NEED_RESCHED or anything that normal interrupts and
         * exceptions might do.
         */
-       call save_paranoid
+       call paranoid_entry
        DEFAULT_FRAME 0
 
        /*
@@ -1688,8 +1624,10 @@ end_repeat_nmi:
 nmi_swapgs:
        SWAPGS_UNSAFE_STACK
 nmi_restore:
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
        /* Pop the extra iret frame at once */
-       RESTORE_ALL 6*8
+       REMOVE_PT_GPREGS_FROM_STACK 6*8
 
        /* Clear the NMI executing stack variable */
        movq $0, 5*8(%rsp)
index c4f8d4659070db99ce190543186bc4a4ac5d2ac9..2b55ee6db053c79fbe91a6119e613075be54111b 100644 (file)
@@ -177,9 +177,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
         */
        load_ucode_bsp();
 
-       if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-               early_printk("Kernel alive\n");
-
        clear_page(init_level4_pgt);
        /* set init_level4_pgt kernel high mapping*/
        init_level4_pgt[511] = early_level4_pgt[511];
index f36bd42d6f0c8b5fc5cd75dcf133b35a1f6bfe3b..d031bad9e07eadf3a80bc69a449cd13a44ed8080 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/cpufeature.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include <asm/bootparam.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -90,7 +91,7 @@ ENTRY(startup_32)
        
        /* test KEEP_SEGMENTS flag to see if the bootloader is asking
                us to not reload segments */
-       testb $(1<<6), BP_loadflags(%esi)
+       testb $KEEP_SEGMENTS, BP_loadflags(%esi)
        jnz 2f
 
 /*
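
The testb change above is semantics-preserving: KEEP_SEGMENTS, now pulled in via asm/bootparam.h, is defined as (1<<6), so the same loadflags bit is tested, just by name. A one-liner to confirm the equivalence (constant duplicated here for illustration):

#include <stdio.h>

#define KEEP_SEGMENTS (1 << 6)  /* loadflags bit, as in asm/bootparam.h */

int main(void)
{
        printf("%d\n", KEEP_SEGMENTS == 0x40);  /* prints 1 */
        return 0;
}
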
index 6fd514d9f69a267a7813ca2b04657991429932d5..ae6588b301c248b3c281a1e072802e6764e9ac44 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ *  linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@@ -56,7 +56,7 @@ startup_64:
         * %rsi holds a physical pointer to real_mode_data.
         *
         * We come here either directly from a 64bit bootloader, or from
-        * arch/x86_64/boot/compressed/head.S.
+        * arch/x86/boot/compressed/head_64.S.
         *
         * We only come here initially at boot; nothing else comes here.
         *
@@ -146,7 +146,7 @@ startup_64:
        leaq    level2_kernel_pgt(%rip), %rdi
        leaq    4096(%rdi), %r8
        /* See if it is a valid page table entry */
-1:     testq   $1, 0(%rdi)
+1:     testb   $1, 0(%rdi)
        jz      2f
        addq    %rbp, 0(%rdi)
        /* Go to the next page */
index d5651fce0b71af6c15226483b6a06398ccbeb8a0..367f39d35e9cb98300fa368d3689d2ce143e71a6 100644 (file)
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case when we return true; in the likely case
+ * the thread has FPU but we are not going to set/clear TS.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
                return false;
 
        if (use_eager_fpu())
-               return __thread_has_fpu(current);
+               return true;
 
        return !__thread_has_fpu(current) &&
                (read_cr0() & X86_CR0_TS);
@@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
 static inline bool interrupted_user_mode(void)
 {
        struct pt_regs *regs = get_irq_regs();
-       return regs && user_mode_vm(regs);
+       return regs && user_mode(regs);
 }
 
 /*
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
 
        if (__thread_has_fpu(me)) {
                __save_init_fpu(me);
-       } else if (!use_eager_fpu()) {
+       } else {
                this_cpu_write(fpu_owner_task, NULL);
-               clts();
+               if (!use_eager_fpu())
+                       clts();
        }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void)
 
        if (__thread_has_fpu(me)) {
                if (WARN_ON(restore_fpu_checking(me)))
-                       drop_init_fpu(me);
+                       fpu_reset_state(me);
        } else if (!use_eager_fpu()) {
                stts();
        }
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
 {
        preempt_disable();
        if (__thread_has_fpu(tsk)) {
-               __save_init_fpu(tsk);
-               __thread_fpu_end(tsk);
-       } else
-               tsk->thread.fpu_counter = 0;
+               if (use_eager_fpu()) {
+                       __save_fpu(tsk);
+               } else {
+                       __save_init_fpu(tsk);
+                       __thread_fpu_end(tsk);
+               }
+       }
        preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
                return;
        }
 
+       memset(fpu->state, 0, xstate_size);
+
        if (cpu_has_fxsr) {
                fx_finit(&fpu->state->fxsave);
        } else {
                struct i387_fsave_struct *fp = &fpu->state->fsave;
-               memset(fp, 0, xstate_size);
                fp->cwd = 0xffff037fu;
                fp->swd = 0xffff0000u;
                fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
        if (tsk_used_math(tsk)) {
                if (cpu_has_fpu && tsk == current)
                        unlazy_fpu(tsk);
-               tsk->thread.fpu.last_cpu = ~0;
+               task_disable_lazy_fpu_restore(tsk);
                return 0;
        }
 
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
                unsigned int pos, unsigned int count,
                void *kbuf, void __user *ubuf)
 {
+       struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
        int ret;
 
        if (!cpu_has_xsave)
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
         * memory layout in the thread struct, so that we can copy the entire
         * xstateregs to the user using one user_regset_copyout().
         */
-       memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
-              xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+       memcpy(&xsave->i387.sw_reserved,
+               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
        /*
         * Copy the xstate memory layout.
         */
-       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.fpu.state->xsave, 0, -1);
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
        return ret;
 }
 
@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
                  unsigned int pos, unsigned int count,
                  const void *kbuf, const void __user *ubuf)
 {
+       struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
        int ret;
-       struct xsave_hdr_struct *xsave_hdr;
 
        if (!cpu_has_xsave)
                return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
        if (ret)
                return ret;
 
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &target->thread.fpu.state->xsave, 0, -1);
-
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
        /*
         * mxcsr reserved bits must be masked to zero for security reasons.
         */
-       target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-       xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-       xsave_hdr->xstate_bv &= pcntxt_mask;
+       xsave->i387.mxcsr &= mxcsr_feature_mask;
+       xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
        /*
         * These bits must be zero.
         */
-       memset(xsave_hdr->reserved, 0, 48);
-
+       memset(&xsave->xsave_hdr.reserved, 0, 48);
        return ret;
 }
 
index 4ddaf66ea35f696eac6afce6bb43f01d2d84f1aa..37dae792dbbed00b480aa67a33cb0b2ae1699428 100644 (file)
@@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
         * because the ->io_bitmap_max value must match the bitmap
         * contents:
         */
-       tss = &per_cpu(init_tss, get_cpu());
+       tss = &per_cpu(cpu_tss, get_cpu());
 
        if (turn_on)
                bitmap_clear(t->io_bitmap_ptr, from, num);
index 67b1cbe0093adba1141f8d9ebda29ad34dc9d23e..e5952c22553241e2ceea5d5fd6f1f7b758cc960e 100644 (file)
@@ -295,7 +295,7 @@ int check_irq_vectors_for_cpu_disable(void)
 
        this_cpu = smp_processor_id();
        cpumask_copy(&online_new, cpu_online_mask);
-       cpu_clear(this_cpu, online_new);
+       cpumask_clear_cpu(this_cpu, &online_new);
 
        this_count = 0;
        for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -307,7 +307,7 @@ int check_irq_vectors_for_cpu_disable(void)
 
                        data = irq_desc_get_irq_data(desc);
                        cpumask_copy(&affinity_new, data->affinity);
-                       cpu_clear(this_cpu, affinity_new);
+                       cpumask_clear_cpu(this_cpu, &affinity_new);
 
                        /* Do not count inactive or per-cpu irqs. */
                        if (!irq_has_action(irq) || irqd_is_per_cpu(data))
index 28d28f5eb8f49c2a9b8f84997e1dd0dc8841ece0..f9fd86a7fcc7d1bc8c2cc920fc5b5037f5859336 100644 (file)
@@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
        if (unlikely(!desc))
                return false;
 
-       if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+       if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
                if (unlikely(overflow))
                        print_stack_overflow();
                desc->handle_irq(irq, desc);
index e4b503d5558c5d435eddc3c3c5be8c55f1de558d..394e643d7830fc01d4da516bd79cab1dc0aa6962 100644 (file)
@@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
        u64 estack_top, estack_bottom;
        u64 curbase = (u64)task_stack_page(current);
 
-       if (user_mode_vm(regs))
+       if (user_mode(regs))
                return;
 
        if (regs->sp >= curbase + sizeof(struct thread_info) +
index 70e181ea1eac1f2da444482e6714e61b52d5a19e..cd10a64372647c3579ba6717db49c6cd63c6353a 100644 (file)
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
 #endif
        for_each_clear_bit_from(i, used_vectors, first_system_vector) {
                /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-               set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+               set_intr_gate(i, irq_entries_start +
+                               8 * (i - FIRST_EXTERNAL_VECTOR));
        }
 #ifdef CONFIG_X86_LOCAL_APIC
        for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
index 25ecd56cefa8f22496153cf29b763c266bd8d91e..d6178d9791db7966e8bc188e3df16c03233c3da3 100644 (file)
@@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 #ifdef CONFIG_X86_32
        switch (regno) {
        case GDB_SS:
-               if (!user_mode_vm(regs))
+               if (!user_mode(regs))
                        *(unsigned long *)mem = __KERNEL_DS;
                break;
        case GDB_SP:
-               if (!user_mode_vm(regs))
+               if (!user_mode(regs))
                        *(unsigned long *)mem = kernel_stack_pointer(regs);
                break;
        case GDB_GS:
index 4e3d5a9621fe0052fac43d5ad6c109b9d3f54447..24d079604fd53afcab905a518c139e3ef2f13d71 100644 (file)
@@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
        struct kprobe *p;
        struct kprobe_ctlblk *kcb;
 
-       if (user_mode_vm(regs))
+       if (user_mode(regs))
                return 0;
 
        addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
        struct die_args *args = data;
        int ret = NOTIFY_DONE;
 
-       if (args->regs && user_mode_vm(args->regs))
+       if (args->regs && user_mode(args->regs))
                return ret;
 
        if (val == DIE_GPF) {
index d1ac80b72c72184a0b999c2b299b5e265d26de7a..005c03e93fc54c7907e8e9e2bc1d771902b1c3ca 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP(fmt, ...)                               \
@@ -47,21 +48,13 @@ do {                                                        \
 
 #ifdef CONFIG_RANDOMIZE_BASE
 static unsigned long module_load_offset;
-static int randomize_modules = 1;
 
 /* Mutex protects the module_load_offset. */
 static DEFINE_MUTEX(module_kaslr_mutex);
 
-static int __init parse_nokaslr(char *p)
-{
-       randomize_modules = 0;
-       return 0;
-}
-early_param("nokaslr", parse_nokaslr);
-
 static unsigned long int get_module_load_offset(void)
 {
-       if (randomize_modules) {
+       if (kaslr_enabled()) {
                mutex_lock(&module_kaslr_mutex);
                /*
                 * Calculate the module_load_offset the first time this
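
The hunk above replaces the driver-local "randomize_modules" toggle with the shared kaslr_enabled() check; the offset itself is still drawn lazily, once, under module_kaslr_mutex. A rough userspace analogue of that once-only, mutex-guarded draw (all names and the rand() source are invented for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned long module_load_offset;
static pthread_mutex_t module_kaslr_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Draw a page-aligned offset on first use; later calls see the same value. */
static unsigned long get_module_load_offset(void)
{
	pthread_mutex_lock(&module_kaslr_mutex);
	if (module_load_offset == 0)
		module_load_offset = ((unsigned long)(rand() % 1024) + 1) * 4096;
	pthread_mutex_unlock(&module_kaslr_mutex);
	return module_load_offset;
}

int main(void)
{
	printf("0x%lx\n", get_module_load_offset());
	printf("0x%lx\n", get_module_load_offset());	/* identical */
	return 0;
}
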
index 781861cc5ee8d7b9bbd27e9b13c380da59bb06c0..da8cb987b97312f6b37b5a967ce70fc5e75e0205 100644 (file)
@@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
        }
 
        /*
-        * RIP, flags, and the argument registers are usually saved.
-        * orig_ax is probably okay, too.
+        * These registers are always saved on 64-bit syscall entry.
+        * On 32-bit entry points, they are saved too, except for r8..r11.
         */
        regs_user_copy->ip = user_regs->ip;
+       regs_user_copy->ax = user_regs->ax;
        regs_user_copy->cx = user_regs->cx;
        regs_user_copy->dx = user_regs->dx;
        regs_user_copy->si = user_regs->si;
@@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
        regs_user_copy->r11 = user_regs->r11;
        regs_user_copy->orig_ax = user_regs->orig_ax;
        regs_user_copy->flags = user_regs->flags;
+       regs_user_copy->sp = user_regs->sp;
+       regs_user_copy->cs = user_regs->cs;
+       regs_user_copy->ss = user_regs->ss;
 
        /*
-        * Don't even try to report the "rest" regs.
+        * Most system calls don't save these registers, so don't report them.
         */
        regs_user_copy->bx = -1;
        regs_user_copy->bp = -1;
@@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 
        /*
         * For this to be at all useful, we need a reasonable guess for
-        * sp and the ABI.  Be careful: we're in NMI context, and we're
+        * the ABI.  Be careful: we're in NMI context, and we're
         * considering current to be the current task, so we should
         * be careful not to look at any other percpu variables that might
         * change during context switches.
         */
-       if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
-           task_thread_info(current)->status & TS_COMPAT) {
-               /* Easy case: we're in a compat syscall. */
-               regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
-               regs_user_copy->sp = user_regs->sp;
-               regs_user_copy->cs = user_regs->cs;
-               regs_user_copy->ss = user_regs->ss;
-       } else if (user_regs->orig_ax != -1) {
-               /*
-                * We're probably in a 64-bit syscall.
-                * Warning: this code is severely racy.  At least it's better
-                * than just blindly copying user_regs.
-                */
-               regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
-               regs_user_copy->sp = this_cpu_read(old_rsp);
-               regs_user_copy->cs = __USER_CS;
-               regs_user_copy->ss = __USER_DS;
-               regs_user_copy->cx = -1;  /* usually contains garbage */
-       } else {
-               /* We're probably in an interrupt or exception. */
-               regs_user->abi = user_64bit_mode(user_regs) ?
-                       PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
-               regs_user_copy->sp = user_regs->sp;
-               regs_user_copy->cs = user_regs->cs;
-               regs_user_copy->ss = user_regs->ss;
-       }
+       regs_user->abi = user_64bit_mode(user_regs) ?
+               PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
 
        regs_user->regs = regs_user_copy;
 }
index 046e2d620bbe7be507808e0f7188c45249f2d69c..8213da62b1b79c1c37798b598494add6802881d7 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/random.h>
 #include <linux/user-return-notifier.h>
 #include <linux/dmi.h>
@@ -24,6 +24,7 @@
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
+#include <asm/mwait.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+       .x86_tss = {
+               .sp0 = TOP_OF_INIT_STACK,
+#ifdef CONFIG_X86_32
+               .ss0 = __KERNEL_DS,
+               .ss1 = __KERNEL_CS,
+               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+#endif
+        },
+#ifdef CONFIG_X86_32
+        /*
+         * Note that the .io_bitmap member must be extra-big. This is because
+         * the CPU will access an additional byte beyond the end of the IO
+         * permission bitmap. The extra byte must be all 1 bits, and must
+         * be within the limit.
+         */
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+#endif
+};
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -69,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        dst->thread.fpu_counter = 0;
        dst->thread.fpu.has_fpu = 0;
-       dst->thread.fpu.last_cpu = ~0;
        dst->thread.fpu.state = NULL;
+       task_disable_lazy_fpu_restore(dst);
        if (tsk_used_math(src)) {
                int err = fpu_alloc(&dst->thread.fpu);
                if (err)
@@ -109,7 +129,7 @@ void exit_thread(void)
        unsigned long *bp = t->io_bitmap_ptr;
 
        if (bp) {
-               struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+               struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
 
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
@@ -131,13 +151,18 @@ void flush_thread(void)
 
        flush_ptrace_hw_breakpoint(tsk);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
-       drop_init_fpu(tsk);
-       /*
-        * Free the FPU state for non xsave platforms. They get reallocated
-        * lazily at the first use.
-        */
-       if (!use_eager_fpu())
+
+       if (!use_eager_fpu()) {
+               /* FPU state will be reallocated lazily at the first use. */
+               drop_fpu(tsk);
                free_thread_xstate(tsk);
+       } else if (!used_math()) {
+               /* kthread execs. TODO: cleanup this horror. */
+               if (WARN_ON(init_fpu(tsk)))
+                       force_sig(SIGKILL, tsk);
+               user_fpu_begin();
+               restore_init_xstate();
+       }
 }
 
 static void hard_disable_TSC(void)
@@ -377,14 +402,11 @@ static void amd_e400_idle(void)
 
                if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
                        cpumask_set_cpu(cpu, amd_e400_c1e_mask);
-                       /*
-                        * Force broadcast so ACPI can not interfere.
-                        */
-                       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-                                          &cpu);
+                       /* Force broadcast so ACPI can not interfere. */
+                       tick_broadcast_force();
                        pr_info("Switch to broadcast mode on CPU%d\n", cpu);
                }
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+               tick_broadcast_enter();
 
                default_idle();
 
@@ -393,12 +415,59 @@ static void amd_e400_idle(void)
                 * called with interrupts disabled.
                 */
                local_irq_disable();
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+               tick_broadcast_exit();
                local_irq_enable();
        } else
                default_idle();
 }
 
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+       if (c->x86_vendor != X86_VENDOR_INTEL)
+               return 0;
+
+       if (!cpu_has(c, X86_FEATURE_MWAIT))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for the default C1 state.
+ * This invokes MWAIT with interrupts enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+       if (!current_set_polling_and_test()) {
+               if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+                       smp_mb(); /* quirk */
+                       clflush((void *)&current_thread_info()->flags);
+                       smp_mb(); /* quirk */
+               }
+
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               if (!need_resched())
+                       __sti_mwait(0, 0);
+               else
+                       local_irq_enable();
+       } else {
+               local_irq_enable();
+       }
+       __current_clr_polling();
+}
+
 void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
@@ -412,6 +481,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
                /* E400: APIC timer interrupt does not wake up CPU from C1e */
                pr_info("using AMD E400 aware idle routine\n");
                x86_idle = amd_e400_idle;
+       } else if (prefer_mwait_c1_over_halt(c)) {
+               pr_info("using mwait in idle threads\n");
+               x86_idle = mwait_idle;
        } else
                x86_idle = default_idle;
 }
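
prefer_mwait_c1_over_halt() above gates the new mwait_idle() on the CPU being Intel and advertising MWAIT. A hedged userspace sketch of the same probe via CPUID (vendor string from leaf 0; MONITOR/MWAIT is ECX bit 3 of leaf 1) -- it mirrors only the selection logic, since MONITOR/MWAIT itself cannot run outside the kernel:

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

static int prefer_mwait_c1(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return 0;
	memcpy(vendor, &ebx, 4);	/* vendor string is EBX, EDX, ECX */
	memcpy(vendor + 4, &edx, 4);
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';
	if (strcmp(vendor, "GenuineIntel"))
		return 0;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	return !!(ecx & (1u << 3));	/* MONITOR/MWAIT feature bit */
}

int main(void)
{
	printf("default C1 idle: %s\n", prefer_mwait_c1() ? "mwait" : "halt");
	return 0;
}
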
index 603c4f99cb5a17f83e65c3066a5642a4bb9d0f42..8ed2106b06da63e0a8e0dcf561aad7a1fc112e40 100644 (file)
@@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned long sp;
        unsigned short ss, gs;
 
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                sp = regs->sp;
                ss = regs->ss & 0xffff;
                gs = get_user_gs(regs);
@@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
        regs->ip                = new_ip;
        regs->sp                = new_sp;
        regs->flags             = X86_EFLAGS_IF;
-       /*
-        * force it to the iret return path by making it look as if there was
-        * some work pending.
-        */
-       set_thread_flag(TIF_NOTIFY_RESUME);
+       force_iret();
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
@@ -248,18 +244,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
        fpu_switch_t fpu;
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
        fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-       /*
-        * Reload esp0.
-        */
-       load_sp0(tss, next);
-
        /*
         * Save away %gs. No need to save %fs, as it was saved on the
         * stack on entry.  No need to save %es and %ds, as those are
@@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         */
        arch_end_context_switch(next_p);
 
+       /*
+        * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+        * current_thread_info().
+        */
+       load_sp0(tss, next);
        this_cpu_write(kernel_stack,
-                 (unsigned long)task_stack_page(next_p) +
-                 THREAD_SIZE - KERNEL_STACK_OFFSET);
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
 
        /*
         * Restore %gs if needed (which is common)
index 67fcc43577d279faa02941dd56dbf36f270a9832..4baaa972f52aaed15b3dddd94b4df4f653126d81 100644 (file)
@@ -52,7 +52,7 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, int all)
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
        p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
        childregs = task_pt_regs(p);
        p->thread.sp = (unsigned long) childregs;
-       p->thread.usersp = me->thread.usersp;
        set_tsk_thread_flag(p, TIF_FORK);
        p->thread.io_bitmap_ptr = NULL;
 
@@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
         */
        if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-               if (test_thread_flag(TIF_IA32))
+               if (is_ia32_task())
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
@@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
        loadsegment(es, _ds);
        loadsegment(ds, _ds);
        load_gs_index(0);
-       current->thread.usersp  = new_sp;
        regs->ip                = new_ip;
        regs->sp                = new_sp;
-       this_cpu_write(old_rsp, new_sp);
        regs->cs                = _cs;
        regs->ss                = _ss;
        regs->flags             = X86_EFLAGS_IF;
+       force_iret();
 }
 
 void
@@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
        unsigned fsindex, gsindex;
        fpu_switch_t fpu;
 
        fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-       /* Reload esp0 and ss1. */
-       load_sp0(tss, next);
-
        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
@@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /*
         * Switch the PDA and FPU contexts.
         */
-       prev->usersp = this_cpu_read(old_rsp);
-       this_cpu_write(old_rsp, next->usersp);
        this_cpu_write(current_task, next_p);
 
        /*
@@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
        this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
 
+       /* Reload esp0 and ss1.  This changes current_thread_info(). */
+       load_sp0(tss, next);
+
        this_cpu_write(kernel_stack,
-                 (unsigned long)task_stack_page(next_p) +
-                 THREAD_SIZE - KERNEL_STACK_OFFSET);
+               (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
@@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
-       return (test_tsk_thread_flag(task, TIF_IA32)) ?
-                       (task_pt_regs(task)->sp) : ((task)->thread.usersp);
+       return task_pt_regs(task)->sp;
 }
index e510618b2e91a7969bb8cf6c74a35f59e4bf1bea..a7bc794807195af79b6c15054b1941867d373198 100644 (file)
@@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
        case offsetof(struct user_regs_struct,cs):
                if (unlikely(value == 0))
                        return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-               if (test_tsk_thread_flag(task, TIF_IA32))
-                       task_pt_regs(task)->cs = value;
-#endif
+               task_pt_regs(task)->cs = value;
                break;
        case offsetof(struct user_regs_struct,ss):
                if (unlikely(value == 0))
                        return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-               if (test_tsk_thread_flag(task, TIF_IA32))
-                       task_pt_regs(task)->ss = value;
-#endif
+               task_pt_regs(task)->ss = value;
                break;
        }
 
@@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
        memset(info, 0, sizeof(*info));
        info->si_signo = SIGTRAP;
        info->si_code = si_code;
-       info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+       info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 }
 
 void user_single_step_siginfo(struct task_struct *tsk,
index 2f355d229a587771680b28080d92fd06f345d7e7..e5ecd20e72dd56d82447c94c17e6e85ae29eba90 100644 (file)
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
        set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+       if (!pvclock_vdso_info) {
+               BUG();
+               return NULL;
+       }
+
+       return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+       return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
 #ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+                               void *v)
+{
+       struct task_migration_notifier *mn = v;
+       struct pvclock_vsyscall_time_info *pvti;
+
+       pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+       /* this is NULL when pvclock vsyscall is not initialized */
+       if (unlikely(pvti == NULL))
+               return NOTIFY_DONE;
+
+       pvti->migrate_count++;
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+       .notifier_call = pvclock_task_migrate,
+};
+
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
        WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
+       pvclock_vdso_info = i;
+
        for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
                __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
                             __pa(i) + (idx*PAGE_SIZE),
                             PAGE_KERNEL_VVAR);
        }
 
+
+       register_task_migration_notifier(&pvclock_migrate);
+
        return 0;
 }
 #endif
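
The notifier above only increments migrate_count when a task leaves a CPU. A minimal sketch (types invented) of how a lockless reader -- e.g. vDSO time code -- can consume such a counter: sample it before and after reading the per-cpu data and retry if the task migrated in between:

struct pvti_like {
	volatile unsigned int migrate_count;
	volatile unsigned long long system_time;
};

static unsigned long long read_time(const struct pvti_like *p)
{
	unsigned int c;
	unsigned long long t;

	do {
		c = p->migrate_count;
		t = p->system_time;	/* the real code also folds in a TSC delta */
	} while (c != p->migrate_count);

	return t;
}
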
index e13f8e7c22a68c3d9590b8deb69e116d2f625f68..77630d57e7bf30ac7f281e15c440cd7b7bae37a9 100644 (file)
@@ -226,23 +226,23 @@ swap_pages:
        movl    (%ebx), %ecx
        addl    $4, %ebx
 1:
-       testl   $0x1,   %ecx  /* is it a destination page */
+       testb   $0x1, %cl     /* is it a destination page */
        jz      2f
        movl    %ecx,   %edi
        andl    $0xfffff000, %edi
        jmp     0b
 2:
-       testl   $0x2,   %ecx  /* is it an indirection page */
+       testb   $0x2, %cl    /* is it an indirection page */
        jz      2f
        movl    %ecx,   %ebx
        andl    $0xfffff000, %ebx
        jmp     0b
 2:
-       testl   $0x4,   %ecx /* is it the done indicator */
+       testb   $0x4, %cl    /* is it the done indicator */
        jz      2f
        jmp     3f
 2:
-       testl   $0x8,   %ecx /* is it the source indicator */
+       testb   $0x8, %cl    /* is it the source indicator */
        jz      0b           /* Ignore it otherwise */
        movl    %ecx,   %esi /* For every source page do a copy */
        andl    $0xfffff000, %esi
index 3fd2c693e4752d01e071de68ef57e2db8b47b605..98111b38ebfd6eb9949242c5aae7b18bbbdb4489 100644 (file)
@@ -123,7 +123,7 @@ identity_mapped:
         * Set cr4 to a known state:
         *  - physical address extension enabled
         */
-       movq    $X86_CR4_PAE, %rax
+       movl    $X86_CR4_PAE, %eax
        movq    %rax, %cr4
 
        jmp 1f
@@ -221,23 +221,23 @@ swap_pages:
        movq    (%rbx), %rcx
        addq    $8,     %rbx
 1:
-       testq   $0x1,   %rcx  /* is it a destination page? */
+       testb   $0x1,   %cl   /* is it a destination page? */
        jz      2f
        movq    %rcx,   %rdi
        andq    $0xfffffffffffff000, %rdi
        jmp     0b
 2:
-       testq   $0x2,   %rcx  /* is it an indirection page? */
+       testb   $0x2,   %cl   /* is it an indirection page? */
        jz      2f
        movq    %rcx,   %rbx
        andq    $0xfffffffffffff000, %rbx
        jmp     0b
 2:
-       testq   $0x4,   %rcx  /* is it the done indicator? */
+       testb   $0x4,   %cl   /* is it the done indicator? */
        jz      2f
        jmp     3f
 2:
-       testq   $0x8,   %rcx  /* is it the source indicator? */
+       testb   $0x8,   %cl   /* is it the source indicator? */
        jz      0b            /* Ignore it otherwise */
        movq    %rcx,   %rsi  /* For every source page do a copy */
        andq    $0xfffffffffffff000, %rsi
@@ -246,17 +246,17 @@ swap_pages:
        movq    %rsi, %rax
 
        movq    %r10, %rdi
-       movq    $512,   %rcx
+       movl    $512, %ecx
        rep ; movsq
 
        movq    %rax, %rdi
        movq    %rdx, %rsi
-       movq    $512,   %rcx
+       movl    $512, %ecx
        rep ; movsq
 
        movq    %rdx, %rdi
        movq    %r10, %rsi
-       movq    $512,   %rcx
+       movl    $512, %ecx
        rep ; movsq
 
        lea     PAGE_SIZE(%rax), %rsi
index 0a2421cca01fad095bbb7caa8e7c779d910d751b..d74ac33290ae3eeef46b923c4556d644b72f0d5a 100644 (file)
@@ -354,7 +354,7 @@ static void __init relocate_initrd(void)
                mapaddr = ramdisk_image & PAGE_MASK;
                p = early_memremap(mapaddr, clen+slop);
                memcpy(q, p+slop, clen);
-               early_iounmap(p, clen+slop);
+               early_memunmap(p, clen+slop);
                q += clen;
                ramdisk_image += clen;
                ramdisk_size  -= clen;
@@ -438,7 +438,7 @@ static void __init parse_setup_data(void)
                data_len = data->len + sizeof(struct setup_data);
                data_type = data->type;
                pa_next = data->next;
-               early_iounmap(data, sizeof(*data));
+               early_memunmap(data, sizeof(*data));
 
                switch (data_type) {
                case SETUP_E820_EXT:
@@ -470,7 +470,7 @@ static void __init e820_reserve_setup_data(void)
                         E820_RAM, E820_RESERVED_KERN);
                found = 1;
                pa_data = data->next;
-               early_iounmap(data, sizeof(*data));
+               early_memunmap(data, sizeof(*data));
        }
        if (!found)
                return;
@@ -491,7 +491,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
                data = early_memremap(pa_data, sizeof(*data));
                memblock_reserve(pa_data, sizeof(*data) + data->len);
                pa_data = data->next;
-               early_iounmap(data, sizeof(*data));
+               early_memunmap(data, sizeof(*data));
        }
 }
 
@@ -832,10 +832,15 @@ static void __init trim_low_memory_range(void)
 static int
 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 {
-       pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
-                "(relocation range: 0x%lx-0x%lx)\n",
-                (unsigned long)&_text - __START_KERNEL, __START_KERNEL,
-                __START_KERNEL_map, MODULES_VADDR-1);
+       if (kaslr_enabled()) {
+               pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
+                        (unsigned long)&_text - __START_KERNEL,
+                        __START_KERNEL,
+                        __START_KERNEL_map,
+                        MODULES_VADDR-1);
+       } else {
+               pr_emerg("Kernel Offset: disabled\n");
+       }
 
        return 0;
 }
index e5042463c1bca59107e107117984c37753d732c3..3e581865c8e2a048bbc307a9d3fc4e04ad14b4d7 100644 (file)
@@ -61,8 +61,7 @@
        regs->seg = GET_SEG(seg) | 3;                   \
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-                      unsigned long *pax)
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
        void __user *buf;
        unsigned int tmpflags;
@@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 #endif /* CONFIG_X86_32 */
 
                COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-               COPY(dx); COPY(cx); COPY(ip);
+               COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 
 #ifdef CONFIG_X86_64
                COPY(r8);
@@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
                COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
                COPY_SEG_CPL3(cs);
                COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-               /* Kernel saves and restores only the CS segment register on signals,
-                * which is the bare minimum needed to allow mixed 32/64-bit code.
-                * App's signal handler can save/restore other segments if needed. */
-               COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
 
                get_user_ex(tmpflags, &sc->flags);
                regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
                regs->orig_ax = -1;             /* disable syscall checks */
 
                get_user_ex(buf, &sc->fpstate);
-
-               get_user_ex(*pax, &sc->ax);
        } get_user_catch(err);
 
        err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
 
+       force_iret();
+
        return err;
 }
 
@@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 #else /* !CONFIG_X86_32 */
                put_user_ex(regs->flags, &sc->flags);
                put_user_ex(regs->cs, &sc->cs);
-               put_user_ex(0, &sc->gs);
-               put_user_ex(0, &sc->fs);
+               put_user_ex(0, &sc->__pad2);
+               put_user_ex(0, &sc->__pad1);
+               put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
                put_user_ex(fpstate, &sc->fpstate);
@@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
        regs->sp = (unsigned long)frame;
 
-       /* Set up the CS register to run signal handlers in 64-bit mode,
-          even if the handler happens to be interrupting 32-bit code. */
+       /*
+        * Set up the CS and SS registers to run signal handlers in
+        * 64-bit mode, even if the handler happens to be interrupting
+        * 32-bit or 16-bit code.
+        *
+        * SS is subtle.  In 64-bit mode, we don't need any particular
+        * SS descriptor, but we do need SS to be valid.  It's possible
+        * that the old SS is entirely bogus -- this can happen if the
+        * signal we're trying to deliver is #GP or #SS caused by a bad
+        * SS value.
+        */
        regs->cs = __USER_CS;
+       regs->ss = __USER_DS;
 
        return 0;
 }
@@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
 {
        struct pt_regs *regs = current_pt_regs();
        struct sigframe __user *frame;
-       unsigned long ax;
        sigset_t set;
 
        frame = (struct sigframe __user *)(regs->sp - 8);
@@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->sc, &ax))
+       if (restore_sigcontext(regs, &frame->sc))
                goto badframe;
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "sigreturn");
@@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
 {
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe __user *frame;
-       unsigned long ax;
        sigset_t set;
 
        frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
 
        if (restore_altstack(&frame->uc.uc_stack))
                goto badframe;
 
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "rt_sigreturn");
@@ -679,7 +680,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
                 * Ensure the signal handler starts with the new fpu state.
                 */
                if (used_math())
-                       drop_init_fpu(current);
+                       fpu_reset_state(current);
        }
        signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
@@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe_x32 __user *frame;
        sigset_t set;
-       unsigned long ax;
 
        frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
 
        if (compat_restore_altstack(&frame->uc.uc_stack))
                goto badframe;
 
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "x32 rt_sigreturn");
index febc6aabc72e049443f68c167622d50cd8344f16..7035f6b21c3f99126f5362a8ba5ced4bbe565111 100644 (file)
@@ -779,6 +779,26 @@ out:
        return boot_error;
 }
 
+void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+       /* Just in case we booted with a single CPU. */
+       alternatives_enable_smp();
+
+       per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+       /* Stack for startup_32 can be just as for start_secondary onwards */
+       irq_ctx_init(cpu);
+       per_cpu(cpu_current_top_of_stack, cpu) =
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+#else
+       clear_tsk_thread_flag(idle, TIF_FORK);
+       initial_gs = per_cpu_offset(cpu);
+#endif
+       per_cpu(kernel_stack, cpu) =
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+}
+
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
        int cpu0_nmi_registered = 0;
        unsigned long timeout;
 
-       /* Just in case we booted with a single CPU. */
-       alternatives_enable_smp();
-
        idle->thread.sp = (unsigned long) (((struct pt_regs *)
                          (THREAD_SIZE +  task_stack_page(idle))) - 1);
-       per_cpu(current_task, cpu) = idle;
 
-#ifdef CONFIG_X86_32
-       /* Stack for startup_32 can be just as for start_secondary onwards */
-       irq_ctx_init(cpu);
-#else
-       clear_tsk_thread_flag(idle, TIF_FORK);
-       initial_gs = per_cpu_offset(cpu);
-#endif
-       per_cpu(kernel_stack, cpu) =
-               (unsigned long)task_stack_page(idle) -
-               KERNEL_STACK_OFFSET + THREAD_SIZE;
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
        stack_start  = idle->thread.sp;
@@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
        /* the FPU context is blank, nobody can own it */
        __cpu_disable_lazy_restore(cpu);
 
+       common_cpu_up(cpu, tidle);
+
        err = do_boot_cpu(apicid, cpu, tidle);
        if (err) {
                pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
@@ -1086,8 +1094,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
                return SMP_NO_APIC;
        }
 
-       verify_local_APIC();
-
        /*
         * If SMP should be disabled, then really disable it!
         */
index 30277e27431acde9a9320e0b1be4470bddb40e3a..10e0272d789a189b7215100a1d66a676d9b4bbfa 100644 (file)
@@ -34,10 +34,26 @@ static unsigned long get_align_mask(void)
        return va_align.mask;
 }
 
+/*
+ * To avoid aliasing in the I$ on AMD F15h, the bits defined by the
+ * va_align.bits, [12:upper_bit), are set to a random value instead of
+ * zeroing them. This random value is computed once per boot. This form
+ * of ASLR is known as "per-boot ASLR".
+ *
+ * To achieve this, the random value is added to the info.align_offset
+ * value before calling vm_unmapped_area() or ORed directly to the
+ * address.
+ */
+static unsigned long get_align_bits(void)
+{
+       return va_align.bits & get_align_mask();
+}
+
 unsigned long align_vdso_addr(unsigned long addr)
 {
        unsigned long align_mask = get_align_mask();
-       return (addr + align_mask) & ~align_mask;
+       addr = (addr + align_mask) & ~align_mask;
+       return addr | get_align_bits();
 }
 
 static int __init control_va_addr_alignment(char *str)
@@ -135,8 +151,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
        info.length = len;
        info.low_limit = begin;
        info.high_limit = end;
-       info.align_mask = filp ? get_align_mask() : 0;
+       info.align_mask = 0;
        info.align_offset = pgoff << PAGE_SHIFT;
+       if (filp) {
+               info.align_mask = get_align_mask();
+               info.align_offset += get_align_bits();
+       }
        return vm_unmapped_area(&info);
 }
 
@@ -174,8 +194,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        info.length = len;
        info.low_limit = PAGE_SIZE;
        info.high_limit = mm->mmap_base;
-       info.align_mask = filp ? get_align_mask() : 0;
+       info.align_mask = 0;
        info.align_offset = pgoff << PAGE_SHIFT;
+       if (filp) {
+               info.align_mask = get_align_mask();
+               info.align_offset += get_align_bits();
+       }
        addr = vm_unmapped_area(&info);
        if (!(addr & ~PAGE_MASK))
                return addr;
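
Both hunks above apply the per-boot random bits only to file mappings: the bits are added to info.align_offset before vm_unmapped_area(), or ORed into an already aligned vdso address. A toy calculation with made-up values, showing why the OR cannot disturb the alignment as long as the bits fall inside the mask:

#include <stdio.h>

int main(void)
{
	unsigned long align_mask = 0x7fff;		/* 32 KB alignment (example) */
	unsigned long align_bits = 0x3000 & align_mask;	/* "random" per-boot bits */
	unsigned long addr = 0x7f0000001234;

	addr = (addr + align_mask) & ~align_mask;	/* round up: low 15 bits now zero */
	addr |= align_bits;				/* sets bits only inside the mask */
	printf("0x%lx\n", addr);			/* 0x7f000000b000 */
	return 0;
}
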
index e9bcd57d8a9eb862212351527de3597040e611ba..3777189c4a19f04d1b7118ce2fae228e86356592 100644 (file)
@@ -5,21 +5,29 @@
 #include <linux/cache.h>
 #include <asm/asm-offsets.h>
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#ifdef CONFIG_IA32_EMULATION
+#define SYM(sym, compat) compat
+#else
+#define SYM(sym, compat) sym
+#define ia32_sys_call_table sys_call_table
+#define __NR_ia32_syscall_max __NR_syscall_max
+#endif
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
 
 typedef asmlinkage void (*sys_call_ptr_t)(void);
 
 extern asmlinkage void sys_ni_syscall(void);
 
-__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
        /*
         * Smells like a compiler bug -- it doesn't work
         * when the & below is removed.
         */
-       [0 ... __NR_syscall_max] = &sys_ni_syscall,
+       [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
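
The SYM() shim above lets a single syscall list expand to either the native or the compat entry points, and aliases the table name when IA32 emulation is off. A minimal userspace sketch of the underlying X-macro pattern -- one list expanded twice, first into declarations, then into designated initializers (entries invented):

#include <stdio.h>

#define MY_CALLS \
	X(0, call_read) \
	X(1, call_write)

#define X(nr, sym) static void sym(void) { puts(#sym); }
MY_CALLS
#undef X

typedef void (*call_ptr_t)(void);

static const call_ptr_t table[] = {
#define X(nr, sym) [nr] = sym,
	MY_CALLS
#undef X
};

int main(void)
{
	table[1]();	/* prints "call_write" */
	return 0;
}
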
index 25adc0e16eaa6349e78bd909860f6710705acb8e..d39c09119db6d2bf9e7ed6f1921f26f45dee3019 100644 (file)
@@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
 
-       if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+       if (!user_mode(regs) && in_lock_functions(pc)) {
 #ifdef CONFIG_FRAME_POINTER
                return *(unsigned long *)(regs->bp + sizeof(long));
 #else
index 4ff5d162ff9fd55381259ff8dd96f84064ecea72..f4fa991406cd78463299631466298e46cf2ffcea 100644 (file)
@@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
 {
        enum ctx_state prev_state;
 
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                /* Other than that, we're just an exception. */
                prev_state = exception_enter();
        } else {
@@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
        /* Must be before exception_exit. */
        preempt_count_sub(HARDIRQ_OFFSET);
 
-       if (user_mode_vm(regs))
+       if (user_mode(regs))
                return exception_exit(prev_state);
        else
                rcu_nmi_exit();
@@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
  *
  * IST exception handlers normally cannot schedule.  As a special
  * exception, if the exception interrupted userspace code (i.e.
- * user_mode_vm(regs) would return true) and the exception was not
+ * user_mode(regs) would return true) and the exception was not
  * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
  * begins a non-atomic section within an ist_enter()/ist_exit() region.
  * Callers are responsible for enabling interrupts themselves inside
@@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
  */
 void ist_begin_non_atomic(struct pt_regs *regs)
 {
-       BUG_ON(!user_mode_vm(regs));
+       BUG_ON(!user_mode(regs));
 
        /*
         * Sanity check: we need to be on the normal thread stack.  This
         * will catch asm bugs and any attempt to use ist_preempt_enable
         * from double_fault.
         */
-       BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
-               & ~(THREAD_SIZE - 1)) != 0);
+       BUG_ON((unsigned long)(current_top_of_stack() -
+                              current_stack_pointer()) >= THREAD_SIZE);
 
        preempt_count_sub(HARDIRQ_OFFSET);
 }
@@ -194,8 +194,7 @@ static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                  struct pt_regs *regs, long error_code)
 {
-#ifdef CONFIG_X86_32
-       if (regs->flags & X86_VM_MASK) {
+       if (v8086_mode(regs)) {
                /*
                 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
                 * On nmi (interrupt 2), do_trap should not be called.
@@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                }
                return -1;
        }
-#endif
+
        if (!user_mode(regs)) {
                if (!fixup_exception(regs)) {
                        tsk->thread.error_code = error_code;
@@ -384,7 +383,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
                goto exit;
        conditional_sti(regs);
 
-       if (!user_mode_vm(regs))
+       if (!user_mode(regs))
                die("bounds", regs, error_code);
 
        if (!cpu_feature_enabled(X86_FEATURE_MPX)) {
@@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
        prev_state = exception_enter();
        conditional_sti(regs);
 
-#ifdef CONFIG_X86_32
-       if (regs->flags & X86_VM_MASK) {
+       if (v8086_mode(regs)) {
                local_irq_enable();
                handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
                goto exit;
        }
-#endif
 
        tsk = current;
        if (!user_mode(regs)) {
@@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
        /* Copy the remainder of the stack from the current stack. */
        memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
 
-       BUG_ON(!user_mode_vm(&new_stack->regs));
+       BUG_ON(!user_mode(&new_stack->regs));
        return new_stack;
 }
 NOKPROBE_SYMBOL(fixup_bad_iret);
@@ -637,7 +634,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
         * then it's very likely the result of an icebp/int01 trap.
         * User wants a sigtrap for that.
         */
-       if (!dr6 && user_mode_vm(regs))
+       if (!dr6 && user_mode(regs))
                user_icebp = 1;
 
        /* Catch kmemcheck conditions first of all! */
@@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
        /* It's safe to allow irq's after DR6 has been saved */
        preempt_conditional_sti(regs);
 
-       if (regs->flags & X86_VM_MASK) {
+       if (v8086_mode(regs)) {
                handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
                                        X86_TRAP_DB);
                preempt_conditional_cli(regs);
@@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
                return;
        conditional_sti(regs);
 
-       if (!user_mode_vm(regs))
+       if (!user_mode(regs))
        {
                if (!fixup_exception(regs)) {
                        task->thread.error_code = error_code;
@@ -734,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
        /*
         * Save the info for the exception handler and clear the error.
         */
-       save_init_fpu(task);
+       unlazy_fpu(task);
        task->thread.trap_nr = trapnr;
        task->thread.error_code = error_code;
        info.si_signo = SIGFPE;
@@ -863,7 +860,7 @@ void math_state_restore(void)
        kernel_fpu_disable();
        __thread_fpu_begin(tsk);
        if (unlikely(restore_fpu_checking(tsk))) {
-               drop_init_fpu(tsk);
+               fpu_reset_state(tsk);
                force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk);
        } else {
                tsk->thread.fpu_counter++;
@@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 /* Set of traps needed for early debugging. */
 void __init early_trap_init(void)
 {
-       set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+       /*
+        * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
+        * is ready in cpu_init() <-- trap_init(). Before trap_init(),
+        * the CPU runs at ring 0, so it is impossible to hit an invalid
+        * stack.  Using the original stack works well enough at this
+        * early stage. DEBUG_STACK will be set up after cpu_init(), in
+        * trap_init().
+        *
+        * We don't need to set trace_idt_table like set_intr_gate(),
+        * since we don't have trace_debug and it will be reset to
+        * 'debug' in trap_init() by set_intr_gate_ist().
+        */
+       set_intr_gate_notrace(X86_TRAP_DB, debug);
        /* int3 can be called from all */
-       set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+       set_system_intr_gate(X86_TRAP_BP, &int3);
 #ifdef CONFIG_X86_32
        set_intr_gate(X86_TRAP_PF, page_fault);
 #endif
@@ -1005,6 +1014,15 @@ void __init trap_init(void)
         */
        cpu_init();
 
+       /*
+        * X86_TRAP_DB and X86_TRAP_BP have been set
+        * in early_trap_init(). However, IST works only after
+        * cpu_init() loads TSS. See comments in early_trap_init().
+        */
+       set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+       /* int3 can be called from all */
+       set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+
        x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
index 81f8adb0679e548d31af5982297c85141e3693f9..0b81ad67da07fa36e57577de5f7165ab320a55a1 100644 (file)
@@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
        int ret = NOTIFY_DONE;
 
        /* We are only interested in userspace traps */
-       if (regs && !user_mode_vm(regs))
+       if (regs && !user_mode(regs))
                return NOTIFY_DONE;
 
        switch (val) {
index e8edcf52e06911fe5446e543f40368ea69114225..fc9db6ef2a95937b1abd4f37f3a466043ba818d4 100644 (file)
@@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
                do_exit(SIGSEGV);
        }
 
-       tss = &per_cpu(init_tss, get_cpu());
+       tss = &per_cpu(cpu_tss, get_cpu());
        current->thread.sp0 = current->thread.saved_sp0;
        current->thread.sysenter_cs = __KERNEL_CS;
        load_sp0(tss, &current->thread);
@@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
        tsk->thread.saved_fs = info->regs32->fs;
        tsk->thread.saved_gs = get_user_gs(info->regs32);
 
-       tss = &per_cpu(init_tss, get_cpu());
+       tss = &per_cpu(cpu_tss, get_cpu());
        tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
index c7d791f32b98e4f8598684306f086c6193ff6e93..51e3304169951619362ea4a1494716e4f20696bf 100644 (file)
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
        gtod_write_begin(vdata);
 
        /* copy vsyscall data */
-       vdata->vclock_mode      = tk->tkr.clock->archdata.vclock_mode;
-       vdata->cycle_last       = tk->tkr.cycle_last;
-       vdata->mask             = tk->tkr.mask;
-       vdata->mult             = tk->tkr.mult;
-       vdata->shift            = tk->tkr.shift;
+       vdata->vclock_mode      = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->cycle_last       = tk->tkr_mono.cycle_last;
+       vdata->mask             = tk->tkr_mono.mask;
+       vdata->mult             = tk->tkr_mono.mult;
+       vdata->shift            = tk->tkr_mono.shift;
 
        vdata->wall_time_sec            = tk->xtime_sec;
-       vdata->wall_time_snsec          = tk->tkr.xtime_nsec;
+       vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
 
        vdata->monotonic_time_sec       = tk->xtime_sec
                                        + tk->wall_to_monotonic.tv_sec;
-       vdata->monotonic_time_snsec     = tk->tkr.xtime_nsec
+       vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
                                        + ((u64)tk->wall_to_monotonic.tv_nsec
-                                               << tk->tkr.shift);
+                                               << tk->tkr_mono.shift);
        while (vdata->monotonic_time_snsec >=
-                                       (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+                                       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                vdata->monotonic_time_snsec -=
-                                       ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+                                       ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
                vdata->monotonic_time_sec++;
        }
 
        vdata->wall_time_coarse_sec     = tk->xtime_sec;
-       vdata->wall_time_coarse_nsec    = (long)(tk->tkr.xtime_nsec >>
-                                                tk->tkr.shift);
+       vdata->wall_time_coarse_nsec    = (long)(tk->tkr_mono.xtime_nsec >>
+                                                tk->tkr_mono.shift);
 
        vdata->monotonic_time_coarse_sec =
                vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
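
The hunk above is part of the timekeeping rename that turned tk->tkr into tk->tkr_mono; the arithmetic is unchanged. The *_snsec fields hold nanoseconds pre-scaled by tkr_mono.shift, so the while loop carries whole seconds out of the shifted accumulator. A minimal standalone sketch of that normalization (illustrative, not from the patch):

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Carry whole seconds out of a "shifted nanoseconds" accumulator,
 * exactly as the while loop in update_vsyscall() does. */
static void normalize_snsec(uint64_t *sec, uint64_t *snsec, unsigned shift)
{
	while (*snsec >= (NSEC_PER_SEC << shift)) {
		*snsec -= NSEC_PER_SEC << shift;
		(*sec)++;
	}
}

int main(void)
{
	uint64_t sec = 10, snsec = 2500000000ULL << 8;	/* 2.5 s at shift 8 */

	normalize_snsec(&sec, &snsec, 8);
	printf("%llu s + %llu shifted ns\n",		/* 12 s + (0.5 s << 8) */
	       (unsigned long long)sec, (unsigned long long)snsec);
	return 0;
}
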
index cdc6cf90307800abb83f1b4b516ba389212c3dd0..87a815b85f3e5c1b9ee6db83224ad954eef1df75 100644 (file)
@@ -342,7 +342,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                         config_enabled(CONFIG_IA32_EMULATION));
 
        if (!buf) {
-               drop_init_fpu(tsk);
+               fpu_reset_state(tsk);
                return 0;
        }
 
@@ -416,7 +416,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                 */
                user_fpu_begin();
                if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
-                       drop_init_fpu(tsk);
+                       fpu_reset_state(tsk);
                        return -1;
                }
        }
@@ -678,19 +678,13 @@ void xsave_init(void)
        this_func();
 }
 
-static inline void __init eager_fpu_init_bp(void)
-{
-       current->thread.fpu.state =
-           alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct));
-       if (!init_xstate_buf)
-               setup_init_fpu_buf();
-}
-
-void eager_fpu_init(void)
+/*
+ * setup_init_fpu_buf() is __init and it is OK to call it here because
+ * init_xstate_buf will be unset only once during boot.
+ */
+void __init_refok eager_fpu_init(void)
 {
-       static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
-
-       clear_used_math();
+       WARN_ON(used_math());
        current_thread_info()->status = 0;
 
        if (eagerfpu == ENABLE)
@@ -701,21 +695,8 @@ void eager_fpu_init(void)
                return;
        }
 
-       if (boot_func) {
-               boot_func();
-               boot_func = NULL;
-       }
-
-       /*
-        * This is same as math_state_restore(). But use_xsave() is
-        * not yet patched to use math_state_restore().
-        */
-       init_fpu(current);
-       __thread_fpu_begin(current);
-       if (cpu_has_xsave)
-               xrstor_state(init_xstate_buf, -1);
-       else
-               fxrstor_checking(&init_xstate_buf->i387);
+       if (!init_xstate_buf)
+               setup_init_fpu_buf();
 }
 
 /*
index 08f790dfadc9fb90dc3591687aaa93cc73fe1f88..16e8f962eaadf9d8f0d1cb4942288ebcd3f12a94 100644 (file)
@@ -1,5 +1,5 @@
 
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
 
 CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
index 8a80737ee6e6ec14bc7d9a6ffe08d9f580d3c890..59b69f6a2844cdce101a69c3bb34eb7ccd30556f 100644 (file)
@@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
                ((best->eax & 0xff00) >> 8) != 0)
                return -EINVAL;
 
+       /* Update physical-address width */
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
        kvm_pmu_cpuid_update(vcpu);
        return 0;
 }
@@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
        }
 }
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+       if (!best || best->eax < 0x80000008)
+               goto not_found;
+       best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+       if (best)
+               return best->eax & 0xff;
+not_found:
+       return 36;
+}
+EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
+
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                             struct kvm_cpuid *cpuid,
@@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid_entry2 *best;
-
-       best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
-       if (!best || best->eax < 0x80000008)
-               goto not_found;
-       best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-       if (best)
-               return best->eax & 0xff;
-not_found:
-       return 36;
-}
-EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
-
 /*
  * If no match is found, check whether we exceed the vCPU's limit
  * and return the content of the highest valid _standard_ leaf instead.
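
This change hoists the CPUID-table walk out of the hot paths: kvm_update_cpuid() computes the guest's physical-address width once and caches it in vcpu->arch.maxphyaddr, and cpuid_maxphyaddr() (see the cpuid.h hunk below) shrinks to a read of that field. A standalone sketch of the fallback chain, assuming the two relevant CPUID values have already been fetched (hypothetical helper, not a KVM API):

#include <stdio.h>
#include <stdint.h>

/* max_ext is EAX of leaf 0x80000000; eax_88 is EAX of leaf 0x80000008. */
static int query_maxphyaddr(uint32_t max_ext, uint32_t eax_88)
{
	if (max_ext < 0x80000008)
		return 36;		/* architectural default */
	return eax_88 & 0xff;		/* bits 7:0 = physical width */
}

int main(void)
{
	printf("%d\n", query_maxphyaddr(0x80000008, 0x3028));	/* 40 */
	printf("%d\n", query_maxphyaddr(0x80000004, 0));	/* 36 */
	return 0;
}
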
index 4452eedfaedd0a4849b54d4137edd5e85b24ae46..c3b1ad9fca818befb9e5920f7eb7c0d2b703d245 100644 (file)
@@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
                              struct kvm_cpuid_entry2 __user *entries);
 void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
+
+static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.maxphyaddr;
+}
 
 static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
 
        if (!static_cpu_has(X86_FEATURE_XSAVE))
-               return 0;
+               return false;
 
        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        return best && (best->ecx & bit(X86_FEATURE_XSAVE));
index 106c01557f2b63706eca28e462a3b072b590f0c5..630bcb0d7a045b4930213eac1c1bbee3ef7d0ebe 100644 (file)
@@ -248,27 +248,7 @@ struct mode_dual {
        struct opcode mode64;
 };
 
-/* EFLAGS bit definitions. */
-#define EFLG_ID (1<<21)
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_IOPL (3<<12)
-#define EFLG_NT (1<<14)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
-#define EFLG_RESERVED_ONE_MASK 2
 
 enum x86_transfer_type {
        X86_TRANSFER_NONE,
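
emulate.c had carried a private copy of the EFLAGS bit definitions; this series drops them in favor of the generic X86_EFLAGS_* constants (the ad-hoc EFLG_RESERVED_ONE_MASK becomes X86_EFLAGS_FIXED, the always-one bit 1). The values are identical, so the substitutions in the hunks below are purely mechanical — a few compile-time spot checks, assuming the uapi <asm/processor-flags.h> definitions:

#include <asm/processor-flags.h>

_Static_assert(X86_EFLAGS_CF    == (1 << 0),  "carry");
_Static_assert(X86_EFLAGS_ZF    == (1 << 6),  "zero");
_Static_assert(X86_EFLAGS_IOPL  == (3 << 12), "I/O privilege level");
_Static_assert(X86_EFLAGS_ID    == (1 << 21), "CPUID toggle");
_Static_assert(X86_EFLAGS_FIXED == (1 << 1),  "reserved, always one");
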
@@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
  * These EFLAGS bits are restored from saved value during emulation, and
  * any changes are written back to the saved value after emulation.
  */
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
+                    X86_EFLAGS_PF|X86_EFLAGS_CF)
 
 #ifdef CONFIG_X86_64
 #define ON64(x) x
@@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask)
        *dest = (*dest & ~mask) | (src & mask);
 }
 
+static void assign_register(unsigned long *reg, u64 val, int bytes)
+{
+       /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+       switch (bytes) {
+       case 1:
+               *(u8 *)reg = (u8)val;
+               break;
+       case 2:
+               *(u16 *)reg = (u16)val;
+               break;
+       case 4:
+               *reg = (u32)val;
+               break;  /* 64b: zero-extend */
+       case 8:
+               *reg = val;
+               break;
+       }
+}
+
 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
 {
        return (1UL << (ctxt->ad_bytes << 3)) - 1;
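
assign_register() factors the register-write width rules out of write_register_operand() so that em_popa() (further down) can reuse them. The subtle case is the 4-byte write: in 64-bit mode it clears bits 63:32, while 1- and 2-byte writes leave the upper bytes intact. A runnable demonstration of the same switch (the byte-pointer casts assume a little-endian host, as on x86 itself):

#include <stdio.h>
#include <stdint.h>

static void assign(uint64_t *reg, uint64_t val, int bytes)
{
	switch (bytes) {
	case 1: *(uint8_t *)reg  = (uint8_t)val;  break; /* AL:  keep 63:8  */
	case 2: *(uint16_t *)reg = (uint16_t)val; break; /* AX:  keep 63:16 */
	case 4: *reg = (uint32_t)val; break;             /* EAX: zero 63:32 */
	case 8: *reg = val; break;                       /* RAX: full write */
	}
}

int main(void)
{
	uint64_t rax = ~0ULL;

	assign(&rax, 0x12, 2);
	printf("%016llx\n", (unsigned long long)rax);	/* ffffffffffff0012 */
	assign(&rax, 0x12, 4);
	printf("%016llx\n", (unsigned long long)rax);	/* 0000000000000012 */
	return 0;
}
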
@@ -943,6 +943,22 @@ FASTOP2(xadd);
 
 FASTOP2R(cmp, cmp_r);
 
+static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
+{
+       /* If src is zero, do not writeback, but update flags */
+       if (ctxt->src.val == 0)
+               ctxt->dst.type = OP_NONE;
+       return fastop(ctxt, em_bsf);
+}
+
+static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+{
+       /* If src is zero, do not writeback, but update flags */
+       if (ctxt->src.val == 0)
+               ctxt->dst.type = OP_NONE;
+       return fastop(ctxt, em_bsr);
+}
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
        u8 rc;
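
BSF and BSR leave the destination undefined when the source is zero (AMD documents it as unchanged), so the new em_bsf_c()/em_bsr_c() wrappers suppress the register writeback in that case and let the fastop update only the flags; the twobyte_table entries switch from F() to I() further down to route through them. The intended semantics, as a small standalone model (not the fastop machinery):

#include <stdio.h>
#include <stdint.h>

/* Returns the resulting ZF; on a zero source, *dst is left untouched. */
static int bsf(uint64_t *dst, uint64_t src)
{
	if (src == 0)
		return 1;
	*dst = __builtin_ctzll(src);	/* index of lowest set bit */
	return 0;
}

int main(void)
{
	uint64_t d = 0xdead;
	int zf;

	zf = bsf(&d, 0x50);
	printf("zf=%d dst=%llx\n", zf, (unsigned long long)d); /* zf=0 dst=4 */
	zf = bsf(&d, 0);
	printf("zf=%d dst=%llx\n", zf, (unsigned long long)d); /* zf=1 dst=4 */
	return 0;
}
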
@@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                unsigned int in_page, n;
                unsigned int count = ctxt->rep_prefix ?
                        address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
-               in_page = (ctxt->eflags & EFLG_DF) ?
+               in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
                        offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
                        PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
                n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
@@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
        }
 
        if (ctxt->rep_prefix && (ctxt->d & String) &&
-           !(ctxt->eflags & EFLG_DF)) {
+           !(ctxt->eflags & X86_EFLAGS_DF)) {
                ctxt->dst.data = rc->data + rc->pos;
                ctxt->dst.type = OP_MEM_STR;
                ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 static void write_register_operand(struct operand *op)
 {
-       /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-       switch (op->bytes) {
-       case 1:
-               *(u8 *)op->addr.reg = (u8)op->val;
-               break;
-       case 2:
-               *(u16 *)op->addr.reg = (u16)op->val;
-               break;
-       case 4:
-               *op->addr.reg = (u32)op->val;
-               break;  /* 64b: zero-extend */
-       case 8:
-               *op->addr.reg = op->val;
-               break;
-       }
+       return assign_register(op->addr.reg, op->val, op->bytes);
 }
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 {
        int rc;
        unsigned long val, change_mask;
-       int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        int cpl = ctxt->ops->cpl(ctxt);
 
        rc = emulate_pop(ctxt, &val, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
-               | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
+       change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+                     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
+                     X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
+                     X86_EFLAGS_AC | X86_EFLAGS_ID;
 
        switch(ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                if (cpl == 0)
-                       change_mask |= EFLG_IOPL;
+                       change_mask |= X86_EFLAGS_IOPL;
                if (cpl <= iopl)
-                       change_mask |= EFLG_IF;
+                       change_mask |= X86_EFLAGS_IF;
                break;
        case X86EMUL_MODE_VM86:
                if (iopl < 3)
                        return emulate_gp(ctxt, 0);
-               change_mask |= EFLG_IF;
+               change_mask |= X86_EFLAGS_IF;
                break;
        default: /* real mode */
-               change_mask |= (EFLG_IOPL | EFLG_IF);
+               change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
                break;
        }
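
emulate_popf() only lets the guest modify the EFLAGS bits its privilege level permits: IF needs CPL <= IOPL, IOPL itself needs CPL 0, and VM86 mode with IOPL < 3 faults outright. The rewritten change_mask is the same bit set as before, just spelled with the generic names. The protected-mode gating, condensed into a standalone helper:

#include <stdio.h>
#include <stdint.h>

#define EF_IF	(1u << 9)
#define EF_IOPL	(3u << 12)
/* CF|PF|AF|ZF|SF|TF|DF|OF|NT|AC|ID -- always writable via POPF */
#define EF_BASE	(1u<<0 | 1u<<2 | 1u<<4 | 1u<<6 | 1u<<7 | 1u<<8 | \
		 1u<<10 | 1u<<11 | 1u<<14 | 1u<<18 | 1u<<21)

static uint32_t popf_change_mask(int cpl, int iopl)
{
	uint32_t mask = EF_BASE;

	if (cpl == 0)
		mask |= EF_IOPL;	/* only ring 0 may move IOPL */
	if (cpl <= iopl)
		mask |= EF_IF;		/* IF needs I/O privilege */
	return mask;
}

int main(void)
{
	printf("cpl=3 iopl=0: %06x\n", popf_change_mask(3, 0));
	printf("cpl=0 iopl=0: %06x\n", popf_change_mask(0, 0));
	return 0;
}
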
 
@@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-       ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
+       ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
        return em_push(ctxt);
 }
 
@@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 {
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;
+       u32 val;
 
        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
@@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
                        --reg;
                }
 
-               rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
+               rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
                if (rc != X86EMUL_CONTINUE)
                        break;
+               assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
                --reg;
        }
        return rc;
@@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+       ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
 
        ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
        rc = em_push(ctxt);
@@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
        unsigned long temp_eip = 0;
        unsigned long temp_eflags = 0;
        unsigned long cs = 0;
-       unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
-                            EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
-                            EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
-       unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+       unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+                            X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+                            X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
+                            X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
+                            X86_EFLAGS_AC | X86_EFLAGS_ID |
+                            X86_EFLAGS_FIXED;
+       unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+                                 X86_EFLAGS_VIP;
 
        /* TODO: Add stack limit check */
 
@@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 
        ctxt->_eip = temp_eip;
 
-
        if (ctxt->op_bytes == 4)
                ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
        else if (ctxt->op_bytes == 2) {
@@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
        }
 
        ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
-       ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+       ctxt->eflags |= X86_EFLAGS_FIXED;
        ctxt->ops->set_nmi_mask(ctxt, false);
 
        return rc;
@@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
                *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
-               ctxt->eflags &= ~EFLG_ZF;
+               ctxt->eflags &= ~X86_EFLAGS_ZF;
        } else {
                ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
                        (u32) reg_read(ctxt, VCPU_REGS_RBX);
 
-               ctxt->eflags |= EFLG_ZF;
+               ctxt->eflags |= X86_EFLAGS_ZF;
        }
        return X86EMUL_CONTINUE;
 }
@@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
        ctxt->src.val = ctxt->dst.orig_val;
        fastop(ctxt, em_cmp);
 
-       if (ctxt->eflags & EFLG_ZF) {
+       if (ctxt->eflags & X86_EFLAGS_ZF) {
                /* Success: write back to memory; no update of EAX */
                ctxt->src.type = OP_NONE;
                ctxt->dst.val = ctxt->src.orig_val;
@@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
                ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
                ctxt->eflags &= ~msr_data;
-               ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+               ctxt->eflags |= X86_EFLAGS_FIXED;
 #endif
        } else {
                /* legacy mode */
                ops->get_msr(ctxt, MSR_STAR, &msr_data);
                ctxt->_eip = (u32)msr_data;
 
-               ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
+               ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
        }
 
        return X86EMUL_CONTINUE;
@@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        if ((msr_data & 0xfffc) == 0x0)
                return emulate_gp(ctxt, 0);
 
-       ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
-       cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
+       ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
+       cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
        ss_sel = cs_sel + 8;
        if (efer & EFER_LMA) {
                cs.d = 0;
@@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                        return emulate_gp(ctxt, 0);
                break;
        }
-       cs_sel |= SELECTOR_RPL_MASK;
-       ss_sel |= SELECTOR_RPL_MASK;
+       cs_sel |= SEGMENT_RPL_MASK;
+       ss_sel |= SEGMENT_RPL_MASK;
 
        ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
@@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
                return false;
        if (ctxt->mode == X86EMUL_MODE_VM86)
                return true;
-       iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        return ctxt->ops->cpl(ctxt) > iopl;
 }
 
@@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
                return ret;
        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
                                        X86_TRANSFER_TASK_SWITCH, NULL);
-       if (ret != X86EMUL_CONTINUE)
-               return ret;
 
-       return X86EMUL_CONTINUE;
+       return ret;
 }
 
 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
@@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
                struct operand *op)
 {
-       int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
+       int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
 
        register_address_increment(ctxt, reg, df * op->bytes);
        op->addr.mem.ea = register_address(ctxt, reg);
@@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
        return X86EMUL_CONTINUE;
 }
 
-static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+static int em_hypercall(struct x86_emulate_ctxt *ctxt)
 {
        int rc = ctxt->ops->fix_hypercall(ctxt);
 
@@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
        return em_lgdt_lidt(ctxt, true);
 }
 
-static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
-{
-       int rc;
-
-       rc = ctxt->ops->fix_hypercall(ctxt);
-
-       /* Disable writeback. */
-       ctxt->dst.type = OP_NONE;
-       return rc;
-}
-
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
        return em_lgdt_lidt(ctxt, false);
@@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt)
 {
        u32 flags;
 
-       flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+       flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+               X86_EFLAGS_SF;
        flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
 
        ctxt->eflags &= ~0xffUL;
@@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 
 static const struct opcode group7_rm0[] = {
        N,
-       I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+       I(SrcNone | Priv | EmulateOnUD, em_hypercall),
        N, N, N, N, N, N,
 };
 
@@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = {
 
 static const struct opcode group7_rm3[] = {
        DIP(SrcNone | Prot | Priv,              vmrun,          check_svme_pa),
-       II(SrcNone  | Prot | EmulateOnUD,       em_vmmcall,     vmmcall),
+       II(SrcNone  | Prot | EmulateOnUD,       em_hypercall,   vmmcall),
        DIP(SrcNone | Prot | Priv,              vmload,         check_svme_pa),
        DIP(SrcNone | Prot | Priv,              vmsave,         check_svme_pa),
        DIP(SrcNone | Prot | Priv,              stgi,           check_svme),
@@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = {
        N, N,
        G(BitOp, group8),
        F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
-       F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
+       I(DstReg | SrcMem | ModRM, em_bsf_c),
+       I(DstReg | SrcMem | ModRM, em_bsr_c),
        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
        /* 0xC0 - 0xC7 */
        F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
@@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
        if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
             (ctxt->b == 0xae) || (ctxt->b == 0xaf))
            && (((ctxt->rep_prefix == REPE_PREFIX) &&
-                ((ctxt->eflags & EFLG_ZF) == 0))
+                ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
                || ((ctxt->rep_prefix == REPNE_PREFIX) &&
-                   ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+                   ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
                return true;
 
        return false;
@@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                        /* All REP prefixes have the same first termination condition */
                        if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
                                ctxt->eip = ctxt->_eip;
-                               ctxt->eflags &= ~EFLG_RF;
+                               ctxt->eflags &= ~X86_EFLAGS_RF;
                                goto done;
                        }
                }
@@ -4963,9 +4961,9 @@ special_insn:
        }
 
        if (ctxt->rep_prefix && (ctxt->d & String))
-               ctxt->eflags |= EFLG_RF;
+               ctxt->eflags |= X86_EFLAGS_RF;
        else
-               ctxt->eflags &= ~EFLG_RF;
+               ctxt->eflags &= ~X86_EFLAGS_RF;
 
        if (ctxt->execute) {
                if (ctxt->d & Fastop) {
@@ -5014,7 +5012,7 @@ special_insn:
                rc = emulate_int(ctxt, ctxt->src.val);
                break;
        case 0xce:              /* into */
-               if (ctxt->eflags & EFLG_OF)
+               if (ctxt->eflags & X86_EFLAGS_OF)
                        rc = emulate_int(ctxt, 4);
                break;
        case 0xe9: /* jmp rel */
@@ -5027,19 +5025,19 @@ special_insn:
                break;
        case 0xf5:      /* cmc */
                /* complement carry flag from eflags reg */
-               ctxt->eflags ^= EFLG_CF;
+               ctxt->eflags ^= X86_EFLAGS_CF;
                break;
        case 0xf8: /* clc */
-               ctxt->eflags &= ~EFLG_CF;
+               ctxt->eflags &= ~X86_EFLAGS_CF;
                break;
        case 0xf9: /* stc */
-               ctxt->eflags |= EFLG_CF;
+               ctxt->eflags |= X86_EFLAGS_CF;
                break;
        case 0xfc: /* cld */
-               ctxt->eflags &= ~EFLG_DF;
+               ctxt->eflags &= ~X86_EFLAGS_DF;
                break;
        case 0xfd: /* std */
-               ctxt->eflags |= EFLG_DF;
+               ctxt->eflags |= X86_EFLAGS_DF;
                break;
        default:
                goto cannot_emulate;
@@ -5100,7 +5098,7 @@ writeback:
                        }
                        goto done; /* skip rip writeback */
                }
-               ctxt->eflags &= ~EFLG_RF;
+               ctxt->eflags &= ~X86_EFLAGS_RF;
        }
 
        ctxt->eip = ctxt->_eip;
@@ -5137,8 +5135,7 @@ twobyte_insn:
        case 0x40 ... 0x4f:     /* cmov */
                if (test_cc(ctxt->b, ctxt->eflags))
                        ctxt->dst.val = ctxt->src.val;
-               else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
-                        ctxt->op_bytes != 4)
+               else if (ctxt->op_bytes != 4)
                        ctxt->dst.type = OP_NONE; /* no writeback */
                break;
        case 0x80 ... 0x8f: /* jnz rel, etc*/
index 298781d4cfb44b7c6d6536d6d2779ada2eeb150a..4dce6f8b6129ebea2432840154cc97efd513947b 100644 (file)
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr)
                (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
 }
 
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+                               struct kvm_io_device *this,
                            gpa_t addr, int len, const void *data)
 {
        struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
        return 0;
 }
 
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+                          struct kvm_io_device *this,
                           gpa_t addr, int len, void *data)
 {
        struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
        return 0;
 }
 
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int speaker_ioport_write(struct kvm_vcpu *vcpu,
+                               struct kvm_io_device *this,
                                gpa_t addr, int len, const void *data)
 {
        struct kvm_pit *pit = speaker_to_pit(this);
@@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this,
        return 0;
 }
 
-static int speaker_ioport_read(struct kvm_io_device *this,
-                              gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_vcpu *vcpu,
+                                  struct kvm_io_device *this,
+                                  gpa_t addr, int len, void *data)
 {
        struct kvm_pit *pit = speaker_to_pit(this);
        struct kvm_kpit_state *pit_state = &pit->pit_state;
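
All of the i8254/i8259/ioapic/lapic signature churn in this and the following files stems from one API change: the kvm_io_device read/write callbacks (and kvm_io_bus_read/write, visible in the vmx.c hunk near the end) are now handed the issuing struct kvm_vcpu *, so device models no longer have to guess which vCPU triggered the access. After the change the ops table in include/kvm/iodev.h presumably has this shape (sketch, reconstructed from the call sites):

struct kvm_io_device_ops {
	int (*read)(struct kvm_vcpu *vcpu,
		    struct kvm_io_device *this,
		    gpa_t addr, int len, void *val);
	int (*write)(struct kvm_vcpu *vcpu,
		     struct kvm_io_device *this,
		     gpa_t addr, int len, const void *val);
	void (*destructor)(struct kvm_io_device *this);
};

The #include "iodev.h" -> #include <kvm/iodev.h> edits in these files reflect the header moving out of virt/kvm/, which is also why the kvm Makefile hunk above drops -Ivirt/kvm.
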
index dd1b16b611b0ae6c9d2386a7e690e56a774f0b74..c84990b42b5b189550eecc904f781c0e3ecadf3d 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/kthread.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm_kpit_channel_state {
        u32 count; /* can be 65536 */
index 9541ba34126b90123ddfe383453145ddfcf789c4..fef922ff263589de97348e76ee4faeaeb5aaeef2 100644 (file)
@@ -529,42 +529,42 @@ static int picdev_read(struct kvm_pic *s,
        return 0;
 }
 
-static int picdev_master_write(struct kvm_io_device *dev,
+static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                               gpa_t addr, int len, const void *val)
 {
        return picdev_write(container_of(dev, struct kvm_pic, dev_master),
                            addr, len, val);
 }
 
-static int picdev_master_read(struct kvm_io_device *dev,
+static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                              gpa_t addr, int len, void *val)
 {
        return picdev_read(container_of(dev, struct kvm_pic, dev_master),
                            addr, len, val);
 }
 
-static int picdev_slave_write(struct kvm_io_device *dev,
+static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                              gpa_t addr, int len, const void *val)
 {
        return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
                            addr, len, val);
 }
 
-static int picdev_slave_read(struct kvm_io_device *dev,
+static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                             gpa_t addr, int len, void *val)
 {
        return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
                            addr, len, val);
 }
 
-static int picdev_eclr_write(struct kvm_io_device *dev,
+static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                             gpa_t addr, int len, const void *val)
 {
        return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
                            addr, len, val);
 }
 
-static int picdev_eclr_read(struct kvm_io_device *dev,
+static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                            gpa_t addr, int len, void *val)
 {
        return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
index 46d4449772bc714daa658ea6424fb45659095c70..28146f03c51421ce12f728d69613ded0a65699fd 100644 (file)
@@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
 
        old_irr = ioapic->irr;
        ioapic->irr |= mask;
+       if (edge)
+               ioapic->irr_delivered &= ~mask;
        if ((edge && old_irr == ioapic->irr) ||
            (!edge && entry.fields.remote_irr)) {
                ret = 0;
@@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
        irqe.shorthand = 0;
 
        if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
-               ioapic->irr &= ~(1 << irq);
+               ioapic->irr_delivered |= 1 << irq;
 
        if (irq == RTC_GSI && line_status) {
                /*
@@ -473,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
        }
 }
 
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-       smp_rmb();
-       return test_bit(vector, ioapic->handled_vectors);
-}
-
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
        struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -500,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
                 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
 
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-                           void *val)
+static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+                               gpa_t addr, int len, void *val)
 {
        struct kvm_ioapic *ioapic = to_ioapic(this);
        u32 result;
@@ -543,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
        return 0;
 }
 
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-                            const void *val)
+static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+                                gpa_t addr, int len, const void *val)
 {
        struct kvm_ioapic *ioapic = to_ioapic(this);
        u32 data;
@@ -599,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
        ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
        ioapic->ioregsel = 0;
        ioapic->irr = 0;
+       ioapic->irr_delivered = 0;
        ioapic->id = 0;
        memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
        rtc_irq_eoi_tracking_reset(ioapic);
@@ -656,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
        spin_lock(&ioapic->lock);
        memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+       state->irr &= ~ioapic->irr_delivered;
        spin_unlock(&ioapic->lock);
        return 0;
 }
@@ -669,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
        spin_lock(&ioapic->lock);
        memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
        ioapic->irr = 0;
+       ioapic->irr_delivered = 0;
        update_handled_vectors(ioapic);
        kvm_vcpu_request_scan_ioapic(kvm);
        kvm_ioapic_inject_all(ioapic, state->irr);
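
The new irr_delivered mask tracks which edge-triggered IRR bits have already been injected: ioapic_service() now marks the pin delivered instead of clearing IRR (so ioapic_set_irq()'s old_irr == irr comparison still sees it), and kvm_get_ioapic() filters delivered edges out of the state handed to userspace, so a save/restore or migration cycle does not inject them a second time. A toy model of the bookkeeping:

#include <stdio.h>
#include <stdint.h>

static uint32_t irr, irr_delivered;

static void edge_raise(int n)	{ irr |= 1u << n; irr_delivered &= ~(1u << n); }
static void deliver(int n)	{ irr_delivered |= 1u << n; /* irr stays set */ }
static uint32_t saved_irr(void)	{ return irr & ~irr_delivered; }

int main(void)
{
	edge_raise(3);
	deliver(3);			/* injected into a local APIC */
	edge_raise(5);			/* raised but not yet delivered */
	printf("state->irr = %#x\n", saved_irr());	/* 0x20: pin 5 only */
	return 0;
}
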
index c2e36d934af4d96ffceb2f1edb78601a2a848f67..ca0b0b4e625603687bbbe0343ddc0feb11dadbd7 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/kvm_host.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm;
 struct kvm_vcpu;
@@ -77,6 +77,7 @@ struct kvm_ioapic {
        struct rtc_status rtc_status;
        struct delayed_work eoi_inject;
        u32 irq_eoi[IOAPIC_NUM_PINS];
+       u32 irr_delivered;
 };
 
 #ifdef DEBUG
@@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
        return kvm->arch.vioapic;
 }
 
+static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+       smp_rmb();
+       return test_bit(vector, ioapic->handled_vectors);
+}
+
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
                        int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
index 2d03568e9498356716b7504c195c71a912819f4d..ad68c73008c57f0c1926f0cec9e83f79fe070252 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/kvm_host.h>
 #include <linux/spinlock.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 #include "ioapic.h"
 #include "lapic.h"
 
index 4ee827d7bf36f730c25d358f709aa99cda93260a..d67206a7b99a689a4d7361de8bd8fc1b9ab02c1a 100644 (file)
@@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
        return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+/* The logical map is definitely wrong if we have multiple
+ * modes at the same time.  (Physical map is always right.)
+ */
+static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+{
+       return !(map->mode & (map->mode - 1));
+}
+
+static inline void
+apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+{
+       unsigned lid_bits;
+
+       BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
+       BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
+       BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
+       lid_bits = map->mode;
+
+       *cid = dest_id >> lid_bits;
+       *lid = dest_id & ((1 << lid_bits) - 1);
+}
+
 static void recalculate_apic_map(struct kvm *kvm)
 {
        struct kvm_apic_map *new, *old = NULL;
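
The rewritten map encodes the addressing mode as the number of logical-ID bits — KVM_APIC_MODE_XAPIC_CLUSTER = 4, KVM_APIC_MODE_XAPIC_FLAT = 8, KVM_APIC_MODE_X2APIC = 16, which the BUILD_BUG_ONs pin down — so mode doubles as the shift in apic_logical_id(). If vCPUs are in different modes, the OR of their mode bits is no longer a power of two and kvm_apic_logical_map_valid() rejects the logical map with the classic !(x & (x - 1)) test. A standalone demo:

#include <stdio.h>

enum { XAPIC_CLUSTER = 4, XAPIC_FLAT = 8, X2APIC = 16 };

static int map_valid(unsigned mode)
{
	return !(mode & (mode - 1));	/* zero or exactly one mode bit */
}

static void logical_id(unsigned mode, unsigned dest,
		       unsigned *cid, unsigned *lid)
{
	*cid = dest >> mode;			/* cluster: high bits */
	*lid = dest & ((1u << mode) - 1);	/* member: low "mode" bits */
}

int main(void)
{
	unsigned cid, lid;

	logical_id(XAPIC_CLUSTER, 0x21, &cid, &lid);
	printf("cluster %u, lid %#x\n", cid, lid);	/* cluster 2, lid 0x1 */
	printf("mixed modes valid? %d\n",
	       map_valid(XAPIC_FLAT | X2APIC));		/* 0 */
	return 0;
}
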
@@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm)
        if (!new)
                goto out;
 
-       new->ldr_bits = 8;
-       /* flat mode is default */
-       new->cid_shift = 8;
-       new->cid_mask = 0;
-       new->lid_mask = 0xff;
-       new->broadcast = APIC_BROADCAST;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               struct kvm_lapic *apic = vcpu->arch.apic;
-
-               if (!kvm_apic_present(vcpu))
-                       continue;
-
-               if (apic_x2apic_mode(apic)) {
-                       new->ldr_bits = 32;
-                       new->cid_shift = 16;
-                       new->cid_mask = new->lid_mask = 0xffff;
-                       new->broadcast = X2APIC_BROADCAST;
-               } else if (kvm_apic_get_reg(apic, APIC_LDR)) {
-                       if (kvm_apic_get_reg(apic, APIC_DFR) ==
-                                                       APIC_DFR_CLUSTER) {
-                               new->cid_shift = 4;
-                               new->cid_mask = 0xf;
-                               new->lid_mask = 0xf;
-                       } else {
-                               new->cid_shift = 8;
-                               new->cid_mask = 0;
-                               new->lid_mask = 0xff;
-                       }
-               }
-
-               /*
-                * All APICs have to be configured in the same mode by an OS.
-                * We take advantage of this while building the logical id
-                * lookup table. After reset, APICs are in software-disabled
-                * mode, so if we find an APIC with a different setting we
-                * assume this is the mode the OS wants all APICs to be in;
-                * build the lookup table accordingly.
-                */
-               if (kvm_apic_sw_enabled(apic))
-                       break;
-       }
-
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
                u16 cid, lid;
@@ -198,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm)
 
                aid = kvm_apic_id(apic);
                ldr = kvm_apic_get_reg(apic, APIC_LDR);
-               cid = apic_cluster_id(new, ldr);
-               lid = apic_logical_id(new, ldr);
 
                if (aid < ARRAY_SIZE(new->phys_map))
                        new->phys_map[aid] = apic;
+
+               if (apic_x2apic_mode(apic)) {
+                       new->mode |= KVM_APIC_MODE_X2APIC;
+               } else if (ldr) {
+                       ldr = GET_APIC_LOGICAL_ID(ldr);
+                       if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+                               new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
+                       else
+                               new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+               }
+
+               if (!kvm_apic_logical_map_valid(new))
+                       continue;
+
+               apic_logical_id(new, ldr, &cid, &lid);
+
                if (lid && cid < ARRAY_SIZE(new->logical_map))
                        new->logical_map[cid][ffs(lid) - 1] = apic;
        }
@@ -588,15 +582,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
        apic_update_ppr(apic);
 }
 
-static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 {
-       return dest == (apic_x2apic_mode(apic) ?
-                       X2APIC_BROADCAST : APIC_BROADCAST);
+       if (apic_x2apic_mode(apic))
+               return mda == X2APIC_BROADCAST;
+
+       return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 }
 
-static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 {
-       return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+       if (kvm_apic_broadcast(apic, mda))
+               return true;
+
+       if (apic_x2apic_mode(apic))
+               return mda == kvm_apic_id(apic);
+
+       return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 }
 
 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -613,6 +615,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
                       && (logical_id & mda & 0xffff) != 0;
 
        logical_id = GET_APIC_LOGICAL_ID(logical_id);
+       mda = GET_APIC_DEST_FIELD(mda);
 
        switch (kvm_apic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
@@ -627,10 +630,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
        }
 }
 
+/* KVM APIC implementation has two quirks
+ *  - dest always begins at 0 while xAPIC MDA has offset 24,
+ *  - IOxAPIC messages have to be delivered (directly) to x2APIC.
+ */
+static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
+                                              struct kvm_lapic *target)
+{
+       bool ipi = source != NULL;
+       bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
+
+       if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+               return X2APIC_BROADCAST;
+
+       return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+}
+
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                           int short_hand, unsigned int dest, int dest_mode)
 {
        struct kvm_lapic *target = vcpu->arch.apic;
+       u32 mda = kvm_apic_mda(dest, source, target);
 
        apic_debug("target %p, source %p, dest 0x%x, "
                   "dest_mode 0x%x, short_hand 0x%x\n",
@@ -640,9 +660,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
        switch (short_hand) {
        case APIC_DEST_NOSHORT:
                if (dest_mode == APIC_DEST_PHYSICAL)
-                       return kvm_apic_match_physical_addr(target, dest);
+                       return kvm_apic_match_physical_addr(target, mda);
                else
-                       return kvm_apic_match_logical_addr(target, dest);
+                       return kvm_apic_match_logical_addr(target, mda);
        case APIC_DEST_SELF:
                return target == source;
        case APIC_DEST_ALLINC:
@@ -664,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        struct kvm_lapic **dst;
        int i;
        bool ret = false;
+       bool x2apic_ipi = src && apic_x2apic_mode(src);
 
        *r = -1;
 
@@ -675,15 +696,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        if (irq->shorthand)
                return false;
 
+       if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+               return false;
+
        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);
 
        if (!map)
                goto out;
 
-       if (irq->dest_id == map->broadcast)
-               goto out;
-
        ret = true;
 
        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
@@ -692,16 +713,20 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
                dst = &map->phys_map[irq->dest_id];
        } else {
-               u32 mda = irq->dest_id << (32 - map->ldr_bits);
-               u16 cid = apic_cluster_id(map, mda);
+               u16 cid;
+
+               if (!kvm_apic_logical_map_valid(map)) {
+                       ret = false;
+                       goto out;
+               }
+
+               apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
 
                if (cid >= ARRAY_SIZE(map->logical_map))
                        goto out;
 
                dst = map->logical_map[cid];
 
-               bitmap = apic_logical_id(map, mda);
-
                if (irq->delivery_mode == APIC_DM_LOWEST) {
                        int l = -1;
                        for_each_set_bit(i, &bitmap, 16) {
@@ -1037,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
            addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
 
-static int apic_mmio_read(struct kvm_io_device *this,
+static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                           gpa_t address, int len, void *data)
 {
        struct kvm_lapic *apic = to_lapic(this);
@@ -1357,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
        return ret;
 }
 
-static int apic_mmio_write(struct kvm_io_device *this,
+static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                            gpa_t address, int len, const void *data)
 {
        struct kvm_lapic *apic = to_lapic(this);
@@ -1497,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
                return;
        }
 
-       if (!kvm_vcpu_is_bsp(apic->vcpu))
-               value &= ~MSR_IA32_APICBASE_BSP;
        vcpu->arch.apic_base = value;
 
        /* update jump label if enable bit changes */
index 0bc6c656625b8377df4ed6ed18da0ece953cfc38..9d28383fc1e70cc3437ae2eb062ff9433c659f96 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef __KVM_X86_LAPIC_H
 #define __KVM_X86_LAPIC_H
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 
@@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
        return kvm_x86_ops->vm_has_apicv(kvm);
 }
 
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
-{
-       u16 cid;
-       ldr >>= 32 - map->ldr_bits;
-       cid = (ldr >> map->cid_shift) & map->cid_mask;
-
-       return cid;
-}
-
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
-       ldr >>= (32 - map->ldr_bits);
-       return ldr & map->lid_mask;
-}
-
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.apic->pending_events;
index cee759299a356dd720a22095ea4cb684c89fcdd3..146f295ee32214a6f4ad40a58e53b61a3d1fa06d 100644 (file)
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                kvm_flush_remote_tlbs(kvm);
 }
 
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+               unsigned long *rmapp)
+{
+       u64 *sptep;
+       struct rmap_iterator iter;
+       int need_tlb_flush = 0;
+       pfn_t pfn;
+       struct kvm_mmu_page *sp;
+
+       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+               BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+               sp = page_header(__pa(sptep));
+               pfn = spte_to_pfn(*sptep);
+
+               /*
+                * Only EPT supported for now; otherwise, one would need to
+                * find out efficiently whether the guest page tables are
+                * also using huge pages.
+                */
+               if (sp->role.direct &&
+                       !kvm_is_reserved_pfn(pfn) &&
+                       PageTransCompound(pfn_to_page(pfn))) {
+                       drop_spte(kvm, sptep);
+                       sptep = rmap_get_first(*rmapp, &iter);
+                       need_tlb_flush = 1;
+               } else
+                       sptep = rmap_get_next(&iter);
+       }
+
+       return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot)
+{
+       bool flush = false;
+       unsigned long *rmapp;
+       unsigned long last_index, index;
+       gfn_t gfn_start, gfn_end;
+
+       spin_lock(&kvm->mmu_lock);
+
+       gfn_start = memslot->base_gfn;
+       gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+       if (gfn_start >= gfn_end)
+               goto out;
+
+       rmapp = memslot->arch.rmap[0];
+       last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+                                       PT_PAGE_TABLE_LEVEL);
+
+       for (index = 0; index <= last_index; ++index, ++rmapp) {
+               if (*rmapp)
+                       flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       if (flush) {
+                               kvm_flush_remote_tlbs(kvm);
+                               flush = false;
+                       }
+                       cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       if (flush)
+               kvm_flush_remote_tlbs(kvm);
+
+out:
+       spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot)
 {
index 8e6b7d869d2f7f34432a1f1685606eab7945561a..29fbf9dfdc549f47f1e189f58c5d5adfeb7a5fd1 100644 (file)
@@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping {
 };
 
 /* mapping between fixed pmc index and arch_events array */
-int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {1, 0, 7};
 
 static bool pmc_is_gp(struct kvm_pmc *pmc)
 {
index cc618c882f900ad21cb4de57d94daa91a5f4ec4c..ce741b8650f6ece694fb47e1750d153291a1803f 100644 (file)
@@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
        svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
                                   MSR_IA32_APICBASE_ENABLE;
-       if (kvm_vcpu_is_bsp(&svm->vcpu))
+       if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
        svm_init_osvw(&svm->vcpu);
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm)
 static int halt_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
-       skip_emulated_instruction(&svm->vcpu);
        return kvm_emulate_halt(&svm->vcpu);
 }
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
        kvm_emulate_hypercall(&svm->vcpu);
        return 1;
 }
@@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
 
-       trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
-                         vcpu->arch.regs[VCPU_REGS_RAX]);
+       trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+                         kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-       kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+       kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
@@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm)
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-       trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+       trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+       kvm_emulate_wbinvd(&svm->vcpu);
+       return 1;
+}
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
        u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
@@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm)
        return 1;
 }
 
-bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
+                                           unsigned long val)
 {
        unsigned long cr0 = svm->vcpu.arch.cr0;
        bool ret = false;
@@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm)
                return emulate_on_interception(svm);
 
        reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-       cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+       if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+               cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+       else
+               cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
        err = 0;
        if (cr >= 16) { /* mov to cr */
@@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-       u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+       u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
        u64 data;
 
        if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
        } else {
                trace_kvm_msr_read(ecx, data);
 
-               svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
-               svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+               kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+               kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
                skip_emulated_instruction(&svm->vcpu);
        }
@@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
        struct msr_data msr;
-       u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-       u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
-               | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+       u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+       u64 data = kvm_read_edx_eax(&svm->vcpu);
 
        msr.data = data;
        msr.index = ecx;
@@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR3]                     = cr_interception,
        [SVM_EXIT_READ_CR4]                     = cr_interception,
        [SVM_EXIT_READ_CR8]                     = cr_interception,
-       [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
+       [SVM_EXIT_CR0_SEL_WRITE]                = cr_interception,
        [SVM_EXIT_WRITE_CR0]                    = cr_interception,
        [SVM_EXIT_WRITE_CR3]                    = cr_interception,
        [SVM_EXIT_WRITE_CR4]                    = cr_interception,
@@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_STGI]                         = stgi_interception,
        [SVM_EXIT_CLGI]                         = clgi_interception,
        [SVM_EXIT_SKINIT]                       = skinit_interception,
-       [SVM_EXIT_WBINVD]                       = emulate_on_interception,
+       [SVM_EXIT_WBINVD]                       = wbinvd_interception,
        [SVM_EXIT_MONITOR]                      = monitor_interception,
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
@@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
        if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
            || !svm_exit_handlers[exit_code]) {
-               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+               WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }
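
SVM reports a selective CR0 write with its own exit code instead of SVM_EXIT_WRITE_CR0, so cr_interception()'s offset arithmetic would otherwise decode the wrong CR number; the new branch above folds SVM_EXIT_CR0_SEL_WRITE onto the CR0-write slot rather than bouncing to the full instruction emulator (the exit-handler table entry changes to match). The decode, annotated:

/* Exit codes are laid out READ_CR0..READ_CR15 then WRITE_CR0..WRITE_CR15,
 * so "exit_code - SVM_EXIT_READ_CR0" is the CR index, with >= 16 meaning
 * a write -- except CR0_SEL_WRITE, which sits outside that range. */
if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
	cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;	/* 16: write to CR0 */
else
	cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
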
index ae4f6d35d19c268315745741150dd6d1a7df5222..f5e8dce8046c56b5273e9aa043754f98a7dee7d7 100644 (file)
@@ -2470,6 +2470,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        vmx->nested.nested_vmx_secondary_ctls_low = 0;
        vmx->nested.nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_RDTSCP |
                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -3268,8 +3269,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
                 * default value.
                 */
                if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
-                       save->selector &= ~SELECTOR_RPL_MASK;
-               save->dpl = save->selector & SELECTOR_RPL_MASK;
+                       save->selector &= ~SEGMENT_RPL_MASK;
+               save->dpl = save->selector & SEGMENT_RPL_MASK;
                save->s = 1;
        }
        vmx_set_segment(vcpu, save, seg);
@@ -3842,7 +3843,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
        unsigned int cs_rpl;
 
        vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-       cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+       cs_rpl = cs.selector & SEGMENT_RPL_MASK;
 
        if (cs.unusable)
                return false;
@@ -3870,7 +3871,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
        unsigned int ss_rpl;
 
        vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
-       ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+       ss_rpl = ss.selector & SEGMENT_RPL_MASK;
 
        if (ss.unusable)
                return true;
@@ -3892,7 +3893,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
        unsigned int rpl;
 
        vmx_get_segment(vcpu, &var, seg);
-       rpl = var.selector & SELECTOR_RPL_MASK;
+       rpl = var.selector & SEGMENT_RPL_MASK;
 
        if (var.unusable)
                return true;
@@ -3919,7 +3920,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
 
        if (tr.unusable)
                return false;
-       if (tr.selector & SELECTOR_TI_MASK)     /* TI = 1 */
+       if (tr.selector & SEGMENT_TI_MASK)      /* TI = 1 */
                return false;
        if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
                return false;
@@ -3937,7 +3938,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
 
        if (ldtr.unusable)
                return true;
-       if (ldtr.selector & SELECTOR_TI_MASK)   /* TI = 1 */
+       if (ldtr.selector & SEGMENT_TI_MASK)    /* TI = 1 */
                return false;
        if (ldtr.type != 2)
                return false;
@@ -3954,8 +3955,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
        vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
        vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
 
-       return ((cs.selector & SELECTOR_RPL_MASK) ==
-                (ss.selector & SELECTOR_RPL_MASK));
+       return ((cs.selector & SEGMENT_RPL_MASK) ==
+                (ss.selector & SEGMENT_RPL_MASK));
 }
 
 /*
@@ -4711,7 +4712,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        kvm_set_cr8(&vmx->vcpu, 0);
        apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
-       if (kvm_vcpu_is_bsp(&vmx->vcpu))
+       if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
                apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
        apic_base_msr.host_initiated = true;
        kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
@@ -5006,7 +5007,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
                if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
                        if (vcpu->arch.halt_request) {
                                vcpu->arch.halt_request = 0;
-                               return kvm_emulate_halt(vcpu);
+                               return kvm_vcpu_halt(vcpu);
                        }
                        return 1;
                }
@@ -5071,6 +5072,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        }
 
        if (is_invalid_opcode(intr_info)) {
+               if (is_guest_mode(vcpu)) {
+                       kvm_queue_exception(vcpu, UD_VECTOR);
+                       return 1;
+               }
                er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
                if (er != EMULATE_DONE)
                        kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5090,9 +5095,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
            !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
-               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.ndata = 3;
                vcpu->run->internal.data[0] = vect_info;
                vcpu->run->internal.data[1] = intr_info;
+               vcpu->run->internal.data[2] = error_code;
                return 0;
        }
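
This hunk widens the KVM_EXIT_INTERNAL_ERROR payload from two elements to three so userspace can also see the exception error code. A hypothetical consumer on the userspace side (field names follow the uapi kvm_run layout; the reporting function itself is illustrative):

	#include <stdio.h>
	#include <linux/kvm.h>

	static void report_simul_ex(const struct kvm_run *run)
	{
		if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR &&
		    run->internal.suberror == KVM_INTERNAL_ERROR_SIMUL_EX &&
		    run->internal.ndata == 3)
			fprintf(stderr, "vect=%llx intr=%llx err=%llx\n",
				(unsigned long long)run->internal.data[0],
				(unsigned long long)run->internal.data[1],
				(unsigned long long)run->internal.data[2]);
	}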
 
@@ -5533,13 +5539,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 
 static int handle_halt(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
        return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
        kvm_emulate_hypercall(vcpu);
        return 1;
 }
@@ -5570,7 +5574,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
        kvm_emulate_wbinvd(vcpu);
        return 1;
 }
@@ -5828,7 +5831,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
        gpa_t gpa;
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-       if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+       if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
                skip_emulated_instruction(vcpu);
                return 1;
        }
@@ -5909,7 +5912,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
                if (vcpu->arch.halt_request) {
                        vcpu->arch.halt_request = 0;
-                       ret = kvm_emulate_halt(vcpu);
+                       ret = kvm_vcpu_halt(vcpu);
                        goto out;
                }
 
@@ -7318,21 +7321,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
                else if (port < 0x10000)
                        bitmap = vmcs12->io_bitmap_b;
                else
-                       return 1;
+                       return true;
                bitmap += (port & 0x7fff) / 8;
 
                if (last_bitmap != bitmap)
                        if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
-                               return 1;
+                               return true;
                if (b & (1 << (port & 7)))
-                       return 1;
+                       return true;
 
                port++;
                size--;
                last_bitmap = bitmap;
        }
 
-       return 0;
+       return false;
 }
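
The loop above consults the two VMX I/O bitmaps: bitmap A covers ports 0x0000-0x7fff and bitmap B covers 0x8000-0xffff, one bit per port. A compact model of the per-port test (helper name is illustrative):

	#include <stdint.h>

	static int io_port_intercepted(const uint8_t *bitmap_a,
				       const uint8_t *bitmap_b, uint16_t port)
	{
		const uint8_t *bitmap = port < 0x8000 ? bitmap_a : bitmap_b;

		return (bitmap[(port & 0x7fff) / 8] >> (port & 7)) & 1;
	}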
 
 /*
@@ -7348,7 +7351,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
        gpa_t bitmap;
 
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
-               return 1;
+               return true;
 
        /*
         * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
@@ -7367,10 +7370,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
        if (msr_index < 1024*8) {
                unsigned char b;
                if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
-                       return 1;
+                       return true;
                return 1 & (b >> (msr_index & 7));
        } else
-               return 1; /* let L1 handle the wrong parameter */
+               return true; /* let L1 handle the wrong parameter */
 }
 
 /*
@@ -7392,7 +7395,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                case 0:
                        if (vmcs12->cr0_guest_host_mask &
                            (val ^ vmcs12->cr0_read_shadow))
-                               return 1;
+                               return true;
                        break;
                case 3:
                        if ((vmcs12->cr3_target_count >= 1 &&
@@ -7403,37 +7406,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                                        vmcs12->cr3_target_value2 == val) ||
                                (vmcs12->cr3_target_count >= 4 &&
                                        vmcs12->cr3_target_value3 == val))
-                               return 0;
+                               return false;
                        if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
-                               return 1;
+                               return true;
                        break;
                case 4:
                        if (vmcs12->cr4_guest_host_mask &
                            (vmcs12->cr4_read_shadow ^ val))
-                               return 1;
+                               return true;
                        break;
                case 8:
                        if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
-                               return 1;
+                               return true;
                        break;
                }
                break;
        case 2: /* clts */
                if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
                    (vmcs12->cr0_read_shadow & X86_CR0_TS))
-                       return 1;
+                       return true;
                break;
        case 1: /* mov from cr */
                switch (cr) {
                case 3:
                        if (vmcs12->cpu_based_vm_exec_control &
                            CPU_BASED_CR3_STORE_EXITING)
-                               return 1;
+                               return true;
                        break;
                case 8:
                        if (vmcs12->cpu_based_vm_exec_control &
                            CPU_BASED_CR8_STORE_EXITING)
-                               return 1;
+                               return true;
                        break;
                }
                break;
@@ -7444,14 +7447,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                 */
                if (vmcs12->cr0_guest_host_mask & 0xe &
                    (val ^ vmcs12->cr0_read_shadow))
-                       return 1;
+                       return true;
                if ((vmcs12->cr0_guest_host_mask & 0x1) &&
                    !(vmcs12->cr0_read_shadow & 0x1) &&
                    (val & 0x1))
-                       return 1;
+                       return true;
                break;
        }
-       return 0;
+       return false;
 }
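
The guest/host-mask tests repeated throughout this function all reduce to one rule: a CR write must exit to L1 when it flips a bit L1 claimed ownership of, i.e. a bit set in the mask whose new value differs from the read shadow that L1 advertised. Restated in isolation (function name is illustrative):

	#include <stdint.h>

	static int cr_write_needs_exit(uint64_t guest_host_mask,
				       uint64_t read_shadow, uint64_t val)
	{
		return (guest_host_mask & (val ^ read_shadow)) != 0;
	}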
 
 /*
@@ -7474,48 +7477,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                                KVM_ISA_VMX);
 
        if (vmx->nested.nested_run_pending)
-               return 0;
+               return false;
 
        if (unlikely(vmx->fail)) {
                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
                                    vmcs_read32(VM_INSTRUCTION_ERROR));
-               return 1;
+               return true;
        }
 
        switch (exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
                if (!is_exception(intr_info))
-                       return 0;
+                       return false;
                else if (is_page_fault(intr_info))
                        return enable_ept;
                else if (is_no_device(intr_info) &&
                         !(vmcs12->guest_cr0 & X86_CR0_TS))
-                       return 0;
+                       return false;
                return vmcs12->exception_bitmap &
                                (1u << (intr_info & INTR_INFO_VECTOR_MASK));
        case EXIT_REASON_EXTERNAL_INTERRUPT:
-               return 0;
+               return false;
        case EXIT_REASON_TRIPLE_FAULT:
-               return 1;
+               return true;
        case EXIT_REASON_PENDING_INTERRUPT:
                return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
        case EXIT_REASON_NMI_WINDOW:
                return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
        case EXIT_REASON_TASK_SWITCH:
-               return 1;
+               return true;
        case EXIT_REASON_CPUID:
                if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
-                       return 0;
-               return 1;
+                       return false;
+               return true;
        case EXIT_REASON_HLT:
                return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
        case EXIT_REASON_INVD:
-               return 1;
+               return true;
        case EXIT_REASON_INVLPG:
                return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
        case EXIT_REASON_RDPMC:
                return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-       case EXIT_REASON_RDTSC:
+       case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
                return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
        case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
        case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -7527,7 +7530,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
                 */
-               return 1;
+               return true;
        case EXIT_REASON_CR_ACCESS:
                return nested_vmx_exit_handled_cr(vcpu, vmcs12);
        case EXIT_REASON_DR_ACCESS:
@@ -7538,7 +7541,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_MSR_WRITE:
                return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
        case EXIT_REASON_INVALID_STATE:
-               return 1;
+               return true;
        case EXIT_REASON_MWAIT_INSTRUCTION:
                return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
        case EXIT_REASON_MONITOR_INSTRUCTION:
@@ -7548,7 +7551,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                        nested_cpu_has2(vmcs12,
                                SECONDARY_EXEC_PAUSE_LOOP_EXITING);
        case EXIT_REASON_MCE_DURING_VMENTRY:
-               return 0;
+               return false;
        case EXIT_REASON_TPR_BELOW_THRESHOLD:
                return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
        case EXIT_REASON_APIC_ACCESS:
@@ -7557,7 +7560,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_APIC_WRITE:
        case EXIT_REASON_EOI_INDUCED:
                /* apic_write and eoi_induced should exit unconditionally. */
-               return 1;
+               return true;
        case EXIT_REASON_EPT_VIOLATION:
                /*
                 * L0 always deals with the EPT violation. If nested EPT is
@@ -7565,7 +7568,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * missing in the guest EPT table (EPT12), the EPT violation
                 * will be injected with nested_ept_inject_page_fault()
                 */
-               return 0;
+               return false;
        case EXIT_REASON_EPT_MISCONFIG:
                /*
                 * L2 never uses directly L1's EPT, but rather L0's own EPT
@@ -7573,11 +7576,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * (EPT on EPT). So any problems with the structure of the
                 * table is L0's fault.
                 */
-               return 0;
+               return false;
        case EXIT_REASON_WBINVD:
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
        case EXIT_REASON_XSETBV:
-               return 1;
+               return true;
        case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
                /*
                 * This should never happen, since it is not possible to
@@ -7587,7 +7590,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
        default:
-               return 1;
+               return true;
        }
 }
 
@@ -8522,6 +8525,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
                                                exec_control);
                        }
                }
+               if (nested && !vmx->rdtscp_enabled)
+                       vmx->nested.nested_vmx_secondary_ctls_high &=
+                               ~SECONDARY_EXEC_RDTSCP;
        }
 
        /* Exposing INVPCID only when PCID is exposed */
@@ -8622,10 +8628,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
                                        struct vmcs12 *vmcs12)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
        if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-               /* TODO: Also verify bits beyond physical address width are 0 */
-               if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+               if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
+                   vmcs12->apic_access_addr >> maxphyaddr)
                        return false;
 
                /*
@@ -8641,8 +8648,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
        }
 
        if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-               /* TODO: Also verify bits beyond physical address width are 0 */
-               if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+               if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
+                   vmcs12->virtual_apic_page_addr >> maxphyaddr)
                        return false;
 
                if (vmx->nested.virtual_apic_page) /* shouldn't happen */
@@ -8665,7 +8672,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
        }
 
        if (nested_cpu_has_posted_intr(vmcs12)) {
-               if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+               if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
+                   vmcs12->posted_intr_desc_addr >> maxphyaddr)
                        return false;
 
                if (vmx->nested.pi_desc_page) { /* shouldn't happen */
@@ -8864,9 +8872,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
 
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
                                       unsigned long count_field,
-                                      unsigned long addr_field,
-                                      int maxphyaddr)
+                                      unsigned long addr_field)
 {
+       int maxphyaddr;
        u64 count, addr;
 
        if (vmcs12_read_any(vcpu, count_field, &count) ||
@@ -8876,6 +8884,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
        }
        if (count == 0)
                return 0;
+       maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
            (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
                pr_warn_ratelimited(
@@ -8889,19 +8898,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
                                                struct vmcs12 *vmcs12)
 {
-       int maxphyaddr;
-
        if (vmcs12->vm_exit_msr_load_count == 0 &&
            vmcs12->vm_exit_msr_store_count == 0 &&
            vmcs12->vm_entry_msr_load_count == 0)
                return 0; /* Fast path */
-       maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
-                                       VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+                                       VM_EXIT_MSR_LOAD_ADDR) ||
            nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
-                                       VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+                                       VM_EXIT_MSR_STORE_ADDR) ||
            nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
-                                       VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+                                       VM_ENTRY_MSR_LOAD_ADDR))
                return -EINVAL;
        return 0;
 }
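
Moving the cpuid_maxphyaddr() lookup into nested_vmx_check_msr_switch() keeps the zero-count fast path cheap while preserving the validity rule used in these hunks and in nested_get_vmcs12_pages() above: an address is malformed if any bit at or above the vCPU's MAXPHYADDR is set. The test in isolation (function name is illustrative):

	#include <stdint.h>

	static int beyond_maxphyaddr(uint64_t addr, int maxphyaddr)
	{
		return (addr >> maxphyaddr) != 0;
	}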
@@ -9151,8 +9157,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        exec_control &= ~SECONDARY_EXEC_RDTSCP;
                /* Take the following fields only from vmcs12 */
                exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                                 SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+                                 SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
                                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9385,7 +9392,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        }
 
        if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-               /*TODO: Also verify bits beyond physical address width are 0*/
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
@@ -9524,7 +9530,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        vmcs12->launch_state = 1;
 
        if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
-               return kvm_emulate_halt(vcpu);
+               return kvm_vcpu_halt(vcpu);
 
        vmx->nested.nested_run_pending = 1;
 
index 32bf19ef3115f65c9dffc23a655be2763babcaff..e1a81267f3f632e971d1fc63cfe687be2e7c60f3 100644 (file)
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+               for (i = 0; i < KVM_NR_DB_REGS; i++)
+                       vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+               vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+       }
+}
+
 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
 {
        if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -1070,19 +1081,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
        struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
        u64 boot_ns;
 
-       boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+       boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
 
        write_seqcount_begin(&vdata->seq);
 
        /* copy pvclock gtod data */
-       vdata->clock.vclock_mode        = tk->tkr.clock->archdata.vclock_mode;
-       vdata->clock.cycle_last         = tk->tkr.cycle_last;
-       vdata->clock.mask               = tk->tkr.mask;
-       vdata->clock.mult               = tk->tkr.mult;
-       vdata->clock.shift              = tk->tkr.shift;
+       vdata->clock.vclock_mode        = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->clock.cycle_last         = tk->tkr_mono.cycle_last;
+       vdata->clock.mask               = tk->tkr_mono.mask;
+       vdata->clock.mult               = tk->tkr_mono.mult;
+       vdata->clock.shift              = tk->tkr_mono.shift;
 
        vdata->boot_ns                  = boot_ns;
-       vdata->nsec_base                = tk->tkr.xtime_nsec;
+       vdata->nsec_base                = tk->tkr_mono.xtime_nsec;
 
        write_seqcount_end(&vdata->seq);
 }
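
update_pvclock_gtod() is the writer half of a seqcount-protected snapshot; readers of pvclock_gtod_data use the usual retry loop. A sketch of that reader side, assuming only the fields copied above (standard kernel seqcount API; the function itself is illustrative):

	#include <linux/seqlock.h>

	static u64 read_boot_ns(struct pvclock_gtod_data *vdata)
	{
		unsigned int seq;
		u64 boot_ns;

		do {
			seq = read_seqcount_begin(&vdata->seq);
			boot_ns = vdata->boot_ns;
		} while (read_seqcount_retry(&vdata->seq, seq));

		return boot_ns;
	}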
@@ -3149,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
@@ -4114,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+                   && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                handled += n;
                addr += n;
@@ -4134,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+                                        addr, n, v))
+                   && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
                handled += n;
@@ -4475,7 +4488,8 @@ mmio:
        return X86EMUL_CONTINUE;
 }
 
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+                       unsigned long addr,
                        void *val, unsigned int bytes,
                        struct x86_exception *exception,
                        const struct read_write_emulator_ops *ops)
@@ -4538,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
                                   exception, &read_emultor);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
                            unsigned long addr,
                            const void *val,
                            unsigned int bytes,
@@ -4629,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
        int r;
 
        if (vcpu->arch.pio.in)
-               r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+               r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
                                    vcpu->arch.pio.size, pd);
        else
-               r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+               r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
                                     vcpu->arch.pio.port, vcpu->arch.pio.size,
                                     pd);
        return r;
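
These call sites reflect the API rework visible throughout the merge: kvm_io_bus_read() and kvm_io_bus_write() now take the vcpu rather than the kvm handle, so bus devices can tell which vCPU issued the access. The prototypes implied by the callers above:

	int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
			     gpa_t addr, int len, const void *val);
	int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
			    gpa_t addr, int len, void *val);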
@@ -4705,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
        kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
        if (!need_emulate_wbinvd(vcpu))
                return X86EMUL_CONTINUE;
@@ -4722,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
                wbinvd();
        return X86EMUL_CONTINUE;
 }
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
+
+
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-       kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+       kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long *dest)
 {
        return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long value)
 {
 
        return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5816,7 +5840,7 @@ void kvm_arch_exit(void)
        free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
        if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5827,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
                return 0;
        }
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_vcpu_halt(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
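
The split above is the pattern this series applies to halt, wbinvd and the hypercall path alike: the kvm_emulate_*() wrapper advances RIP and then runs the _noskip body, so VM-exit handlers stop calling skip_emulated_instruction() themselves, while the x86 emulator, which manages RIP on its own, would use the _noskip variant directly. Both sides of the pattern (the emulator-side name is hypothetical):

	/* VM-exit handler: the RIP skip is folded into the wrapper. */
	static int handle_halt(struct kvm_vcpu *vcpu)
	{
		return kvm_emulate_halt(vcpu);
	}

	/* Emulator callback: RIP was already advanced during decode. */
	static void emulator_halt(struct kvm_vcpu *vcpu)
	{
		kvm_vcpu_halt(vcpu);
	}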
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5903,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
        lapic_irq.dest_id = apicid;
 
        lapic_irq.delivery_mode = APIC_DM_REMRD;
-       kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+       kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5911,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        unsigned long nr, a0, a1, a2, a3, ret;
        int op_64_bit, r = 1;
 
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+
        if (kvm_hv_hypercall_enabled(vcpu->kvm))
                return kvm_hv_hypercall(vcpu);
 
@@ -6164,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 }
 
 /*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
  * userspace.
  */
@@ -6301,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                set_debugreg(vcpu->arch.eff_db[2], 2);
                set_debugreg(vcpu->arch.eff_db[3], 3);
                set_debugreg(vcpu->arch.dr6, 6);
+               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
@@ -6382,42 +6416,47 @@ out:
        return r;
 }
 
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+       if (!kvm_arch_vcpu_runnable(vcpu)) {
+               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_block(vcpu);
+               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+                       return 1;
+       }
+
+       kvm_apic_accept_events(vcpu);
+       switch(vcpu->arch.mp_state) {
+       case KVM_MP_STATE_HALTED:
+               vcpu->arch.pv.pv_unhalted = false;
+               vcpu->arch.mp_state =
+                       KVM_MP_STATE_RUNNABLE;
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.apf.halted = false;
+               break;
+       case KVM_MP_STATE_INIT_RECEIVED:
+               break;
+       default:
+               return -EINTR;
+               break;
+       }
+       return 1;
+}
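
Note the intentional fall-through from KVM_MP_STATE_HALTED into KVM_MP_STATE_RUNNABLE in the switch above. With vcpu_block() factored out, the rewritten loop below reduces to "run the guest while runnable, otherwise block, and stop on any non-positive result". A toy model of that control flow (all names hypothetical):

	#include <stdbool.h>

	static int run_loop(bool (*runnable)(void), int (*enter_guest)(void),
			    int (*block)(void))
	{
		int r;

		for (;;) {
			r = runnable() ? enter_guest() : block();
			if (r <= 0)
				break;	/* error, signal, or userspace exit */
		}
		return r;
	}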
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
        struct kvm *kvm = vcpu->kvm;
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       r = 1;
-       while (r > 0) {
+       for (;;) {
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                    !vcpu->arch.apf.halted)
                        r = vcpu_enter_guest(vcpu);
-               else {
-                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-                       kvm_vcpu_block(vcpu);
-                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-                       if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
-                               kvm_apic_accept_events(vcpu);
-                               switch(vcpu->arch.mp_state) {
-                               case KVM_MP_STATE_HALTED:
-                                       vcpu->arch.pv.pv_unhalted = false;
-                                       vcpu->arch.mp_state =
-                                               KVM_MP_STATE_RUNNABLE;
-                               case KVM_MP_STATE_RUNNABLE:
-                                       vcpu->arch.apf.halted = false;
-                                       break;
-                               case KVM_MP_STATE_INIT_RECEIVED:
-                                       break;
-                               default:
-                                       r = -EINTR;
-                                       break;
-                               }
-                       }
-               }
-
+               else
+                       r = vcpu_block(kvm, vcpu);
                if (r <= 0)
                        break;
 
@@ -6429,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
+                       break;
                }
 
                kvm_check_async_pf_completion(vcpu);
@@ -6437,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
+                       break;
                }
                if (need_resched()) {
                        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6568,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-       r = __vcpu_run(vcpu);
+       r = vcpu_run(vcpu);
 
 out:
        post_kvm_run_save(vcpu);
@@ -7075,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
        kvm_clear_exception_queue(vcpu);
 
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = DR6_INIT;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);
 
+       vcpu->arch.cr2 = 0;
+
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        vcpu->arch.apf.msr_val = 0;
        vcpu->arch.st.msr_val = 0;
@@ -7240,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.pv.pv_unhalted = false;
        vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        else
                vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7288,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        vcpu->arch.guest_supported_xcr0 = 0;
        vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -7428,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
                if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
-                       kvm_kvfree(free->arch.rmap[i]);
+                       kvfree(free->arch.rmap[i]);
                        free->arch.rmap[i] = NULL;
                }
                if (i == 0)
@@ -7436,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
                if (!dont || free->arch.lpage_info[i - 1] !=
                             dont->arch.lpage_info[i - 1]) {
-                       kvm_kvfree(free->arch.lpage_info[i - 1]);
+                       kvfree(free->arch.lpage_info[i - 1]);
                        free->arch.lpage_info[i - 1] = NULL;
                }
        }
@@ -7490,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
 out_free:
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-               kvm_kvfree(slot->arch.rmap[i]);
+               kvfree(slot->arch.rmap[i]);
                slot->arch.rmap[i] = NULL;
                if (i == 0)
                        continue;
 
-               kvm_kvfree(slot->arch.lpage_info[i - 1]);
+               kvfree(slot->arch.lpage_info[i - 1]);
                slot->arch.lpage_info[i - 1] = NULL;
        }
        return -ENOMEM;
@@ -7617,6 +7663,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
        /* It's OK to get 'new' slot here as it has already been installed */
        new = id_to_memslot(kvm->memslots, mem->slot);
 
+       /*
+        * Dirty logging tracks sptes in 4k granularity, meaning that large
+        * sptes have to be split.  If live migration is successful, the guest
+        * in the source machine will be destroyed and large sptes will be
+        * created in the destination. However, if the guest continues to run
+        * in the source machine (for example if live migration fails), small
+        * sptes will remain around and cause bad performance.
+        *
+        * Scan sptes if dirty logging has been stopped, dropping those
+        * which can be collapsed into a single large-page spte.  Later
+        * page faults will create the large-page sptes.
+        */
+       if ((change != KVM_MR_DELETE) &&
+               (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+               !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+
        /*
         * Set up write protection and/or dirty logging for the new slot.
         *
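
To put a number on the dirty-logging comment above (standard x86 page sizes assumed): every 2 MiB spte split for dirty logging turns into 512 4 KiB sptes, which is exactly the overhead the new zapping pass removes once logging stops.

	/* 2 MiB / 4 KiB = 512 small sptes per split large spte. */
	enum { SPTES_PER_2M = (2u << 20) / (4u << 10) };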
index ac4453d8520efd5e2080ef6f29cfd7da7b154d61..717908b16037d45957a0ec69b94c8c7d396bfaa6 100644 (file)
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
                /* Some systems map "vectors" to interrupts weirdly.  Not us! */
                __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
                if (i != SYSCALL_VECTOR)
-                       set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+                       set_intr_gate(i, irq_entries_start +
+                                       8 * (i - FIRST_EXTERNAL_VECTOR));
        }
 
        /*
@@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
 {
        lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
                   THREAD_SIZE / PAGE_SIZE);
+       tss->x86_tss.sp0 = thread->sp0;
 }
 
 /* Let's just say, I wouldn't do debugging under a Guest. */
index f5cc9eb1d51bc02ef817bd664c7810f4f5b928b1..082a85167a5b68feff28d749b0e04425a0f4adf2 100644 (file)
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-       pushl_cfi %\reg
-       CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-       popl_cfi %\reg
-       CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
        movl %ebx, %eax
        movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
        CFI_STARTPROC
-       SAVE ebp
-       SAVE ebx
-       SAVE esi
-       SAVE edi
+       pushl_cfi_reg ebp
+       pushl_cfi_reg ebx
+       pushl_cfi_reg esi
+       pushl_cfi_reg edi
 
        movl %eax, %esi
        movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
        movl %ebx, %eax
        movl %ecx, %edx
-       RESTORE edi
-       RESTORE esi
-       RESTORE ebx
-       RESTORE ebp
+       popl_cfi_reg edi
+       popl_cfi_reg esi
+       popl_cfi_reg ebx
+       popl_cfi_reg ebp
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
        CFI_STARTPROC
-       SAVE ebx
+       pushl_cfi_reg ebx
 
        read64 %esi
 1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
        movl %ebx, %eax
        movl %ecx, %edx
-       RESTORE ebx
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
 
 ENTRY(atomic64_dec_if_positive_cx8)
        CFI_STARTPROC
-       SAVE ebx
+       pushl_cfi_reg ebx
 
        read64 %esi
 1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
 2:
        movl %ebx, %eax
        movl %ecx, %edx
-       RESTORE ebx
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
        CFI_STARTPROC
-       SAVE ebp
-       SAVE ebx
+       pushl_cfi_reg ebp
+       pushl_cfi_reg ebx
 /* these just push these two parameters on the stack */
-       SAVE edi
-       SAVE ecx
+       pushl_cfi_reg edi
+       pushl_cfi_reg ecx
 
        movl %eax, %ebp
        movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
 3:
        addl $8, %esp
        CFI_ADJUST_CFA_OFFSET -8
-       RESTORE ebx
-       RESTORE ebp
+       popl_cfi_reg ebx
+       popl_cfi_reg ebp
        ret
 4:
        cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
 
 ENTRY(atomic64_inc_not_zero_cx8)
        CFI_STARTPROC
-       SAVE ebx
+       pushl_cfi_reg ebx
 
        read64 %esi
 1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
        movl $1, %eax
 3:
-       RESTORE ebx
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
index e78b8eee66155df85844e8cf6dae4507fa313bc6..9bc944a9127481ead40689a73054d80e50f0bc10 100644 (file)
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
           */           
 ENTRY(csum_partial)
        CFI_STARTPROC
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg esi
+       pushl_cfi_reg ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
 6:     addl %ecx,%eax
        adcl $0, %eax 
 7:     
-       testl $1, 12(%esp)
+       testb $1, 12(%esp)
        jz 8f
        roll $8, %eax
 8:
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %esi
-       CFI_RESTORE esi
+       popl_cfi_reg ebx
+       popl_cfi_reg esi
        ret
        CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
 
 ENTRY(csum_partial)
        CFI_STARTPROC
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg esi
+       pushl_cfi_reg ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
        addl %ebx,%eax
        adcl $0,%eax
 80: 
-       testl $1, 12(%esp)
+       testb $1, 12(%esp)
        jz 90f
        roll $8, %eax
 90: 
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %esi
-       CFI_RESTORE esi
+       popl_cfi_reg ebx
+       popl_cfi_reg esi
        ret
        CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        subl  $4,%esp   
        CFI_ADJUST_CFA_OFFSET 4
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
+       pushl_cfi_reg ebx
        movl ARGBASE+16(%esp),%eax      # sum
        movl ARGBASE+12(%esp),%ecx      # len
        movl ARGBASE+4(%esp),%esi       # src
@@ -412,12 +401,9 @@ DST(       movb %cl, (%edi)        )
 
 .previous
 
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %esi
-       CFI_RESTORE esi
-       popl_cfi %edi
-       CFI_RESTORE edi
+       popl_cfi_reg ebx
+       popl_cfi_reg esi
+       popl_cfi_reg edi
        popl_cfi %ecx                   # equivalent to addl $4,%esp
        ret     
        CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
                
 ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
+       pushl_cfi_reg ebx
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
        movl ARGBASE+4(%esp),%esi       #src
        movl ARGBASE+8(%esp),%edi       #dst    
        movl ARGBASE+12(%esp),%ecx      #len
@@ -506,12 +489,9 @@ DST(       movb %dl, (%edi)         )
        jmp  7b                 
 .previous                              
 
-       popl_cfi %esi
-       CFI_RESTORE esi
-       popl_cfi %edi
-       CFI_RESTORE edi
-       popl_cfi %ebx
-       CFI_RESTORE ebx
+       popl_cfi_reg esi
+       popl_cfi_reg edi
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
index f2145cfa12a66830e834718340c4cc88e64a731a..e67e579c93bdf7f3d0737565ea106edeeefb3b6d 100644 (file)
@@ -1,31 +1,35 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
 /*
- * Zero a page.        
- * rdi page
- */                    
-ENTRY(clear_page_c)
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi        - page
+ */
+ENTRY(clear_page)
        CFI_STARTPROC
+
+       ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
+                     "jmp clear_page_c_e", X86_FEATURE_ERMS
+
        movl $4096/8,%ecx
        xorl %eax,%eax
        rep stosq
        ret
        CFI_ENDPROC
-ENDPROC(clear_page_c)
+ENDPROC(clear_page)
 
-ENTRY(clear_page_c_e)
+ENTRY(clear_page_orig)
        CFI_STARTPROC
-       movl $4096,%ecx
-       xorl %eax,%eax
-       rep stosb
-       ret
-       CFI_ENDPROC
-ENDPROC(clear_page_c_e)
 
-ENTRY(clear_page)
-       CFI_STARTPROC
        xorl   %eax,%eax
        movl   $4096/64,%ecx
        .p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
        nop
        ret
        CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
-       /*
-        * Some CPUs support enhanced REP MOVSB/STOSB instructions.
-        * It is recommended to use this when possible.
-        * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
-        * Otherwise, use original function.
-        *
-        */
+ENDPROC(clear_page_orig)
 
-#include <asm/cpufeature.h>
-
-       .section .altinstr_replacement,"ax"
-1:     .byte 0xeb                                      /* jmp <disp8> */
-       .byte (clear_page_c - clear_page) - (2f - 1b)   /* offset */
-2:     .byte 0xeb                                      /* jmp <disp8> */
-       .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
-3:
-       .previous
-       .section .altinstructions,"a"
-       altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
-                            .Lclear_page_end-clear_page, 2b-1b
-       altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
-                            .Lclear_page_end-clear_page,3b-2b
-       .previous
+ENTRY(clear_page_c_e)
+       CFI_STARTPROC
+       movl $4096,%ecx
+       xorl %eax,%eax
+       rep stosb
+       ret
+       CFI_ENDPROC
+ENDPROC(clear_page_c_e)
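
The clear_page rewrite above, and the copy_page, copy_user, memcpy, memmove and memset rewrites that follow, all share one idea: instead of hand-rolled .altinstructions entries patching bytes over the function body, each entry point now begins with an ALTERNATIVE/ALTERNATIVE_2 jump that is NOPped out or retargeted at boot according to CPU features. The runtime effect, modelled in C (the real mechanism patches the jmp in place; names here are illustrative):

	#include <stdbool.h>

	typedef void (*clear_page_fn)(void *page);

	static clear_page_fn pick_variant(bool has_erms, bool has_rep_good,
					  clear_page_fn erms, clear_page_fn rep,
					  clear_page_fn orig)
	{
		if (has_erms)
			return erms;		/* REP STOSB */
		if (has_rep_good)
			return rep;		/* REP STOSQ */
		return orig;			/* unrolled stores */
	}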
index 176cca67212b7072687f42450ef56018c03ebaaa..8239dbcbf98455a99a125953392114f07a09aa74 100644 (file)
@@ -2,23 +2,26 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
        ALIGN
-copy_page_rep:
+ENTRY(copy_page)
        CFI_STARTPROC
+       ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
        movl    $4096/8, %ecx
        rep     movsq
        ret
        CFI_ENDPROC
-ENDPROC(copy_page_rep)
-
-/*
- *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- *  Could vary the prefetch distance based on SMP/UP.
-*/
+ENDPROC(copy_page)
 
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
        CFI_STARTPROC
        subq    $2*8,   %rsp
        CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
        addq    $2*8, %rsp
        CFI_ADJUST_CFA_OFFSET -2*8
        ret
-.Lcopy_page_end:
        CFI_ENDPROC
-ENDPROC(copy_page)
-
-       /* Some CPUs run faster using the string copy instructions.
-          It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
-       .section .altinstr_replacement,"ax"
-1:     .byte 0xeb                                      /* jmp <disp8> */
-       .byte (copy_page_rep - copy_page) - (2f - 1b)   /* offset */
-2:
-       .previous
-       .section .altinstructions,"a"
-       altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,       \
-               .Lcopy_page_end-copy_page, 2b-1b
-       .previous
+ENDPROC(copy_page_regs)
index dee945d555941a078f40049b24b8b4731aba6f2c..fa997dfaef242fa9abdb28c20658a939caf72697 100644 (file)
@@ -8,9 +8,6 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
 
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
-       .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
-       .byte 0xe9      /* 32bit jump */
-       .long \orig-1f  /* by default jump to orig */
-1:
-       .section .altinstr_replacement,"ax"
-2:     .byte 0xe9                      /* near jump with 32bit immediate */
-       .long \alt1-1b /* offset */   /* or alternatively to alt1 */
-3:     .byte 0xe9                      /* near jump with 32bit immediate */
-       .long \alt2-1b /* offset */   /* or alternatively to alt2 */
-       .previous
-
-       .section .altinstructions,"a"
-       altinstruction_entry 0b,2b,\feature1,5,5
-       altinstruction_entry 0b,3b,\feature2,5,5
-       .previous
-       .endm
-
        .macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
@@ -67,7 +38,6 @@
 
        _ASM_EXTABLE(100b,103b)
        _ASM_EXTABLE(101b,103b)
-#endif
        .endm
 
 /* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
-       ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
-               copy_user_generic_unrolled,copy_user_generic_string,    \
-               copy_user_enhanced_fast_string
+       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
+                     "jmp copy_user_generic_string",           \
+                     X86_FEATURE_REP_GOOD,                     \
+                     "jmp copy_user_enhanced_fast_string",     \
+                     X86_FEATURE_ERMS
        CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
-       ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
-               copy_user_generic_unrolled,copy_user_generic_string,    \
-               copy_user_enhanced_fast_string
+       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
+                     "jmp copy_user_generic_string",           \
+                     X86_FEATURE_REP_GOOD,                     \
+                     "jmp copy_user_enhanced_fast_string",     \
+                     X86_FEATURE_ERMS
        CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
index 2419d5fefae30ac2453fabf71c633ede013d2e4a..9734182966f3be925a38c0762aa71e3148156148 100644 (file)
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
 
        /* handle last odd byte */
 .Lhandle_1:
-       testl $1, %r10d
+       testb $1, %r10b
        jz    .Lende
        xorl  %ebx, %ebx
        source
index 1313ae6b478b6c439741ee032a8c33b86868ee2c..8f72b334aea03387ab7084d9998f497cf7287da2 100644 (file)
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 {
+       /*
+        * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+        * even if the input buffer is long enough to hold them.
+        */
+       if (buf_len > MAX_INSN_SIZE)
+               buf_len = MAX_INSN_SIZE;
+
        memset(insn, 0, sizeof(*insn));
        insn->kaddr = kaddr;
        insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
                                /* VEX.W overrides opnd_size */
                                insn->opnd_bytes = 8;
                } else {
+                       /*
+                        * For VEX2, fake VEX3-like byte#2.
+                        * Makes it easier to decode vex.W, vex.vvvv,
+                        * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+                        */
+                       insn->vex_prefix.bytes[2] = b2 & 0x7f;
                        insn->vex_prefix.nbytes = 2;
                        insn->next_byte += 2;
                }
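
Masking the second byte of a two-byte VEX prefix with 0x7f works because it shares its low seven bits (inverted vvvv, L, pp) with byte 2 of the three-byte VEX prefix, while bit 7 means inverted-R in VEX2 but W in VEX3; clearing it therefore yields a VEX3-style byte with W == 0, as the new comment says. Field extraction over the normalized byte (layouts per the Intel SDM; helpers illustrative):

	#include <stdint.h>

	static inline int vex_w(uint8_t b2)  { return (b2 >> 7) & 1; }
	static inline int vex_l(uint8_t b2)  { return (b2 >> 2) & 1; }
	static inline int vex_pp(uint8_t b2) { return b2 & 3; }
	/* vvvv is stored inverted; this returns the register number. */
	static inline int vex_vvvv(uint8_t b2)
	{
		return ((b2 >> 3) & 0xf) ^ 0xf;
	}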
index 89b53c9968e7d50c1dd129dd92272f1a47edefe5..b046664f5a1ccf37f3c94b2ed7d8d5b3298e89bf 100644 (file)
@@ -1,11 +1,19 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
 /*
  * memcpy - Copy a memory block.
  *
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+       ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+                     "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
        movq %rdi, %rax
        movq %rdx, %rcx
        shrq $3, %rcx
        movl %edx, %ecx
        rep movsb
        ret
-.Lmemcpy_e:
-       .previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
        movq %rdi, %rax
        movq %rdx, %rcx
        rep movsb
        ret
-.Lmemcpy_e_e:
-       .previous
-
-.weak memcpy
+ENDPROC(memcpy_erms)
 
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
        CFI_STARTPROC
        movq %rdi, %rax
 
@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
        retq
        CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-       /*
-        * Some CPUs are adding enhanced REP MOVSB/STOSB feature
-        * If the feature is supported, memcpy_c_e() is the first choice.
-        * If enhanced rep movsb copy is not available, use fast string copy
-        * memcpy_c() when possible. This is faster and code is simpler than
-        * original memcpy().
-        * Otherwise, original memcpy() is used.
-        * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-        *
-        * Replace only beginning, memcpy is used to apply alternatives,
-        * so it is silly to overwrite itself with nops - reboot is the
-        * only outcome...
-        */
-       .section .altinstructions, "a"
-       altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-                            .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
-       altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-                            .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
-       .previous
+ENDPROC(memcpy_orig)
index 9c4b530575da6e9b70cb8505f1e4b391b6fdff02..0f8a0d0331b91715238f01e8f54ce74525fb0cc4 100644 (file)
@@ -5,7 +5,6 @@
  * This assembly file is re-written from memmove_64.c file.
  *     - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
-#define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
        jg 2f
 
 .Lmemmove_begin_forward:
+       ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+
        /*
         * The movsq instruction has a long startup latency,
         * so we handle small sizes with general-purpose registers.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
 13:
        retq
        CFI_ENDPROC
-
-       .section .altinstr_replacement,"ax"
-.Lmemmove_begin_forward_efs:
-       /* Forward moving data. */
-       movq %rdx, %rcx
-       rep movsb
-       retq
-.Lmemmove_end_forward_efs:
-       .previous
-
-       .section .altinstructions,"a"
-       altinstruction_entry .Lmemmove_begin_forward,           \
-               .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,   \
-               .Lmemmove_end_forward-.Lmemmove_begin_forward,  \
-               .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
-       .previous
 ENDPROC(__memmove)
 ENDPROC(memmove)
index 6f44935c6a606a1607022e889cc60ea151e54b65..93118fb239762ba78efd754d20a756aed0221411 100644 (file)
@@ -5,19 +5,30 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+.weak memset
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
  * simpler and shorter than the original function as well.
- *     
+ *
  * rdi   destination
- * rsi   value (char) 
- * rdx   count (bytes) 
- * 
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
  * rax   original destination
- */    
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ */
+ENTRY(memset)
+ENTRY(__memset)
+       /*
+        * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+        * to use it when possible. If not available, use fast string instructions.
+        *
+        * Otherwise, use original memset function.
+        */
+       ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+                     "jmp memset_erms", X86_FEATURE_ERMS
+
        movq %rdi,%r9
        movq %rdx,%rcx
        andl $7,%edx
@@ -31,8 +42,8 @@
        rep stosb
        movq %r9,%rax
        ret
-.Lmemset_e:
-       .previous
+ENDPROC(memset)
+ENDPROC(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
  *
  * rax   original destination
  */
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
        movq %rdi,%r9
        movb %sil,%al
        movq %rdx,%rcx
        rep stosb
        movq %r9,%rax
        ret
-.Lmemset_e_e:
-       .previous
-
-.weak memset
+ENDPROC(memset_erms)
 
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
        CFI_STARTPROC
        movq %rdi,%r10
 
@@ -134,23 +140,4 @@ ENTRY(__memset)
        jmp .Lafter_bad_alignment
 .Lfinal:
        CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
-       /* Some CPUs support enhanced REP MOVSB/STOSB feature.
-        * It is recommended to use this when possible.
-        *
-        * If enhanced REP MOVSB/STOSB feature is not available, use fast string
-        * instructions.
-        *
-        * Otherwise, use original memset function.
-        *
-        * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-        */
-       .section .altinstructions,"a"
-       altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
-                            .Lfinal-__memset,.Lmemset_e-.Lmemset_c
-       altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
-                            .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
-       .previous
+ENDPROC(memset_orig)
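
After the rework, memset keeps all three implementations as plain symbols - the fast-string body inline under ENTRY(memset), plus memset_erms and memset_orig - and selects among them with one ALTERNATIVE_2 at the entry point. The choice is made once, by patching the jmp at boot, so no per-call branch remains; as a behavioral sketch only:

    #include <stddef.h>

    void *memset_fast_string(void *s, int c, size_t n); /* rep stosq + tail */
    void *memset_erms(void *s, int c, size_t n);        /* rep stosb        */
    void *memset_orig(void *s, int c, size_t n);        /* generic stores   */

    /* erms/rep_good stand for the patched-in CPU feature tests; the ERMS
     * entry comes later in the ALTERNATIVE_2 list, so it wins when both
     * features are present. */
    void *memset_model(void *s, int c, size_t n, int erms, int rep_good)
    {
            if (erms)
                    return memset_erms(s, c, n);
            if (rep_good)
                    return memset_fast_string(s, c, n);
            return memset_orig(s, c, n);
    }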
index f6d13eefad1063d5e525dbc0cfff1ccc12b34bc9..3ca5218fbece0c568ed6079ee2370ff1cedf71f8 100644 (file)
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
        CFI_STARTPROC
-       pushq_cfi %rbx
-       pushq_cfi %rbp
+       pushq_cfi_reg rbx
+       pushq_cfi_reg rbp
        movq    %rdi, %r10      /* Save pointer */
        xorl    %r11d, %r11d    /* Return value */
        movl    (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
        movl    %ebp, 20(%r10)
        movl    %esi, 24(%r10)
        movl    %edi, 28(%r10)
-       popq_cfi %rbp
-       popq_cfi %rbx
+       popq_cfi_reg rbp
+       popq_cfi_reg rbx
        ret
 3:
        CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
        CFI_STARTPROC
-       pushl_cfi %ebx
-       pushl_cfi %ebp
-       pushl_cfi %esi
-       pushl_cfi %edi
+       pushl_cfi_reg ebx
+       pushl_cfi_reg ebp
+       pushl_cfi_reg esi
+       pushl_cfi_reg edi
        pushl_cfi $0              /* Return value */
        pushl_cfi %eax
        movl    4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
        movl    %esi, 24(%eax)
        movl    %edi, 28(%eax)
        popl_cfi %eax
-       popl_cfi %edi
-       popl_cfi %esi
-       popl_cfi %ebp
-       popl_cfi %ebx
+       popl_cfi_reg edi
+       popl_cfi_reg esi
+       popl_cfi_reg ebp
+       popl_cfi_reg ebx
        ret
 3:
        CFI_RESTORE_STATE
index 5dff5f042468acfedf6e4058643855d59c75315d..2322abe4da3b014aef389c03e177c37eec31b802 100644 (file)
  */
 
 #define save_common_regs \
-       pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+       pushl_cfi_reg ecx
 
 #define restore_common_regs \
-       popl_cfi %ecx; CFI_RESTORE ecx
+       popl_cfi_reg ecx
 
        /* Avoid uglifying the argument copying x86-64 needs to do. */
        .macro movq src, dst
  */
 
 #define save_common_regs \
-       pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-       pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-       pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-       pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
-       pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
-       pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-       pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+       pushq_cfi_reg rdi; \
+       pushq_cfi_reg rsi; \
+       pushq_cfi_reg rcx; \
+       pushq_cfi_reg r8;  \
+       pushq_cfi_reg r9;  \
+       pushq_cfi_reg r10; \
+       pushq_cfi_reg r11
 
 #define restore_common_regs \
-       popq_cfi %r11; CFI_RESTORE r11; \
-       popq_cfi %r10; CFI_RESTORE r10; \
-       popq_cfi %r9;  CFI_RESTORE r9; \
-       popq_cfi %r8;  CFI_RESTORE r8; \
-       popq_cfi %rcx; CFI_RESTORE rcx; \
-       popq_cfi %rsi; CFI_RESTORE rsi; \
-       popq_cfi %rdi; CFI_RESTORE rdi
+       popq_cfi_reg r11; \
+       popq_cfi_reg r10; \
+       popq_cfi_reg r9; \
+       popq_cfi_reg r8; \
+       popq_cfi_reg rcx; \
+       popq_cfi_reg rsi; \
+       popq_cfi_reg rdi
 
 #endif
 
 ENTRY(call_rwsem_down_read_failed)
        CFI_STARTPROC
        save_common_regs
-       __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-       CFI_REL_OFFSET __ASM_REG(dx), 0
+       __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
        movq %rax,%rdi
        call rwsem_down_read_failed
-       __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-       CFI_RESTORE __ASM_REG(dx)
+       __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
        restore_common_regs
        ret
        CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
 ENTRY(call_rwsem_downgrade_wake)
        CFI_STARTPROC
        save_common_regs
-       __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-       CFI_REL_OFFSET __ASM_REG(dx), 0
+       __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
        movq %rax,%rdi
        call rwsem_downgrade_wake
-       __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-       CFI_RESTORE __ASM_REG(dx)
+       __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
        restore_common_regs
        ret
        CFI_ENDPROC
index e28cdaf5ac2c629cd8bca126a55a45745e403534..5eb715087b804e7d9cad520b46488c0270740d7c 100644 (file)
        .globl \name
 \name:
        CFI_STARTPROC
-       pushl_cfi %eax
-       CFI_REL_OFFSET eax, 0
-       pushl_cfi %ecx
-       CFI_REL_OFFSET ecx, 0
-       pushl_cfi %edx
-       CFI_REL_OFFSET edx, 0
+       pushl_cfi_reg eax
+       pushl_cfi_reg ecx
+       pushl_cfi_reg edx
 
        .if \put_ret_addr_in_eax
        /* Place EIP in the arg1 */
        .endif
 
        call \func
-       popl_cfi %edx
-       CFI_RESTORE edx
-       popl_cfi %ecx
-       CFI_RESTORE ecx
-       popl_cfi %eax
-       CFI_RESTORE eax
+       popl_cfi_reg edx
+       popl_cfi_reg ecx
+       popl_cfi_reg eax
        ret
        CFI_ENDPROC
        _ASM_NOKPROBE(\name)
index b30b5ebd614ada18d25b3fb7403f7a32e32a96d9..f89ba4e93025dec7c45480600a0ef567d4696e0a 100644 (file)
        CFI_STARTPROC
 
        /* this one pushes 9 elems, the next one would be %rIP */
-       SAVE_ARGS
+       pushq_cfi_reg rdi
+       pushq_cfi_reg rsi
+       pushq_cfi_reg rdx
+       pushq_cfi_reg rcx
+       pushq_cfi_reg rax
+       pushq_cfi_reg r8
+       pushq_cfi_reg r9
+       pushq_cfi_reg r10
+       pushq_cfi_reg r11
 
        .if \put_ret_addr_in_rdi
+       /* 9*8(%rsp) is return addr on stack */
        movq_cfi_restore 9*8, rdi
        .endif
 
 #endif
 #endif
 
-       /* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
        CFI_STARTPROC
-       SAVE_ARGS
+       CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-       RESTORE_ARGS
+       popq_cfi_reg r11
+       popq_cfi_reg r10
+       popq_cfi_reg r9
+       popq_cfi_reg r8
+       popq_cfi_reg rax
+       popq_cfi_reg rcx
+       popq_cfi_reg rdx
+       popq_cfi_reg rsi
+       popq_cfi_reg rdi
        ret
        CFI_ENDPROC
        _ASM_NOKPROBE(restore)
+#endif
index c905e89e19feb5ff79778a11a162dfbaa88852a9..1f33b3d1fd68239c9bb07840d287bcee5d2d26b8 100644 (file)
@@ -69,21 +69,20 @@ EXPORT_SYMBOL(copy_in_user);
  * it is not necessary to optimize tail handling.
  */
 __visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+copy_user_handle_tail(char *to, char *from, unsigned len)
 {
-       char c;
-       unsigned zero_len;
-
        for (; len; --len, to++) {
+               char c;
+
                if (__get_user_nocheck(c, from++, sizeof(char)))
                        break;
                if (__put_user_nocheck(c, to, sizeof(char)))
                        break;
        }
-
-       for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
-               if (__put_user_nocheck(c, to++, sizeof(char)))
-                       break;
        clac();
+
+       /* If the destination is a kernel buffer, we always clear the end */
+       if ((unsigned long)to >= TASK_SIZE_MAX)
+               memset(to, 0, len);
        return len;
 }
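
The simplified tail handler copies byte by byte up to the faulting address and then, instead of honoring a caller-supplied zerorest flag, decides from the destination pointer itself: an address at or above TASK_SIZE_MAX must be a kernel buffer, and its uncopied remainder is zeroed so stale kernel memory can never leak. A compact model of the policy (the TASK_SIZE_MAX value below is the illustrative x86-64 constant, stated as an assumption):

    #include <string.h>

    #define TASK_SIZE_MAX 0x00007ffffffff000UL   /* user/kernel split, x86-64 */

    /* Returns the number of bytes NOT copied, like the real helper. */
    static unsigned long tail_policy(char *to, unsigned long uncopied)
    {
            if ((unsigned long)to >= TASK_SIZE_MAX)  /* kernel destination */
                    memset(to, 0, uncopied);         /* never expose stale data */
            return uncopied;
    }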
index 1a2be7c6895d811be12083b5dab49f92cfb8791f..816488c0b97e3540a59af5c5f4c001fed1f5ba41 100644 (file)
@@ -273,6 +273,9 @@ dd: ESC
 de: ESC
 df: ESC
 # 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
 e0: LOOPNE/LOOPNZ Jb (f64)
 e1: LOOPE/LOOPZ Jb (f64)
 e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
 e5: IN eAX,Ib
 e6: OUT Ib,AL
 e7: OUT Ib,eAX
+# With a 0x66 prefix in 64-bit mode, the immediate offset of "near"
+# jumps and calls is 16-bit on AMD CPUs. For CALL, the pushed return
+# address is 16 bits wide and RSP is decremented by 2, but RSP,
+# unlike RIP, is not truncated to 16 bits.
 e8: CALL Jz (f64)
 e9: JMP-near Jz (f64)
 ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
 # 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
 81: JNO Jz (f64)
 82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
 GrpTable: Grp5
 0: INC Ev
 1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 2: CALLN Ev (f64)
 3: CALLF Ep
 4: JMPN Ev (f64)
index ede025fb46f137ed7576cd5a06547264356c25b0..181c53bac3a7ee8881b8844bae66b951d9beecde 100644 (file)
@@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
        int ret = 0;
 
        /* kprobe_running() needs smp_processor_id() */
-       if (kprobes_built_in() && !user_mode_vm(regs)) {
+       if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, 14))
                        ret = 1;
@@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
        instr = (void *)convert_ip_to_linear(current, regs);
        max_instr = instr + 15;
 
-       if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+       if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
                return 0;
 
        while (instr < max_instr) {
@@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
        if (error_code & PF_USER)
                return false;
 
-       if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+       if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
                return false;
 
        return true;
@@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
         * User-mode registers count as a user access even for any
         * potential system fault or CPU buglet:
         */
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                local_irq_enable();
                error_code |= PF_USER;
                flags |= FAULT_FLAG_USER;
index a110efca6d068f7d881f8c1d955d8a6906457f64..1d553186c4345c02be5c0152764d988cfe365ae1 100644 (file)
 
 /*
  * Tables translating between page_cache_type_t and pte encoding.
- * Minimal supported modes are defined statically, modified if more supported
- * cache modes are available.
- * Index into __cachemode2pte_tbl is the cachemode.
- * Index into __pte2cachemode_tbl are the caching attribute bits of the pte
- * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
+ *
+ * Minimal supported modes are defined statically, they are modified
+ * during bootup if more supported cache modes are available.
+ *
+ *   Index into __cachemode2pte_tbl[] is the cachemode.
+ *
+ *   Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte
+ *   (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2.
  */
 uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = {
-       [_PAGE_CACHE_MODE_WB]           = 0,
-       [_PAGE_CACHE_MODE_WC]           = _PAGE_PWT,
-       [_PAGE_CACHE_MODE_UC_MINUS]     = _PAGE_PCD,
-       [_PAGE_CACHE_MODE_UC]           = _PAGE_PCD | _PAGE_PWT,
-       [_PAGE_CACHE_MODE_WT]           = _PAGE_PCD,
-       [_PAGE_CACHE_MODE_WP]           = _PAGE_PCD,
+       [_PAGE_CACHE_MODE_WB      ]     = 0         | 0        ,
+       [_PAGE_CACHE_MODE_WC      ]     = _PAGE_PWT | 0        ,
+       [_PAGE_CACHE_MODE_UC_MINUS]     = 0         | _PAGE_PCD,
+       [_PAGE_CACHE_MODE_UC      ]     = _PAGE_PWT | _PAGE_PCD,
+       [_PAGE_CACHE_MODE_WT      ]     = 0         | _PAGE_PCD,
+       [_PAGE_CACHE_MODE_WP      ]     = 0         | _PAGE_PCD,
 };
 EXPORT_SYMBOL(__cachemode2pte_tbl);
+
 uint8_t __pte2cachemode_tbl[8] = {
-       [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB,
-       [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC,
-       [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS,
-       [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC,
-       [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
-       [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
-       [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
+       [__pte2cm_idx( 0        | 0         | 0        )] = _PAGE_CACHE_MODE_WB,
+       [__pte2cm_idx(_PAGE_PWT | 0         | 0        )] = _PAGE_CACHE_MODE_WC,
+       [__pte2cm_idx( 0        | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC_MINUS,
+       [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0        )] = _PAGE_CACHE_MODE_UC,
+       [__pte2cm_idx( 0        | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB,
+       [__pte2cm_idx(_PAGE_PWT | 0         | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC,
+       [__pte2cm_idx(0         | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS,
        [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
 };
 EXPORT_SYMBOL(__pte2cachemode_tbl);
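
The realigned initializers make the encoding visible: the index into __pte2cachemode_tbl[] is simply the three PAT-related pte bits packed into index bits 0, 1 and 2. A sketch of the index computation - the bit values are the standard x86 4K-pte positions, stated here as an assumption:

    #include <stdint.h>

    #define _PAGE_PWT 0x008   /* pte bit 3 */
    #define _PAGE_PCD 0x010   /* pte bit 4 */
    #define _PAGE_PAT 0x080   /* pte bit 7 */

    static unsigned int pte2cm_idx(uint64_t pte)
    {
            return  !!(pte & _PAGE_PWT)       |   /* index bit 0 */
                   (!!(pte & _PAGE_PCD) << 1) |   /* index bit 1 */
                   (!!(pte & _PAGE_PAT) << 2);    /* index bit 2 */
    }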
@@ -131,21 +135,7 @@ void  __init early_alloc_pgt_buf(void)
 
 int after_bootmem;
 
-int direct_gbpages
-#ifdef CONFIG_DIRECT_GBPAGES
-                               = 1
-#endif
-;
-
-static void __init init_gbpages(void)
-{
-#ifdef CONFIG_X86_64
-       if (direct_gbpages && cpu_has_gbpages)
-               printk(KERN_INFO "Using GB pages for direct mapping\n");
-       else
-               direct_gbpages = 0;
-#endif
-}
+early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES);
 
 struct map_range {
        unsigned long start;
@@ -157,16 +147,12 @@ static int page_size_mask;
 
 static void __init probe_page_size_mask(void)
 {
-       init_gbpages();
-
 #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK)
        /*
         * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
         * This will simplify cpa(), which otherwise needs to support splitting
         * large pages into small in interrupt context, etc.
         */
-       if (direct_gbpages)
-               page_size_mask |= 1 << PG_LEVEL_1G;
        if (cpu_has_pse)
                page_size_mask |= 1 << PG_LEVEL_2M;
 #endif
@@ -179,6 +165,15 @@ static void __init probe_page_size_mask(void)
        if (cpu_has_pge) {
                cr4_set_bits_and_update_boot(X86_CR4_PGE);
                __supported_pte_mask |= _PAGE_GLOBAL;
+       } else
+               __supported_pte_mask &= ~_PAGE_GLOBAL;
+
+       /* Enable 1 GB linear kernel mappings if available: */
+       if (direct_gbpages && cpu_has_gbpages) {
+               printk(KERN_INFO "Using GB pages for direct mapping\n");
+               page_size_mask |= 1 << PG_LEVEL_1G;
+       } else {
+               direct_gbpages = 0;
        }
 }
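
The open-coded init_gbpages() disappears in favor of a single early_param_on_off() line, which registers both the enabling and the disabling boot parameter for one variable and takes its default from Kconfig; the capability check against cpu_has_gbpages moves into probe_page_size_mask(). The handlers removed from init_64.c just below show what the macro stands in for - roughly, and eliding the macro plumbing:

    /* approximate expansion of
     *   early_param_on_off("gbpages", "nogbpages", direct_gbpages,
     *                      CONFIG_X86_DIRECT_GBPAGES);
     */
    int direct_gbpages = IS_ENABLED(CONFIG_X86_DIRECT_GBPAGES);

    static int __init parse_gbpages_on(char *arg)
    {
            direct_gbpages = 1;
            return 0;
    }
    early_param("gbpages", parse_gbpages_on);

    static int __init parse_gbpages_off(char *arg)
    {
            direct_gbpages = 0;
            return 0;
    }
    early_param("nogbpages", parse_gbpages_off);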
 
index 30eb05ae7061624f7faee0077ce41bddb4991ef7..3fba623e3ba558553d9740d3a994343a2960428f 100644 (file)
@@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
        return 0;
 }
 
-static int __init parse_direct_gbpages_off(char *arg)
-{
-       direct_gbpages = 0;
-       return 0;
-}
-early_param("nogbpages", parse_direct_gbpages_off);
-
-static int __init parse_direct_gbpages_on(char *arg)
-{
-       direct_gbpages = 1;
-       return 0;
-}
-early_param("gbpages", parse_direct_gbpages_on);
-
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
index cd4785bbacb9a86366ccd74121e5161b078e9596..4053bb58bf92e6c328936aed1c8439b528468e1d 100644 (file)
@@ -482,9 +482,16 @@ static void __init numa_clear_kernel_node_hotplug(void)
                                  &memblock.reserved, mb->nid);
        }
 
-       /* Mark all kernel nodes. */
+       /*
+        * Mark all kernel nodes.
+        *
+        * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo
+        * may not include all the memblock.reserved memory ranges because
+        * trim_snb_memory() reserves specific pages for Sandy Bridge graphics.
+        */
        for_each_memblock(reserved, r)
-               node_set(r->nid, numa_kernel_nodes);
+               if (r->nid != MAX_NUMNODES)
+                       node_set(r->nid, numa_kernel_nodes);
 
        /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */
        for (i = 0; i < numa_meminfo.nr_blks; i++) {
index 536ea2fb6e335677df559390520c3312976dcb81..89af288ec6740cfd793a4c4804e939bb023e9453 100644 (file)
@@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m)
        seq_printf(m, "DirectMap4M:    %8lu kB\n",
                        direct_pages_count[PG_LEVEL_2M] << 12);
 #endif
-#ifdef CONFIG_X86_64
        if (direct_gbpages)
                seq_printf(m, "DirectMap1G:    %8lu kB\n",
                        direct_pages_count[PG_LEVEL_1G] << 20);
-#endif
 }
 #else
 static inline void split_page_count(int level) { }
@@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages)
 {
        return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
        return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
-EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
index 7ac68698406c3b35e5ce0b0e98c73c5441e869a3..35af6771a95ad6c126cca1f352b896998116876e 100644 (file)
@@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 
 #ifdef CONFIG_STRICT_DEVMEM
-/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
        return 1;
@@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 
        while (cursor < to) {
                if (!devmem_is_allowed(pfn)) {
-                       printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n",
-                               current->comm, from, to - 1);
+                       printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n",
+                              current->comm, from, to - 1);
                        return 0;
                }
                cursor += PAGE_SIZE;
index 7b22adaad4f1379a1195aec23b38a28a8db83f5c..5a7e5252c878bdcd2e45a11257adc15763cdea5f 100644 (file)
@@ -275,12 +275,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
        }
 }
 
+/*
+ * Xen paravirt assumes that the pgd table occupies one whole page, and the
+ * 64-bit kernel makes the same assumption.
+ *
+ * A PAE kernel that is not running as a Xen domain, however, only needs
+ * to allocate 32 bytes for its pgd instead of one page.
+ */
+#ifdef CONFIG_X86_PAE
+
+#include <linux/slab.h>
+
+#define PGD_SIZE       (PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_ALIGN      32
+
+static struct kmem_cache *pgd_cache;
+
+static int __init pgd_cache_init(void)
+{
+       /*
+        * When a PAE kernel runs as a Xen domain, it does not use a shared
+        * kernel pmd, which in turn requires a whole page for the pgd.
+        */
+       if (!SHARED_KERNEL_PMD)
+               return 0;
+
+       /*
+        * When a PAE kernel is not running as a Xen domain, it uses a shared
+        * kernel pmd, which does not require a whole page for the pgd: 32
+        * bytes are enough. At boot time we therefore create a 32-byte slab
+        * for pgd table allocations.
+        */
+       pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
+                                     SLAB_PANIC, NULL);
+       if (!pgd_cache)
+               return -ENOMEM;
+
+       return 0;
+}
+core_initcall(pgd_cache_init);
+
+static inline pgd_t *_pgd_alloc(void)
+{
+       /*
+        * Without SHARED_KERNEL_PMD the PAE kernel is running as a Xen
+        * domain, so allocate one whole page for the pgd.
+        */
+       if (!SHARED_KERNEL_PMD)
+               return (pgd_t *)__get_free_page(PGALLOC_GFP);
+
+       /*
+        * Otherwise the PAE kernel is not running as a Xen domain, and a
+        * 32-byte slab allocation for the pgd saves memory.
+        */
+       return kmem_cache_alloc(pgd_cache, PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+       if (!SHARED_KERNEL_PMD)
+               free_page((unsigned long)pgd);
+       else
+               kmem_cache_free(pgd_cache, pgd);
+}
+#else
+static inline pgd_t *_pgd_alloc(void)
+{
+       return (pgd_t *)__get_free_page(PGALLOC_GFP);
+}
+
+static inline void _pgd_free(pgd_t *pgd)
+{
+       free_page((unsigned long)pgd);
+}
+#endif /* CONFIG_X86_PAE */
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        pgd_t *pgd;
        pmd_t *pmds[PREALLOCATED_PMDS];
 
-       pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
+       pgd = _pgd_alloc();
 
        if (pgd == NULL)
                goto out;
@@ -310,7 +385,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 out_free_pmds:
        free_pmds(mm, pmds);
 out_free_pgd:
-       free_page((unsigned long)pgd);
+       _pgd_free(pgd);
 out:
        return NULL;
 }
@@ -320,7 +395,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
        pgd_mop_up_pmds(mm, pgd);
        pgd_dtor(pgd);
        paravirt_pgd_free(mm, pgd);
-       free_page((unsigned long)pgd);
+       _pgd_free(pgd);
 }
 
 /*
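
Under PAE the top-level table holds just four 8-byte entries, so the full page formerly allocated for every pgd wasted most of its space. The new kmem_cache hands out 32-byte objects with the 32-byte alignment the hardware demands of the page-directory-pointer table, except when running as a Xen domain (no shared kernel pmd), where a whole page is still required. The arithmetic, with the PAE layout stated as an assumption:

    /* PAE: 4 top-level entries of 8 bytes each; the CPU requires the
     * page-directory-pointer table loaded via CR3 to be 32-byte aligned. */
    #define PAE_PTRS_PER_PGD 4

    typedef unsigned long long pae_pgd_t;   /* 8 bytes */

    _Static_assert(PAE_PTRS_PER_PGD * sizeof(pae_pgd_t) == 32,
                   "a PAE pgd fits in 32 bytes");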
index 5d04be5efb6401b1b7512f4ca519a24a016b20d2..4e664bdb535ad89d09633fd73ea6eca5a46d4097 100644 (file)
@@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
        struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
-       if (!user_mode_vm(regs)) {
+       if (!user_mode(regs)) {
                unsigned long stack = kernel_stack_pointer(regs);
                if (depth)
                        dump_trace(NULL, regs, (unsigned long *)stack, 0,
index d143d216d52bec69b912128d88c3283cd0122c6c..d7f997f7c26d2502a1a188583d6817c3672417d1 100644 (file)
@@ -67,7 +67,7 @@ void __init efi_bgrt_init(void)
 
        image = efi_lookup_mapped_addr(bgrt_tab->image_address);
        if (!image) {
-               image = early_memremap(bgrt_tab->image_address,
+               image = early_ioremap(bgrt_tab->image_address,
                                       sizeof(bmp_header));
                ioremapped = true;
                if (!image) {
@@ -89,7 +89,7 @@ void __init efi_bgrt_init(void)
        }
 
        if (ioremapped) {
-               image = early_memremap(bgrt_tab->image_address,
+               image = early_ioremap(bgrt_tab->image_address,
                                       bmp_header.size);
                if (!image) {
                        pr_err("Ignoring BGRT: failed to map image memory\n");
index dbc8627a5cdf6d569a5f69cbea05ebac924d2279..02744df576d52588a35308998ecc1a138435012e 100644 (file)
@@ -85,12 +85,20 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
        efi_memory_desc_t *virtual_map)
 {
        efi_status_t status;
+       unsigned long flags;
+       pgd_t *save_pgd;
 
-       efi_call_phys_prolog();
+       save_pgd = efi_call_phys_prolog();
+
+       /* Disable interrupts around EFI calls: */
+       local_irq_save(flags);
        status = efi_call_phys(efi_phys.set_virtual_address_map,
                               memory_map_size, descriptor_size,
                               descriptor_version, virtual_map);
-       efi_call_phys_epilog();
+       local_irq_restore(flags);
+
+       efi_call_phys_epilog(save_pgd);
+
        return status;
 }
 
@@ -491,7 +499,8 @@ void __init efi_init(void)
        if (efi_memmap_init())
                return;
 
-       print_efi_memmap();
+       if (efi_enabled(EFI_DBG))
+               print_efi_memmap();
 }
 
 void __init efi_late_init(void)
@@ -939,6 +948,8 @@ static int __init arch_parse_efi_cmdline(char *str)
 {
        if (parse_option_str(str, "old_map"))
                set_bit(EFI_OLD_MEMMAP, &efi.flags);
+       if (parse_option_str(str, "debug"))
+               set_bit(EFI_DBG, &efi.flags);
 
        return 0;
 }
index 40e7cda529365133b50bab8745d9995c47557d53..ed5b67338294f1325fffe5f7d9fce637731d5917 100644 (file)
 
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
- * prolog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
+ * prolog/epilog before/after the invocation to claim the EFI runtime service
+ * handler exclusively and to duplicate a memory mapping in low memory space,
+ * say 0 - 3G.
  */
-static unsigned long efi_rt_eflags;
 
 void efi_sync_low_kernel_mappings(void) {}
 void __init efi_dump_pagetable(void) {}
@@ -57,21 +56,24 @@ void __init efi_map_region(efi_memory_desc_t *md)
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
 void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
        struct desc_ptr gdt_descr;
+       pgd_t *save_pgd;
 
-       local_irq_save(efi_rt_eflags);
-
+       /* The current pgd is swapper_pg_dir; we'll restore it later: */
+       save_pgd = swapper_pg_dir;
        load_cr3(initial_page_table);
        __flush_tlb_all();
 
        gdt_descr.address = __pa(get_cpu_gdt_table(0));
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
+
+       return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
        struct desc_ptr gdt_descr;
 
@@ -79,10 +81,8 @@ void __init efi_call_phys_epilog(void)
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
 
-       load_cr3(swapper_pg_dir);
+       load_cr3(save_pgd);
        __flush_tlb_all();
-
-       local_irq_restore(efi_rt_eflags);
 }
 
 void __init efi_runtime_mkexec(void)
index 17e80d829df0391536ea49379108d1383a39c46f..a0ac0f9c307f661c8b3ed08c4ca6d23507772e36 100644 (file)
@@ -41,9 +41,6 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 
-static pgd_t *save_pgd __initdata;
-static unsigned long efi_flags __initdata;
-
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
  * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
@@ -78,17 +75,18 @@ static void __init early_code_mapping_set_exec(int executable)
        }
 }
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
        unsigned long vaddress;
+       pgd_t *save_pgd;
+
        int pgd;
        int n_pgds;
 
        if (!efi_enabled(EFI_OLD_MEMMAP))
-               return;
+               return NULL;
 
        early_code_mapping_set_exec(1);
-       local_irq_save(efi_flags);
 
        n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
        save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
@@ -99,24 +97,29 @@ void __init efi_call_phys_prolog(void)
                set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
        }
        __flush_tlb_all();
+
+       return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
        /*
         * After the lock is released, the original page table is restored.
         */
-       int pgd;
-       int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+       int pgd_idx;
+       int nr_pgds;
 
-       if (!efi_enabled(EFI_OLD_MEMMAP))
+       if (!save_pgd)
                return;
 
-       for (pgd = 0; pgd < n_pgds; pgd++)
-               set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+       nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+       for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+               set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
        kfree(save_pgd);
+
        __flush_tlb_all();
-       local_irq_restore(efi_flags);
        early_code_mapping_set_exec(0);
 }
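
Both the 32-bit and the 64-bit variants now thread the saved page tables through the call as a return value instead of hiding them in file-scope statics, and interrupt disabling moves to the single caller that needs it (phys_efi_set_virtual_address_map, earlier in this diff). A NULL return from the prolog doubles as a "nothing was switched" signal on the non-EFI_OLD_MEMMAP path. The shape of the pattern, sketched with an opaque token type:

    #include <stddef.h>

    struct saved_pgd;                           /* opaque token */

    struct saved_pgd *call_prolog(void);        /* may return NULL */

    void call_epilog(struct saved_pgd *saved)
    {
            if (!saved)     /* prolog decided no switch was needed */
                    return;
            /* ... restore the saved tables and free the token ... */
    }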
 
index c9a0838890e241692f6a03f4984485b6746c2f72..278e4da4222f79cf82aa2f8dcbe0e371243fb836 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <asm-generic/sections.h>
+#include <asm/cpu_device_id.h>
 #include <asm/imr.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -101,6 +102,12 @@ static void __init imr_self_test(void)
        }
 }
 
+static const struct x86_cpu_id imr_ids[] __initconst = {
+       { X86_VENDOR_INTEL, 5, 9 },     /* Intel Quark SoC X1000. */
+       {}
+};
+MODULE_DEVICE_TABLE(x86cpu, imr_ids);
+
 /**
  * imr_self_test_init - entry point for IMR driver.
  *
@@ -108,7 +115,8 @@ static void __init imr_self_test(void)
  */
 static int __init imr_self_test_init(void)
 {
-       imr_self_test();
+       if (x86_match_cpu(imr_ids))
+               imr_self_test();
        return 0;
 }
 
index 994798548b1ad57288e4b51c01422d3dad70ff7f..3b6ec42718e460717182c2762ffd7e6a005fff6f 100644 (file)
@@ -415,7 +415,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
        struct reset_args reset_args;
 
        reset_args.sender = sender;
-       cpus_clear(*mask);
+       cpumask_clear(mask);
        /* find a single cpu for each uvhub in this distribution mask */
        maskbits = sizeof(struct pnmask) * BITSPERBYTE;
        /* each bit is a pnode relative to the partition base pnode */
@@ -425,7 +425,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
                        continue;
                apnode = pnode + bcp->partition_base_pnode;
                cpu = pnode_to_first_cpu(apnode, smaster);
-               cpu_set(cpu, *mask);
+               cpumask_set_cpu(cpu, mask);
        }
 
        /* IPI all cpus; preemption is already disabled */
@@ -1126,7 +1126,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
        /* don't actually do a shootdown of the local cpu */
        cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
-       if (cpu_isset(cpu, *cpumask))
+       if (cpumask_test_cpu(cpu, cpumask))
                stat->s_ntargself++;
 
        bau_desc = bcp->descriptor_base;
index 3e32ed5648a03894604976c4a9b2ae5c155c1750..757678fb26e1a06277687c1c90f86e75377de03a 100644 (file)
@@ -134,7 +134,7 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
-       struct tss_struct *t = &per_cpu(init_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
        struct desc_struct *desc = get_cpu_gdt_table(cpu);
        tss_desc tss;
index b3560ece1c9fbae0ff05c14824b76a07efdb0776..ef8187f9d28d96651d3d52e39d4441c60a408332 100644 (file)
 110    i386    iopl                    sys_iopl
 111    i386    vhangup                 sys_vhangup
 112    i386    idle
-113    i386    vm86old                 sys_vm86old                     sys32_vm86_warning
+113    i386    vm86old                 sys_vm86old                     sys_ni_syscall
 114    i386    wait4                   sys_wait4                       compat_sys_wait4
 115    i386    swapoff                 sys_swapoff
 116    i386    sysinfo                 sys_sysinfo                     compat_sys_sysinfo
 163    i386    mremap                  sys_mremap
 164    i386    setresuid               sys_setresuid16
 165    i386    getresuid               sys_getresuid16
-166    i386    vm86                    sys_vm86                        sys32_vm86_warning
+166    i386    vm86                    sys_vm86                        sys_ni_syscall
 167    i386    query_module
 168    i386    poll                    sys_poll
 169    i386    nfsservctl
index 8d656fbb57aab2606132858d48b2db7e99e8bf7c..9ef32d5f1b19e67ed10b69c67be5f53806c19ffa 100644 (file)
 169    common  reboot                  sys_reboot
 170    common  sethostname             sys_sethostname
 171    common  setdomainname           sys_setdomainname
-172    common  iopl                    stub_iopl
+172    common  iopl                    sys_iopl
 173    common  ioperm                  sys_ioperm
 174    64      create_module
 175    common  init_module             sys_init_module
index 2d7d9a1f5b531ee399b7a0e6a1f9eb4a190fd820..8ffd2146fa6a0367be6710913b5c4161f1a8859b 100644 (file)
@@ -64,8 +64,8 @@
  */
 static inline void rdtsc_barrier(void)
 {
-       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+       alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+                         "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif
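
rdtsc_barrier() previously reserved two independent 3-byte NOP slots and patched each one separately; with alternative_2() a single empty original is padded once, and after boot the site holds exactly one fence, or nothing. Entries are applied in order, so a later match overrides an earlier one; in practice the two features are vendor-specific and mutually exclusive. A C model of the post-patch behavior (cpu_has() and the feature values are assumed stand-ins):

    int cpu_has(int feature);                /* assumed helper */

    #define X86_FEATURE_MFENCE_RDTSC 1       /* illustrative values */
    #define X86_FEATURE_LFENCE_RDTSC 2

    static void rdtsc_barrier_model(void)
    {
            if (cpu_has(X86_FEATURE_LFENCE_RDTSC))        /* later entry wins */
                    __asm__ volatile("lfence" ::: "memory");
            else if (cpu_has(X86_FEATURE_MFENCE_RDTSC))
                    __asm__ volatile("mfence" ::: "memory");
            /* else: the site keeps its NOP padding */
    }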
index 5cdfa9db22175ed8dc327465b4cf033a7a65d3bc..a75d8700472a4f52db0211217cf7bbcd4f2dc1f5 100644 (file)
@@ -16,7 +16,7 @@
  */
 
 /* Not going to be implemented by UML, since we have no hardware. */
-#define stub_iopl sys_ni_syscall
+#define sys_iopl sys_ni_syscall
 #define sys_ioperm sys_ni_syscall
 
 /*
index 7b9be9822724ebd96347bdb41c96fdcdb7a5504e..275a3a8b78afa3221b78296d2a7e3c2945de512b 100644 (file)
@@ -51,7 +51,7 @@ VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
        $(call if_changed,vdso)
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi
 hostprogs-y                    += vdso2c
 
 quiet_cmd_vdso2c = VDSO2C  $@
@@ -206,4 +206,4 @@ $(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
 PHONY += vdso_install $(vdso_img_insttargets)
 vdso_install: $(vdso_img_insttargets) FORCE
 
-clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64*
+clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
index 9793322751e02f63ddba0d1b8fef5f21b0a4d502..40d2473836c923acc5705018bf3aebf50cfb12b8 100644 (file)
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
        cycle_t ret;
        u64 last;
        u32 version;
+       u32 migrate_count;
        u8 flags;
        unsigned cpu, cpu1;
 
 
        /*
-        * Note: hypervisor must guarantee that:
-        * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-        * 2. that per-CPU pvclock time info is updated if the
-        *    underlying CPU changes.
-        * 3. that version is increased whenever underlying CPU
-        *    changes.
-        *
+        * When looping to get a consistent (time-info, tsc) pair, we
+        * also need to deal with the possibility that we get switched to
+        * another vcpu, so make sure we always re-fetch the time info for
+        * the current vcpu.
         */
        do {
                cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -102,20 +99,27 @@ static notrace cycle_t vread_pvclock(int *mode)
                 * __getcpu() calls (Gleb).
                 */
 
-               pvti = get_pvti(cpu);
+               /* Make sure migrate_count will change if we leave the VCPU. */
+               do {
+                       pvti = get_pvti(cpu);
+                       migrate_count = pvti->migrate_count;
+
+                       cpu1 = cpu;
+                       cpu = __getcpu() & VGETCPU_CPU_MASK;
+               } while (unlikely(cpu != cpu1));
 
                version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
                /*
                 * Test we're still on the cpu as well as the version.
-                * We could have been migrated just after the first
-                * vgetcpu but before fetching the version, so we
-                * wouldn't notice a version change.
+                * - We must read the TSC of pvti's VCPU.
+                * - KVM doesn't follow the versioning protocol, so the data
+                *   could change before the version does if we left the VCPU.
                 */
-               cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-       } while (unlikely(cpu != cpu1 ||
-                         (pvti->pvti.version & 1) ||
-                         pvti->pvti.version != version));
+               smp_rmb();
+       } while (unlikely((pvti->pvti.version & 1) ||
+                         pvti->pvti.version != version ||
+                         pvti->migrate_count != migrate_count));
 
        if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
                *mode = VCLOCK_NONE;
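
The rewritten loop has to defeat two independent races: the hypervisor may be mid-update (odd or changed version), and the guest may be migrated to another VCPU between fetching the CPU number and reading that CPU's time info. The new inner loop pins pvti to the current VCPU; migrate_count then catches a migration after the pin, which the version check alone cannot, since KVM may rewrite the data without bumping version when the task moves. Skeleton of the protocol, with stubbed helpers (a sketch, not the vDSO code):

    struct pvti {
            unsigned int version;
            unsigned int migrate_count;
            /* ... tsc timestamp, scale, offset ... */
    };

    unsigned int getcpu(void);
    struct pvti *get_pvti(unsigned int cpu);
    void rmb(void);                         /* read barrier */

    void read_consistent(void)
    {
            unsigned int cpu, cpu1, version, migrate;
            struct pvti *p;

            do {
                    cpu = getcpu();
                    do {                    /* pin p to the current vcpu */
                            p = get_pvti(cpu);
                            migrate = p->migrate_count;
                            cpu1 = cpu;
                            cpu = getcpu();
                    } while (cpu != cpu1);

                    version = p->version;
                    /* ... read the payload ... */
                    rmb();                  /* payload reads before re-check */
            } while ((version & 1) ||               /* writer mid-update */
                     p->version != version ||       /* writer intervened */
                     p->migrate_count != migrate);  /* we changed vcpus  */
    }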
index 5415b5613d5545b68dc88ee08de03f1061c48f8c..6b286bb5251ce1d15cc00299ae096e7518f03986 100644 (file)
@@ -19,8 +19,6 @@ __kernel_vsyscall:
 .Lpush_ebp:
        movl    %ecx, %ebp
        syscall
-       movl    $__USER32_DS, %ecx
-       movl    %ecx, %ss
        movl    %ebp, %ecx
        popl    %ebp
 .Lpop_ebp:
index 5240f563076de2e0e27c92af2d04ad03d213ee8f..81665c9f21327f3e7db5088a94cbcef127bb3d93 100644 (file)
@@ -912,6 +912,7 @@ static void xen_load_sp0(struct tss_struct *tss,
        mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
+       tss->x86_tss.sp0 = thread->sp0;
 }
 
 static void xen_set_iopl_mask(unsigned mask)
index 08e8489c47f1ba058ca57dd6353d6bf9c1776ba7..7413ee3706d02502d2dc9a94fa90fb5b97495e90 100644 (file)
@@ -445,15 +445,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
        int rc;
 
-       per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
-       irq_ctx_init(cpu);
-#else
-       clear_tsk_thread_flag(idle, TIF_FORK);
-#endif
-       per_cpu(kernel_stack, cpu) =
-               (unsigned long)task_stack_page(idle) -
-               KERNEL_STACK_OFFSET + THREAD_SIZE;
+       common_cpu_up(cpu, idle);
 
        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
@@ -468,10 +460,6 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
        if (rc)
                return rc;
 
-       if (num_online_cpus() == 1)
-               /* Just in case we booted with a single CPU. */
-               alternatives_enable_smp();
-
        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;
index c4df9dbd63b7c81f70c624c417d8362b85d3d34e..d9497698645a53b10ab4b62ddc98d12f4412a616 100644 (file)
@@ -1,5 +1,5 @@
 #include <linux/types.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
-       unsigned long reason = (unsigned long)data;
-
        /* Boot processor notified via generic timekeeping_resume() */
-       if ( smp_processor_id() == 0)
+       if (smp_processor_id() == 0)
                return;
 
-       clockevents_notify(reason, NULL);
+       tick_resume_local();
 }
 
 void xen_arch_resume(void)
 {
-       on_each_cpu(xen_vcpu_notify_restore,
-                   (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
+       on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
index 53adefda4275330a810b6d883b6ad8b58a72730c..985fc3ee0973c85f916c67cd9a40fc9c2c73d340 100644 (file)
@@ -68,11 +68,11 @@ ENTRY(xen_sysret64)
         * We're already on the usermode stack at this point, but
         * still with the kernel gs, so we can easily switch back
         */
-       movq %rsp, PER_CPU_VAR(old_rsp)
+       movq %rsp, PER_CPU_VAR(rsp_scratch)
        movq PER_CPU_VAR(kernel_stack), %rsp
 
        pushq $__USER_DS
-       pushq PER_CPU_VAR(old_rsp)
+       pushq PER_CPU_VAR(rsp_scratch)
        pushq %r11
        pushq $__USER_CS
        pushq %rcx
@@ -87,11 +87,11 @@ ENTRY(xen_sysret32)
         * We're already on the usermode stack at this point, but
         * still with the kernel gs, so we can easily switch back
         */
-       movq %rsp, PER_CPU_VAR(old_rsp)
+       movq %rsp, PER_CPU_VAR(rsp_scratch)
        movq PER_CPU_VAR(kernel_stack), %rsp
 
        pushq $__USER32_DS
-       pushq PER_CPU_VAR(old_rsp)
+       pushq PER_CPU_VAR(rsp_scratch)
        pushq %r11
        pushq $__USER32_CS
        pushq %rcx
index b7b8933ec24188b2807229af1aa844503b46dc89..33c428530193548e38e0ec4ce3608e08be8b1d8d 100644 (file)
@@ -1457,7 +1457,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
                do {
                        page = alloc_pages_node(set->numa_node,
-                               GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                               GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
                                this_order);
                        if (page)
                                break;
@@ -1479,8 +1479,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                left -= to_do * rq_size;
                for (j = 0; j < to_do; j++) {
                        tags->rqs[i] = p;
-                       tags->rqs[i]->atomic_flags = 0;
-                       tags->rqs[i]->cmd_flags = 0;
                        if (set->ops->init_request) {
                                if (set->ops->init_request(set->driver_data,
                                                tags->rqs[i], hctx_idx, i,
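
Adding __GFP_ZERO to the request-map page allocation means every struct request carved out of those pages starts out zeroed, which is what lets the explicit atomic_flags/cmd_flags clears be dropped from the carve-out loop. The userspace analogue of the trade:

    #include <stdlib.h>

    struct request { unsigned int atomic_flags, cmd_flags; /* ... */ };

    static struct request *alloc_rq_map(size_t nr)
    {
            /* calloc plays the role of __GFP_ZERO: the memory arrives
             * zeroed, so per-field "= 0" stores on each request would
             * be redundant. */
            return calloc(nr, sizeof(struct request));
    }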
index c7b105c0e1d377991f1e324fae161535e01efe27..6bc9cbc01ad6a3f20c27740ebbf1a919e7c74d0d 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/cpu.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <asm/mwait.h>
@@ -41,8 +41,6 @@ static unsigned long power_saving_mwait_eax;
 
 static unsigned char tsc_detected_unstable;
 static unsigned char tsc_marked_unstable;
-static unsigned char lapic_detected_unstable;
-static unsigned char lapic_marked_unstable;
 
 static void power_saving_mwait_init(void)
 {
@@ -82,13 +80,10 @@ static void power_saving_mwait_init(void)
                 */
                if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        tsc_detected_unstable = 1;
-               if (!boot_cpu_has(X86_FEATURE_ARAT))
-                       lapic_detected_unstable = 1;
                break;
        default:
-               /* TSC & LAPIC could halt in idle */
+               /* TSC could halt in idle */
                tsc_detected_unstable = 1;
-               lapic_detected_unstable = 1;
        }
 #endif
 }
@@ -155,7 +150,6 @@ static int power_saving_thread(void *data)
        sched_setscheduler(current, SCHED_RR, &param);
 
        while (!kthread_should_stop()) {
-               int cpu;
                unsigned long expire_time;
 
                try_to_freeze();
@@ -177,28 +171,15 @@ static int power_saving_thread(void *data)
                                mark_tsc_unstable("TSC halts in idle");
                                tsc_marked_unstable = 1;
                        }
-                       if (lapic_detected_unstable && !lapic_marked_unstable) {
-                               int i;
-                               /* LAPIC could halt in idle, so notify users */
-                               for_each_online_cpu(i)
-                                       clockevents_notify(
-                                               CLOCK_EVT_NOTIFY_BROADCAST_ON,
-                                               &i);
-                               lapic_marked_unstable = 1;
-                       }
                        local_irq_disable();
-                       cpu = smp_processor_id();
-                       if (lapic_marked_unstable)
-                               clockevents_notify(
-                                       CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+                       tick_broadcast_enable();
+                       tick_broadcast_enter();
                        stop_critical_timings();
 
                        mwait_idle_with_hints(power_saving_mwait_eax, 1);
 
                        start_critical_timings();
-                       if (lapic_marked_unstable)
-                               clockevents_notify(
-                                       CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+                       tick_broadcast_exit();
                        local_irq_enable();
 
                        if (time_before(expire_time, jiffies)) {
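
acpi_pad's idle loop migrates from raw clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_*) calls to the typed tick-broadcast API, which also lets the per-CPU LAPIC bookkeeping (lapic_detected_unstable and friends) go away: tick_broadcast_enable()/enter()/exit() act on the current CPU implicitly. The resulting bracket around the mwait, sketched with stub declarations:

    void local_irq_disable(void);
    void local_irq_enable(void);
    void tick_broadcast_enable(void);  /* opt this CPU into broadcast wakeups */
    void tick_broadcast_enter(void);   /* local tick may stop while idle      */
    void tick_broadcast_exit(void);
    void mwait_idle(void);             /* stands in for mwait_idle_with_hints */

    static void deep_idle_once(void)
    {
            local_irq_disable();
            tick_broadcast_enable();
            tick_broadcast_enter();
            mwait_idle();              /* the LAPIC timer may be dead here */
            tick_broadcast_exit();
            local_irq_enable();
    }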
index c6bb9f1257c92844fa076c0585cf73125d278dec..39e0c8e36244f75aac3de4f09a0a89817e035b91 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/sched.h>       /* need_resched() */
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/syscore_ops.h>
 #include <acpi/processor.h>
@@ -157,12 +157,11 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr,
 static void __lapic_timer_propagate_broadcast(void *arg)
 {
        struct acpi_processor *pr = (struct acpi_processor *) arg;
-       unsigned long reason;
 
-       reason = pr->power.timer_broadcast_on_state < INT_MAX ?
-               CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
-
-       clockevents_notify(reason, &pr->id);
+       if (pr->power.timer_broadcast_on_state < INT_MAX)
+               tick_broadcast_enable();
+       else
+               tick_broadcast_disable();
 }
 
 static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
@@ -179,11 +178,10 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr,
        int state = cx - pr->power.states;
 
        if (state >= pr->power.timer_broadcast_on_state) {
-               unsigned long reason;
-
-               reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
-                       CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
-               clockevents_notify(reason, &pr->id);
+               if (broadcast)
+                       tick_broadcast_enter();
+               else
+                       tick_broadcast_exit();
        }
 }
 
@@ -922,7 +920,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
                return -EINVAL;
 
        drv->safe_state_index = -1;
-       for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
+       for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) {
                drv->states[i].name[0] = '\0';
                drv->states[i].desc[0] = '\0';
        }
index 0a533653ef3bec4ea52d4883792eb4f587c7a78c..609e4c84f485b89ab0d219a3cfd06a2faf5a68ba 100644 (file)
@@ -1,3 +1,6 @@
+# For include/trace/define_trace.h to include trace.h
+CFLAGS_regmap.o := -I$(src)
+
 obj-$(CONFIG_REGMAP) += regmap.o regcache.o
 obj-$(CONFIG_REGMAP) += regcache-rbtree.o regcache-lzo.o regcache-flat.o
 obj-$(CONFIG_DEBUG_FS) += regmap-debugfs.o
index 87db9893b463ba505acc3c28e253b5a08cf20c90..7eb7b3b98794849c6718c068504c7ac1acf3a2c9 100644 (file)
@@ -15,8 +15,8 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/sort.h>
-#include <trace/events/regmap.h>
 
+#include "trace.h"
 #include "internal.h"
 
 static const struct regcache_ops *cache_types[] = {
index dbfe6a69c3daa67969df7c670f9e5144a053afb6..6273ff072f3eaa4cbd777b1a4f41fb3cf533a42a 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/sched.h>
 
 #define CREATE_TRACE_POINTS
-#include <trace/events/regmap.h>
+#include "trace.h"
 
 #include "internal.h"
 
diff --git a/drivers/base/regmap/trace.h b/drivers/base/regmap/trace.h
new file mode 100644 (file)
index 0000000..64586a1
--- /dev/null
@@ -0,0 +1,257 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM regmap
+
+#if !defined(_TRACE_REGMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_REGMAP_H
+
+#include <linux/ktime.h>
+#include <linux/tracepoint.h>
+
+#include "internal.h"
+
+/*
+ * Log register events
+ */
+DECLARE_EVENT_CLASS(regmap_reg,
+
+       TP_PROTO(struct regmap *map, unsigned int reg,
+                unsigned int val),
+
+       TP_ARGS(map, reg, val),
+
+       TP_STRUCT__entry(
+               __string(       name,           regmap_name(map)        )
+               __field(        unsigned int,   reg                     )
+               __field(        unsigned int,   val                     )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, regmap_name(map));
+               __entry->reg = reg;
+               __entry->val = val;
+       ),
+
+       TP_printk("%s reg=%x val=%x", __get_str(name),
+                 (unsigned int)__entry->reg,
+                 (unsigned int)__entry->val)
+);
+
+DEFINE_EVENT(regmap_reg, regmap_reg_write,
+
+       TP_PROTO(struct regmap *map, unsigned int reg,
+                unsigned int val),
+
+       TP_ARGS(map, reg, val)
+
+);
+
+DEFINE_EVENT(regmap_reg, regmap_reg_read,
+
+       TP_PROTO(struct regmap *map, unsigned int reg,
+                unsigned int val),
+
+       TP_ARGS(map, reg, val)
+
+);
+
+DEFINE_EVENT(regmap_reg, regmap_reg_read_cache,
+
+       TP_PROTO(struct regmap *map, unsigned int reg,
+                unsigned int val),
+
+       TP_ARGS(map, reg, val)
+
+);
+
+DECLARE_EVENT_CLASS(regmap_block,
+
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
+
+       TP_ARGS(map, reg, count),
+
+       TP_STRUCT__entry(
+               __string(       name,           regmap_name(map)        )
+               __field(        unsigned int,   reg                     )
+               __field(        int,            count                   )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, regmap_name(map));
+               __entry->reg = reg;
+               __entry->count = count;
+       ),
+
+       TP_printk("%s reg=%x count=%d", __get_str(name),
+                 (unsigned int)__entry->reg,
+                 (int)__entry->count)
+);
+
+DEFINE_EVENT(regmap_block, regmap_hw_read_start,
+
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
+
+       TP_ARGS(map, reg, count)
+);
+
+DEFINE_EVENT(regmap_block, regmap_hw_read_done,
+
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
+
+       TP_ARGS(map, reg, count)
+);
+
+DEFINE_EVENT(regmap_block, regmap_hw_write_start,
+
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
+
+       TP_ARGS(map, reg, count)
+);
+
+DEFINE_EVENT(regmap_block, regmap_hw_write_done,
+
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
+
+       TP_ARGS(map, reg, count)
+);
+
+TRACE_EVENT(regcache_sync,
+
+       TP_PROTO(struct regmap *map, const char *type,
+                const char *status),
+
+       TP_ARGS(map, type, status),
+
+       TP_STRUCT__entry(
+               __string(       name,           regmap_name(map)        )
+               __string(       status,         status                  )
+               __string(       type,           type                    )
+               __field(        int,            type                    )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, regmap_name(map));
+               __assign_str(status, status);
+               __assign_str(type, type);
+       ),
+
+       TP_printk("%s type=%s status=%s", __get_str(name),
+                 __get_str(type), __get_str(status))
+);
+
+DECLARE_EVENT_CLASS(regmap_bool,
+
+       TP_PROTO(struct regmap *map, bool flag),
+
+       TP_ARGS(map, flag),
+
+       TP_STRUCT__entry(
+               __string(       name,           regmap_name(map)        )
+               __field(        int,            flag                    )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, regmap_name(map));
+               __entry->flag = flag;
+       ),
+
+       TP_printk("%s flag=%d", __get_str(name),
+                 (int)__entry->flag)
+);
+
+DEFINE_EVENT(regmap_bool, regmap_cache_only,
+
+       TP_PROTO(struct regmap *map, bool flag),
+
+       TP_ARGS(map, flag)
+
+);
+
+DEFINE_EVENT(regmap_bool, regmap_cache_bypass,
+
+       TP_PROTO(struct regmap *map, bool flag),
+
+       TP_ARGS(map, flag)
+
+);
+
+DECLARE_EVENT_CLASS(regmap_async,
+
+       TP_PROTO(struct regmap *map),
+
+       TP_ARGS(map),
+
+       TP_STRUCT__entry(
+               __string(       name,           regmap_name(map)        )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, regmap_name(map));
+       ),
+
+       TP_printk("%s", __get_str(name))
+);
+
+DEFINE_EVENT(regmap_block, regmap_async_write_start,
+
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
+
+       TP_ARGS(map, reg, count)
+);
+
+DEFINE_EVENT(regmap_async, regmap_async_io_complete,
+
+       TP_PROTO(struct regmap *map),
+
+       TP_ARGS(map)
+
+);
+
+DEFINE_EVENT(regmap_async, regmap_async_complete_start,
+
+       TP_PROTO(struct regmap *map),
+
+       TP_ARGS(map)
+
+);
+
+DEFINE_EVENT(regmap_async, regmap_async_complete_done,
+
+       TP_PROTO(struct regmap *map),
+
+       TP_ARGS(map)
+
+);
+
+TRACE_EVENT(regcache_drop_region,
+
+       TP_PROTO(struct regmap *map, unsigned int from,
+                unsigned int to),
+
+       TP_ARGS(map, from, to),
+
+       TP_STRUCT__entry(
+               __string(       name,           regmap_name(map)        )
+               __field(        unsigned int,   from                    )
+               __field(        unsigned int,   to                      )
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, regmap_name(map));
+               __entry->from = from;
+               __entry->to = to;
+       ),
+
+       TP_printk("%s %u-%u", __get_str(name), (unsigned int)__entry->from,
+                 (unsigned int)__entry->to)
+);
+
+#endif /* _TRACE_REGMAP_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
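
Moving the tracepoints out of include/trace/events/regmap.h into a driver-local trace.h keeps regmap internals such as struct regmap and regmap_name() out of the global trace namespace. The pattern needs two pieces: the CFLAGS_regmap.o := -I$(src) added to the Makefile above, so that define_trace.h can locate the header via TRACE_INCLUDE_PATH ., and exactly one translation unit defining the points:

    /* In exactly one .c file (here drivers/base/regmap/regmap.c): */
    #define CREATE_TRACE_POINTS
    #include "trace.h"      /* emits the tracepoint definitions */

    /* Every other user (e.g. regcache.c) just declares them: */
    #include "trace.h"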
index 79524ed2a3cb0d8bb24ebf94af80bd461591f96f..8753b0f6a317790562cc1126805a11eea3a25a5d 100644 (file)
@@ -125,6 +125,7 @@ static int ipmi_powernv_recv(struct ipmi_smi_powernv *smi)
        spin_lock_irqsave(&smi->msg_lock, flags);
 
        if (!smi->cur_msg) {
+               spin_unlock_irqrestore(&smi->msg_lock, flags);
                pr_warn("no current message?\n");
                return 0;
        }
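
This one-liner closes a lock leak: the early "no current message" return previously left msg_lock held with interrupts off, deadlocking the next acquirer. The general shape of the bug and the fix, as a fragment:

    /* Every early return inside a lock/unlock bracket must release the
     * lock (and restore the irq state) itself: */
    spin_lock_irqsave(&smi->msg_lock, flags);
    if (!smi->cur_msg) {
            spin_unlock_irqrestore(&smi->msg_lock, flags);  /* the fix */
            return 0;
    }
    /* ... normal path drops the lock at its end ... */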
index f6646ed3047e09a3656b089491f0afaa14982af1..518585c1ce94626b1142f83bd0697afef9c05438 100644 (file)
@@ -262,6 +262,11 @@ struct smi_info {
         */
        bool supports_event_msg_buff;
 
+       /*
+        * Can we clear the global enables receive irq bit?
+        */
+       bool cannot_clear_recv_irq_bit;
+
        /*
         * Did we get an attention that we did not handle?
         */
@@ -461,6 +466,9 @@ static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
  * allocate messages, we just leave them in the BMC and run the system
  * polled until we can allocate some memory.  Once we have some
  * memory, we will re-enable the interrupt.
+ *
+ * Note that we cannot just use disable_irq(), since the interrupt may
+ * be shared.
  */
 static inline bool disable_si_irq(struct smi_info *smi_info)
 {
@@ -549,20 +557,15 @@ static u8 current_global_enables(struct smi_info *smi_info, u8 base,
 
        if (smi_info->supports_event_msg_buff)
                enables |= IPMI_BMC_EVT_MSG_BUFF;
-       else
-               enables &= ~IPMI_BMC_EVT_MSG_BUFF;
 
-       if (smi_info->irq && !smi_info->interrupt_disabled)
+       if ((smi_info->irq && !smi_info->interrupt_disabled) ||
+           smi_info->cannot_clear_recv_irq_bit)
                enables |= IPMI_BMC_RCV_MSG_INTR;
-       else
-               enables &= ~IPMI_BMC_RCV_MSG_INTR;
 
        if (smi_info->supports_event_msg_buff &&
            smi_info->irq && !smi_info->interrupt_disabled)
 
                enables |= IPMI_BMC_EVT_MSG_INTR;
-       else
-               enables &= ~IPMI_BMC_EVT_MSG_INTR;
 
        *irq_on = enables & (IPMI_BMC_EVT_MSG_INTR | IPMI_BMC_RCV_MSG_INTR);
 
@@ -2900,6 +2903,96 @@ static int try_get_dev_id(struct smi_info *smi_info)
        return rv;
 }
 
+/*
+ * Some BMCs do not support clearing the receive irq bit in the global
+ * enables (even if they don't support interrupts on the BMC).  Check
+ * for this and handle it properly.
+ */
+static void check_clr_rcv_irq(struct smi_info *smi_info)
+{
+       unsigned char         msg[3];
+       unsigned char         *resp;
+       unsigned long         resp_len;
+       int                   rv;
+
+       resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
+       if (!resp) {
+               printk(KERN_WARNING PFX "Out of memory allocating response for"
+                      " global enables command, cannot check recv irq bit"
+                      " handling.\n");
+               return;
+       }
+
+       msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+       msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
+       smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+
+       rv = wait_for_msg_done(smi_info);
+       if (rv) {
+               printk(KERN_WARNING PFX "Error getting response from get"
+                      " global enables command, cannot check recv irq bit"
+                      " handling.\n");
+               goto out;
+       }
+
+       resp_len = smi_info->handlers->get_result(smi_info->si_sm,
+                                                 resp, IPMI_MAX_MSG_LENGTH);
+
+       if (resp_len < 4 ||
+                       resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
+                       resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD   ||
+                       resp[2] != 0) {
+               printk(KERN_WARNING PFX "Invalid return from get global"
+                      " enables command, cannot check recv irq bit"
+                      " handling.\n");
+               rv = -EINVAL;
+               goto out;
+       }
+
+       if ((resp[3] & IPMI_BMC_RCV_MSG_INTR) == 0)
+               /* Already clear, should work ok. */
+               goto out;
+
+       msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+       msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
+       msg[2] = resp[3] & ~IPMI_BMC_RCV_MSG_INTR;
+       smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+
+       rv = wait_for_msg_done(smi_info);
+       if (rv) {
+               printk(KERN_WARNING PFX "Error getting response from set"
+                      " global enables command, cannot check recv irq bit"
+                      " handling.\n");
+               goto out;
+       }
+
+       resp_len = smi_info->handlers->get_result(smi_info->si_sm,
+                                                 resp, IPMI_MAX_MSG_LENGTH);
+
+       if (resp_len < 3 ||
+                       resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
+                       resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) {
+               printk(KERN_WARNING PFX "Invalid return from get global"
+                      " enables command, cannot check recv irq bit"
+                      " handling.\n");
+               rv = -EINVAL;
+               goto out;
+       }
+
+       if (resp[2] != 0) {
+               /*
+                * An error when setting the receive irq bit means
+                * clearing the bit is not supported.
+                */
+               printk(KERN_WARNING PFX "The BMC does not support clearing"
+                      " the recv irq bit, compensating, but the BMC needs to"
+                      " be fixed.\n");
+               smi_info->cannot_clear_recv_irq_bit = true;
+       }
+ out:
+       kfree(resp);
+}
+
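The probe just added is a get/modify/verify handshake over raw IPMI messages:
read the global enables, write them back with RCV_MSG_INTR cleared, and treat
a non-zero completion code as "this BMC cannot clear the bit". The message
framing, annotated (same encoding as the code above):

        /* request: NetFn (App = 0x06) in bits 7..2 of byte 0, then the cmd */
        msg[0] = IPMI_NETFN_APP_REQUEST << 2;
        msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;

        /*
         * response layout checked above:
         *   resp[0] = (IPMI_NETFN_APP_REQUEST | 1) << 2  -- response NetFn
         *   resp[1] = echoed command
         *   resp[2] = completion code, 0 on success
         *   resp[3] = the global enables byte itself
         */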
 static int try_enable_event_buffer(struct smi_info *smi_info)
 {
        unsigned char         msg[3];
@@ -3395,6 +3488,8 @@ static int try_smi_init(struct smi_info *new_smi)
                goto out_err;
        }
 
+       check_clr_rcv_irq(new_smi);
+
        setup_oem_data_handler(new_smi);
        setup_xaction_handlers(new_smi);
 
index f6e378dac5f5b1031530839d5967ab96607f1f7b..f40e3bd2c69c265400f1241900be8dddd26b51e8 100644 (file)
@@ -468,11 +468,13 @@ static int ipmi_ssif_thread(void *data)
                int result;
 
                /* Wait for something to do */
-               wait_for_completion(&ssif_info->wake_thread);
-               init_completion(&ssif_info->wake_thread);
-
+               result = wait_for_completion_interruptible(
+                                               &ssif_info->wake_thread);
                if (ssif_info->stopping)
                        break;
+               if (result == -ERESTARTSYS)
+                       continue;
+               init_completion(&ssif_info->wake_thread);
 
                if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) {
                        result = i2c_smbus_write_block_data(
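The reworked wait loop above switches to an interruptible sleep; one common
motivation is that a task blocked in plain wait_for_completion() sits in
uninterruptible sleep and counts toward load average. The resulting loop
shape, sketched (field names from the hunk, the rest illustrative):

        for (;;) {
                int rv = wait_for_completion_interruptible(
                                        &ssif_info->wake_thread);

                if (ssif_info->stopping)
                        break;
                if (rv == -ERESTARTSYS)
                        continue;       /* woken by a signal, nothing queued */
                init_completion(&ssif_info->wake_thread);
                /* ... perform the queued SMBus read or write ... */
        }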
index a3025e7ae35f1741883d610310b915040d633101..266469691e5820151ad3f70d1479863e5d820f9b 100644 (file)
@@ -661,17 +661,17 @@ static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
 };
 
 static bool __init
-arch_timer_probed(int type, const struct of_device_id *matches)
+arch_timer_needs_probing(int type, const struct of_device_id *matches)
 {
        struct device_node *dn;
-       bool probed = true;
+       bool needs_probing = false;
 
        dn = of_find_matching_node(NULL, matches);
        if (dn && of_device_is_available(dn) && !(arch_timers_present & type))
-               probed = false;
+               needs_probing = true;
        of_node_put(dn);
 
-       return probed;
+       return needs_probing;
 }
 
 static void __init arch_timer_common_init(void)
@@ -680,9 +680,9 @@ static void __init arch_timer_common_init(void)
 
        /* Wait until both nodes are probed if we have two timers */
        if ((arch_timers_present & mask) != mask) {
-               if (!arch_timer_probed(ARCH_MEM_TIMER, arch_timer_mem_of_match))
+               if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match))
                        return;
-               if (!arch_timer_probed(ARCH_CP15_TIMER, arch_timer_of_match))
+               if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match))
                        return;
        }
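The rename is behavior-preserving: the predicate's sense is inverted so the
call sites lose their double negation ("if (!arch_timer_probed(...))"
becomes "if (arch_timer_needs_probing(...))"). A compilable check of the
equivalence (stand-in booleans, same logic as the hunk):

        #include <assert.h>
        #include <stdbool.h>

        static bool probed(bool avail, bool present)
        {
                return !(avail && !present);
        }

        static bool needs_probing(bool avail, bool present)
        {
                return avail && !present;
        }

        int main(void)
        {
                for (int a = 0; a < 2; a++)
                        for (int p = 0; p < 2; p++)
                                assert(!probed(a, p) == needs_probing(a, p));
                return 0;
        }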
 
index d305fb089767c1b1c51d05f33c1e84e064f728a7..a19a3f619cc755d3a9a04dbdcde24b230cb30d6d 100644 (file)
@@ -108,7 +108,7 @@ static void __init add_clocksource(struct device_node *source_timer)
 
 static u64 notrace read_sched_clock(void)
 {
-       return ~__raw_readl(sched_io_base);
+       return ~readl_relaxed(sched_io_base);
 }
 
 static const struct of_device_id sptimer_ids[] __initconst = {
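Two things happen in this one-liner: readl_relaxed() is the endian-aware (but
still barrier-free) MMIO accessor, unlike __raw_readl(), and the bitwise NOT
converts the hardware's down-counter into the increasing value sched_clock
expects. A compilable model of the inversion:

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t counter = 0xffffffff;   /* down-counter, starts full */

        static uint32_t sched_clock_model(void)
        {
                return ~counter;        /* 0 at reset, grows as counter falls */
        }

        int main(void)
        {
                counter -= 100;         /* simulate 100 elapsed ticks */
                printf("%u\n", sched_clock_model());    /* prints 100 */
                return 0;
        }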
index d0a7bd66b8b91e0eb9aa4f7092ec42869a9067d8..dc3c6ee04aaa009e117fcb43fb065394ddfd6fae 100644 (file)
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
 
        ret = em_sti_start(p, USER_CLOCKSOURCE);
        if (!ret)
-               __clocksource_updatefreq_hz(cs, p->rate);
+               __clocksource_update_freq_hz(cs, p->rate);
        return ret;
 }
 
index 2bd13b53b727635500051746e27299c89d8d9b60..b8ff3c64cc452a16fc4108426fb6e5b1c54e91e8 100644 (file)
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
 
        ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
        if (!ret) {
-               __clocksource_updatefreq_hz(cs, ch->rate);
+               __clocksource_update_freq_hz(cs, ch->rate);
                ch->cs_enabled = true;
        }
        return ret;
index f150ca82bfaf106a7ef2c5a40dd12a1e098e39f0..b6b8fa3cd211fc6b03460f678168d0f6568362f0 100644 (file)
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
 
        ret = sh_tmu_enable(ch);
        if (!ret) {
-               __clocksource_updatefreq_hz(cs, ch->rate);
+               __clocksource_update_freq_hz(cs, ch->rate);
                ch->cs_enabled = true;
        }
 
index f4a9c0058b4d677382863a12bf887b40202f63fe..1928a8912584b98e835251442e4313016e112aa2 100644 (file)
@@ -170,7 +170,15 @@ static void __init sun4i_timer_init(struct device_node *node)
               TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M),
               timer_base + TIMER_CTL_REG(1));
 
-       sched_clock_register(sun4i_timer_sched_read, 32, rate);
+       /*
+        * sched_clock_register does not have priorities, and on sun6i and
+        * later there is a better sched_clock registered by arm_arch_timer.c
+        */
+       if (of_machine_is_compatible("allwinner,sun4i-a10") ||
+           of_machine_is_compatible("allwinner,sun5i-a13") ||
+           of_machine_is_compatible("allwinner,sun5i-a10s"))
+               sched_clock_register(sun4i_timer_sched_read, 32, rate);
+
        clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name,
                              rate, 350, 32, clocksource_mmio_readl_down);
 
index d2616ef167701526a13efe7ef896f3cca2c8668a..5a112d72fc2d2ec43205f5cde5720431bdc89b54 100644 (file)
 static void __iomem *timer_reg_base;
 static void __iomem *rtc_base;
 
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static u64 persistent_ms, last_persistent_ms;
 
 static struct delay_timer tegra_delay_timer;
 
 #define timer_writel(value, reg) \
-       __raw_writel(value, timer_reg_base + (reg))
+       writel_relaxed(value, timer_reg_base + (reg))
 #define timer_readl(reg) \
-       __raw_readl(timer_reg_base + (reg))
+       readl_relaxed(timer_reg_base + (reg))
 
 static int tegra_timer_set_next_event(unsigned long cycles,
                                         struct clock_event_device *evt)
@@ -120,26 +120,25 @@ static u64 tegra_rtc_read_ms(void)
 }
 
 /*
- * tegra_read_persistent_clock -  Return time from a persistent clock.
+ * tegra_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
 * 32k sync timer.  Converts the cycles elapsed since the last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds them to a monotonically increasing timespec64.
 * Care must be taken that this function is not called while the
 * tegra_rtc driver could be executing, to avoid race conditions
 * on the RTC shadow register.
  */
-static void tegra_read_persistent_clock(struct timespec *ts)
+static void tegra_read_persistent_clock64(struct timespec64 *ts)
 {
        u64 delta;
-       struct timespec *tsp = &persistent_ts;
 
        last_persistent_ms = persistent_ms;
        persistent_ms = tegra_rtc_read_ms();
        delta = persistent_ms - last_persistent_ms;
 
-       timespec_add_ns(tsp, delta * NSEC_PER_MSEC);
-       *ts = *tsp;
+       timespec64_add_ns(&persistent_ts, delta * NSEC_PER_MSEC);
+       *ts = persistent_ts;
 }
 
 static unsigned long tegra_delay_timer_read_counter_long(void)
@@ -252,7 +251,7 @@ static void __init tegra20_init_rtc(struct device_node *np)
        else
                clk_prepare_enable(clk);
 
-       register_persistent_clock(NULL, tegra_read_persistent_clock);
+       register_persistent_clock(NULL, tegra_read_persistent_clock64);
 }
 CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
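The persistent clock above folds millisecond deltas from the 32k RTC into a
monotonically increasing timespec64. A compilable model of the accumulation
(plain-C stand-ins for the kernel types):

        #include <stdint.h>
        #include <stdio.h>

        #define NSEC_PER_MSEC 1000000LL
        #define NSEC_PER_SEC  1000000000LL

        struct ts64 { int64_t tv_sec; int64_t tv_nsec; };

        static struct ts64 persistent_ts;
        static uint64_t persistent_ms, last_persistent_ms;

        static void read_persistent_model(struct ts64 *ts, uint64_t now_ms)
        {
                uint64_t delta;

                last_persistent_ms = persistent_ms;
                persistent_ms = now_ms;
                delta = persistent_ms - last_persistent_ms;

                /* the timespec64_add_ns() step: add, then renormalize */
                persistent_ts.tv_nsec += (int64_t)delta * NSEC_PER_MSEC;
                persistent_ts.tv_sec  += persistent_ts.tv_nsec / NSEC_PER_SEC;
                persistent_ts.tv_nsec %= NSEC_PER_SEC;
                *ts = persistent_ts;
        }

        int main(void)
        {
                struct ts64 ts;

                read_persistent_model(&ts, 1500);       /* 1.5s of RTC time */
                printf("%lld.%09lld\n", (long long)ts.tv_sec,
                       (long long)ts.tv_nsec);          /* 1.500000000 */
                return 0;
        }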
 
index ec57ba2bbd87ac9f2a251e350598fa52019d7815..5b6e3d5644c9519f12468369f6a25c9ab4946ff4 100644 (file)
@@ -111,7 +111,7 @@ static irqreturn_t efm32_clock_event_handler(int irq, void *dev_id)
 static struct efm32_clock_event_ddata clock_event_ddata = {
        .evtdev = {
                .name = "efm32 clockevent",
-               .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_MODE_PERIODIC,
+               .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
                .set_mode = efm32_clock_event_set_mode,
                .set_next_event = efm32_clock_event_set_next_event,
                .rating = 200,
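The bug fixed here is a constant from the wrong namespace:
CLOCK_EVT_MODE_PERIODIC is an enum clock_event_mode value, not a
CLOCK_EVT_FEAT_* bit. In kernels of this era the enum value is 2, which
collides with the CLOCK_EVT_FEAT_ONESHOT bit, so the OR contributed nothing
and the periodic feature went silently unadvertised. A compilable
illustration (bit and enum values assumed from that era's clockchips.h):

        #include <stdio.h>

        #define CLOCK_EVT_FEAT_PERIODIC 0x000001
        #define CLOCK_EVT_FEAT_ONESHOT  0x000002
        enum clock_event_mode { M_UNUSED, M_SHUTDOWN, M_PERIODIC, M_ONESHOT };

        int main(void)
        {
                unsigned buggy = CLOCK_EVT_FEAT_ONESHOT | M_PERIODIC;
                unsigned fixed = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC;

                printf("buggy=%#x fixed=%#x\n", buggy, fixed);  /* 0x2 vs 0x3 */
                return 0;
        }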
index b5b4d4585c9aba41bc396e91cc107b710b154664..c0304ff608b064b3785af81532fc2b8c7f7f2b30 100644 (file)
@@ -61,12 +61,12 @@ static inline struct pit_data *clkevt_to_pit_data(struct clock_event_device *clk
 
 static inline unsigned int pit_read(void __iomem *base, unsigned int reg_offset)
 {
-       return __raw_readl(base + reg_offset);
+       return readl_relaxed(base + reg_offset);
 }
 
 static inline void pit_write(void __iomem *base, unsigned int reg_offset, unsigned long value)
 {
-       __raw_writel(value, base + reg_offset);
+       writel_relaxed(value, base + reg_offset);
 }
 
 /*
index 58597fbcc046f27f88238aa949730d6109a704b7..28aa4b7bb6020c416974ec52c8f80eb12366e705 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/irq.h>
 #include <linux/irqreturn.h>
 #include <linux/reset.h>
+#include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 
 #define TIMER_SYNC_TICKS       3
 
-static void __iomem *timer_base;
-static u32 ticks_per_jiffy;
+struct sun5i_timer {
+       void __iomem            *base;
+       struct clk              *clk;
+       struct notifier_block   clk_rate_cb;
+       u32                     ticks_per_jiffy;
+};
+
+#define to_sun5i_timer(x) \
+       container_of(x, struct sun5i_timer, clk_rate_cb)
+
+struct sun5i_timer_clksrc {
+       struct sun5i_timer      timer;
+       struct clocksource      clksrc;
+};
+
+#define to_sun5i_timer_clksrc(x) \
+       container_of(x, struct sun5i_timer_clksrc, clksrc)
+
+struct sun5i_timer_clkevt {
+       struct sun5i_timer              timer;
+       struct clock_event_device       clkevt;
+};
+
+#define to_sun5i_timer_clkevt(x) \
+       container_of(x, struct sun5i_timer_clkevt, clkevt)
 
 /*
  * When we disable a timer, we need to wait at least for 2 cycles of
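The refactor starting here trades the old file-scope globals for per-instance
state: the generic clocksource / clock_event_device is embedded in a driver
struct, and every callback recovers the wrapper with container_of(). A
compilable model of that recovery:

        #include <stddef.h>
        #include <stdio.h>

        #define container_of(ptr, type, member) \
                ((type *)((char *)(ptr) - offsetof(type, member)))

        struct clksrc { const char *name; };  /* stand-in for struct clocksource */
        struct my_timer { void *base; struct clksrc clksrc; };

        static void read_cb(struct clksrc *cs)
        {
                /* the framework hands us &t->clksrc; recover the wrapper */
                struct my_timer *t = container_of(cs, struct my_timer, clksrc);

                printf("%s: base=%p\n", cs->name, t->base);
        }

        int main(void)
        {
                struct my_timer t = { .base = (void *)0x1000, .clksrc = { "demo" } };

                read_cb(&t.clksrc);
                return 0;
        }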
@@ -45,30 +69,30 @@ static u32 ticks_per_jiffy;
  * that is already set up and runs at the same frequency as the other
  * timers, and will never be disabled.
  */
-static void sun5i_clkevt_sync(void)
+static void sun5i_clkevt_sync(struct sun5i_timer_clkevt *ce)
 {
-       u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+       u32 old = readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1));
 
-       while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
+       while ((old - readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
                cpu_relax();
 }
 
-static void sun5i_clkevt_time_stop(u8 timer)
+static void sun5i_clkevt_time_stop(struct sun5i_timer_clkevt *ce, u8 timer)
 {
-       u32 val = readl(timer_base + TIMER_CTL_REG(timer));
-       writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer));
+       u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer));
+       writel(val & ~TIMER_CTL_ENABLE, ce->timer.base + TIMER_CTL_REG(timer));
 
-       sun5i_clkevt_sync();
+       sun5i_clkevt_sync(ce);
 }
 
-static void sun5i_clkevt_time_setup(u8 timer, u32 delay)
+static void sun5i_clkevt_time_setup(struct sun5i_timer_clkevt *ce, u8 timer, u32 delay)
 {
-       writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer));
+       writel(delay, ce->timer.base + TIMER_INTVAL_LO_REG(timer));
 }
 
-static void sun5i_clkevt_time_start(u8 timer, bool periodic)
+static void sun5i_clkevt_time_start(struct sun5i_timer_clkevt *ce, u8 timer, bool periodic)
 {
-       u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+       u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer));
 
        if (periodic)
                val &= ~TIMER_CTL_ONESHOT;
@@ -76,75 +100,230 @@ static void sun5i_clkevt_time_start(u8 timer, bool periodic)
                val |= TIMER_CTL_ONESHOT;
 
        writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
-              timer_base + TIMER_CTL_REG(timer));
+              ce->timer.base + TIMER_CTL_REG(timer));
 }
 
 static void sun5i_clkevt_mode(enum clock_event_mode mode,
-                             struct clock_event_device *clk)
+                             struct clock_event_device *clkevt)
 {
+       struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt);
+
        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
-               sun5i_clkevt_time_stop(0);
-               sun5i_clkevt_time_setup(0, ticks_per_jiffy);
-               sun5i_clkevt_time_start(0, true);
+               sun5i_clkevt_time_stop(ce, 0);
+               sun5i_clkevt_time_setup(ce, 0, ce->timer.ticks_per_jiffy);
+               sun5i_clkevt_time_start(ce, 0, true);
                break;
        case CLOCK_EVT_MODE_ONESHOT:
-               sun5i_clkevt_time_stop(0);
-               sun5i_clkevt_time_start(0, false);
+               sun5i_clkevt_time_stop(ce, 0);
+               sun5i_clkevt_time_start(ce, 0, false);
                break;
        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
        default:
-               sun5i_clkevt_time_stop(0);
+               sun5i_clkevt_time_stop(ce, 0);
                break;
        }
 }
 
 static int sun5i_clkevt_next_event(unsigned long evt,
-                                  struct clock_event_device *unused)
+                                  struct clock_event_device *clkevt)
 {
-       sun5i_clkevt_time_stop(0);
-       sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS);
-       sun5i_clkevt_time_start(0, false);
+       struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt);
+
+       sun5i_clkevt_time_stop(ce, 0);
+       sun5i_clkevt_time_setup(ce, 0, evt - TIMER_SYNC_TICKS);
+       sun5i_clkevt_time_start(ce, 0, false);
 
        return 0;
 }
 
-static struct clock_event_device sun5i_clockevent = {
-       .name = "sun5i_tick",
-       .rating = 340,
-       .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-       .set_mode = sun5i_clkevt_mode,
-       .set_next_event = sun5i_clkevt_next_event,
-};
-
-
 static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
 {
-       struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+       struct sun5i_timer_clkevt *ce = (struct sun5i_timer_clkevt *)dev_id;
 
-       writel(0x1, timer_base + TIMER_IRQ_ST_REG);
-       evt->event_handler(evt);
+       writel(0x1, ce->timer.base + TIMER_IRQ_ST_REG);
+       ce->clkevt.event_handler(&ce->clkevt);
 
        return IRQ_HANDLED;
 }
 
-static struct irqaction sun5i_timer_irq = {
-       .name = "sun5i_timer0",
-       .flags = IRQF_TIMER | IRQF_IRQPOLL,
-       .handler = sun5i_timer_interrupt,
-       .dev_id = &sun5i_clockevent,
-};
+static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
+{
+       struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
+
+       return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1));
+}
+
+static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
+                               unsigned long event, void *data)
+{
+       struct clk_notifier_data *ndata = data;
+       struct sun5i_timer *timer = to_sun5i_timer(nb);
+       struct sun5i_timer_clksrc *cs = container_of(timer, struct sun5i_timer_clksrc, timer);
+
+       switch (event) {
+       case PRE_RATE_CHANGE:
+               clocksource_unregister(&cs->clksrc);
+               break;
+
+       case POST_RATE_CHANGE:
+               clocksource_register_hz(&cs->clksrc, ndata->new_rate);
+               break;
+
+       default:
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static int __init sun5i_setup_clocksource(struct device_node *node,
+                                         void __iomem *base,
+                                         struct clk *clk, int irq)
+{
+       struct sun5i_timer_clksrc *cs;
+       unsigned long rate;
+       int ret;
+
+       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+       if (!cs)
+               return -ENOMEM;
+
+       ret = clk_prepare_enable(clk);
+       if (ret) {
+               pr_err("Couldn't enable parent clock\n");
+               goto err_free;
+       }
+
+       rate = clk_get_rate(clk);
+
+       cs->timer.base = base;
+       cs->timer.clk = clk;
+       cs->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clksrc;
+       cs->timer.clk_rate_cb.next = NULL;
+
+       ret = clk_notifier_register(clk, &cs->timer.clk_rate_cb);
+       if (ret) {
+               pr_err("Unable to register clock notifier.\n");
+               goto err_disable_clk;
+       }
+
+       writel(~0, base + TIMER_INTVAL_LO_REG(1));
+       writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+              base + TIMER_CTL_REG(1));
+
+       cs->clksrc.name = node->name;
+       cs->clksrc.rating = 340;
+       cs->clksrc.read = sun5i_clksrc_read;
+       cs->clksrc.mask = CLOCKSOURCE_MASK(32);
+       cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+       ret = clocksource_register_hz(&cs->clksrc, rate);
+       if (ret) {
+               pr_err("Couldn't register clock source.\n");
+               goto err_remove_notifier;
+       }
+
+       return 0;
+
+err_remove_notifier:
+       clk_notifier_unregister(clk, &cs->timer.clk_rate_cb);
+err_disable_clk:
+       clk_disable_unprepare(clk);
+err_free:
+       kfree(cs);
+       return ret;
+}
+
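Worth noting in sun5i_setup_clocksource() above is the goto-based unwind:
each error label releases exactly what was acquired before the failure, in
reverse order (notifier, then clock, then the allocation). The idiom, as a
compilable sketch with stub resources (all names hypothetical):

        #include <stdio.h>

        static int grab_clk(void)      { return 0; }
        static int grab_notifier(void) { return 0; }
        static int grab_clksrc(void)   { return -1; }   /* simulate late failure */
        static void drop_notifier(void) { puts("drop notifier"); }
        static void drop_clk(void)      { puts("drop clk"); }

        static int setup(void)
        {
                int ret = grab_clk();

                if (ret)
                        return ret;
                ret = grab_notifier();
                if (ret)
                        goto err_drop_clk;
                ret = grab_clksrc();
                if (ret)
                        goto err_drop_notifier;
                return 0;

        err_drop_notifier:
                drop_notifier();
        err_drop_clk:
                drop_clk();
                return ret;
        }

        int main(void) { return setup() ? 1 : 0; }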
+static int sun5i_rate_cb_clkevt(struct notifier_block *nb,
+                               unsigned long event, void *data)
+{
+       struct clk_notifier_data *ndata = data;
+       struct sun5i_timer *timer = to_sun5i_timer(nb);
+       struct sun5i_timer_clkevt *ce = container_of(timer, struct sun5i_timer_clkevt, timer);
+
+       if (event == POST_RATE_CHANGE) {
+               clockevents_update_freq(&ce->clkevt, ndata->new_rate);
+               ce->timer.ticks_per_jiffy = DIV_ROUND_UP(ndata->new_rate, HZ);
+       }
+
+       return NOTIFY_DONE;
+}
+
+static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem *base,
+                                        struct clk *clk, int irq)
+{
+       struct sun5i_timer_clkevt *ce;
+       unsigned long rate;
+       int ret;
+       u32 val;
+
+       ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+       if (!ce)
+               return -ENOMEM;
+
+       ret = clk_prepare_enable(clk);
+       if (ret) {
+               pr_err("Couldn't enable parent clock\n");
+               goto err_free;
+       }
+
+       rate = clk_get_rate(clk);
+
+       ce->timer.base = base;
+       ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
+       ce->timer.clk = clk;
+       ce->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clkevt;
+       ce->timer.clk_rate_cb.next = NULL;
+
+       ret = clk_notifier_register(clk, &ce->timer.clk_rate_cb);
+       if (ret) {
+               pr_err("Unable to register clock notifier.\n");
+               goto err_disable_clk;
+       }
+
+       ce->clkevt.name = node->name;
+       ce->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+       ce->clkevt.set_next_event = sun5i_clkevt_next_event;
+       ce->clkevt.set_mode = sun5i_clkevt_mode;
+       ce->clkevt.rating = 340;
+       ce->clkevt.irq = irq;
+       ce->clkevt.cpumask = cpu_possible_mask;
+
+       /* Enable timer0 interrupt */
+       val = readl(base + TIMER_IRQ_EN_REG);
+       writel(val | TIMER_IRQ_EN(0), base + TIMER_IRQ_EN_REG);
+
+       clockevents_config_and_register(&ce->clkevt, rate,
+                                       TIMER_SYNC_TICKS, 0xffffffff);
+
+       ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL,
+                         "sun5i_timer0", ce);
+       if (ret) {
+               pr_err("Unable to register interrupt\n");
+               goto err_remove_notifier;
+       }
+
+       return 0;
+
+err_remove_notifier:
+       clk_notifier_unregister(clk, &ce->timer.clk_rate_cb);
+err_disable_clk:
+       clk_disable_unprepare(clk);
+err_free:
+       kfree(ce);
+       return ret;
+}
 
 static void __init sun5i_timer_init(struct device_node *node)
 {
        struct reset_control *rstc;
-       unsigned long rate;
+       void __iomem *timer_base;
        struct clk *clk;
-       int ret, irq;
-       u32 val;
+       int irq;
 
-       timer_base = of_iomap(node, 0);
+       timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
        if (!timer_base)
                panic("Can't map registers");
 
@@ -155,35 +334,13 @@ static void __init sun5i_timer_init(struct device_node *node)
        clk = of_clk_get(node, 0);
        if (IS_ERR(clk))
                panic("Can't get timer clock");
-       clk_prepare_enable(clk);
-       rate = clk_get_rate(clk);
 
        rstc = of_reset_control_get(node, NULL);
        if (!IS_ERR(rstc))
                reset_control_deassert(rstc);
 
-       writel(~0, timer_base + TIMER_INTVAL_LO_REG(1));
-       writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
-              timer_base + TIMER_CTL_REG(1));
-
-       clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name,
-                             rate, 340, 32, clocksource_mmio_readl_down);
-
-       ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
-
-       /* Enable timer0 interrupt */
-       val = readl(timer_base + TIMER_IRQ_EN_REG);
-       writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
-
-       sun5i_clockevent.cpumask = cpu_possible_mask;
-       sun5i_clockevent.irq = irq;
-
-       clockevents_config_and_register(&sun5i_clockevent, rate,
-                                       TIMER_SYNC_TICKS, 0xffffffff);
-
-       ret = setup_irq(irq, &sun5i_timer_irq);
-       if (ret)
-               pr_warn("failed to setup irq %d\n", irq);
+       sun5i_setup_clocksource(node, timer_base, clk, irq);
+       sun5i_setup_clockevent(node, timer_base, clk, irq);
 }
 CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
                       sun5i_timer_init);
index 28e59a48b35fdb993b28f621bdb0b065c0057e73..8ae655c364f48aeeeec7e4717aab6e6bfe2d9f95 100644 (file)
@@ -1698,15 +1698,18 @@ void cpufreq_resume(void)
                    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
                        pr_err("%s: Failed to start governor for policy: %p\n",
                                __func__, policy);
-
-               /*
-                * schedule call cpufreq_update_policy() for boot CPU, i.e. last
-                * policy in list. It will verify that the current freq is in
-                * sync with what we believe it to be.
-                */
-               if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
-                       schedule_work(&policy->update);
        }
+
+       /*
+        * Schedule a call to cpufreq_update_policy() for the first-online
+        * CPU, as that one wouldn't have been hotplugged out on suspend. It
+        * will verify that the current freq is in sync with what we believe
+        * it to be.
+        */
+       policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
+       if (WARN_ON(!policy))
+               return;
+
+       schedule_work(&policy->update);
 }
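The rewritten resume path schedules the policy refresh once, against the
first online CPU: by convention that is the boot CPU, the one CPU that
suspend never hotplugs out, so its policy should always exist here (hence
the WARN_ON). A compilable model of the "lowest set bit" selection behind
cpumask_first(cpu_online_mask):

        #include <stdio.h>

        static int first_online(unsigned long online_mask)
        {
                /* GCC/Clang builtin: index of the lowest set bit */
                return __builtin_ctzl(online_mask);
        }

        int main(void)
        {
                /* CPUs 0 and 2 online */
                printf("first online cpu = %d\n", first_online(0x5UL)); /* 0 */
                return 0;
        }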
 
 /**
index 080bd2dbde4ba5408504a451e9454b51497202e3..7a73a279e179a52b9ea209e00b3f61f797c4d4cb 100644 (file)
@@ -330,9 +330,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
        if (!dev->registered)
                return -EINVAL;
 
-       if (!dev->state_count)
-               dev->state_count = drv->state_count;
-
        ret = cpuidle_add_device_sysfs(dev);
        if (ret)
                return ret;
index 2697e87d5b34ff9ae520f1130220afb4d0876dff..5db147859b9047db626d66e64bef2897a41822f2 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/sched.h>
 #include <linux/cpuidle.h>
 #include <linux/cpumask.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include "cpuidle.h"
 
@@ -130,21 +130,20 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
 #endif
 
 /**
- * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer
+ * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer on a cpu
  * @arg: a void pointer used to match the SMP cross call API
  *
- * @arg is used as a value of type 'long' with one of the two values:
- * - CLOCK_EVT_NOTIFY_BROADCAST_ON
- * - CLOCK_EVT_NOTIFY_BROADCAST_OFF
+ * If @arg is NULL, broadcast is disabled; otherwise it is enabled.
  *
- * Set the broadcast timer notification for the current CPU.  This function
- * is executed per CPU by an SMP cross call.  It not supposed to be called
- * directly.
+ * This function is executed per CPU by an SMP cross call.  It's not
+ * supposed to be called directly.
  */
 static void cpuidle_setup_broadcast_timer(void *arg)
 {
-       int cpu = smp_processor_id();
-       clockevents_notify((long)(arg), &cpu);
+       if (arg)
+               tick_broadcast_enable();
+       else
+               tick_broadcast_disable();
 }
 
 /**
@@ -239,7 +238,7 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv)
 
        if (drv->bctimer)
                on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
-                                (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
+                                (void *)1, 1);
 
        poll_idle_init(drv);
 
@@ -263,7 +262,7 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
        if (drv->bctimer) {
                drv->bctimer = 0;
                on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
-                                (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1);
+                                NULL, 1);
        }
 
        __cpuidle_unset_driver(drv);
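The conversion above also simplifies the cross-call argument: instead of a
CLOCK_EVT_NOTIFY_* constant, the void * now carries a bare boolean
((void *)1 to enable, NULL to disable) and each CPU flips its own broadcast
tick. A compilable model of the pointer-as-flag idiom:

        #include <stdbool.h>
        #include <stdio.h>

        static void setup_broadcast(void *arg)  /* runs once per CPU */
        {
                bool enable = arg != NULL;

                printf("broadcast %s\n", enable ? "enabled" : "disabled");
        }

        int main(void)
        {
                setup_broadcast((void *)1);     /* register path */
                setup_broadcast(NULL);          /* unregister path */
                return 0;
        }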
index 97c5903b4606cf0fa02aecec826cbed605796234..832a2c3f01ffccf691842507955078299799eb86 100644 (file)
@@ -401,7 +401,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
        struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
 
        /* state statistics */
-       for (i = 0; i < device->state_count; i++) {
+       for (i = 0; i < drv->state_count; i++) {
                kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
                if (!kobj)
                        goto error_state;
@@ -433,9 +433,10 @@ error_state:
  */
 static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
 {
+       struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
        int i;
 
-       for (i = 0; i < device->state_count; i++)
+       for (i = 0; i < drv->state_count; i++)
                cpuidle_free_state_kobj(device, i);
 }
 
index a874b6ec6650f33275c8a59ddb1d00a78c4271e9..942ca541dcbdc293e7a3dc11ef494584503f4a4e 100644 (file)
@@ -51,19 +51,6 @@ config INTEL_MIC_X100_DMA
          OS and tools for MIC to use with this driver are available from
          <http://software.intel.com/en-us/mic-developer>.
 
-config INTEL_MID_DMAC
-       tristate "Intel MID DMA support for Peripheral DMA controllers"
-       depends on PCI && X86
-       select DMA_ENGINE
-       default n
-       help
-         Enable support for the Intel(R) MID DMA engine present
-         in Intel MID chipsets.
-
-         Say Y here if you have such a chipset.
-
-         If unsure, say N.
-
 config ASYNC_TX_ENABLE_CHANNEL_SWITCH
        bool
 
index f915f61ec5747193757e6f2b6da61ebd0eea4d83..539d4825bd766ed956629e0b1d0aa4e9b91acf25 100644 (file)
@@ -6,7 +6,6 @@ obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o
 obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
 obj-$(CONFIG_DMA_OF) += of-dma.o
 
-obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o
 obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
index 512cb8e2805e797ef12760d2e445652186410137..ceedafbd23e01fcda6968fb04707ae141cff3a88 100644 (file)
@@ -903,6 +903,11 @@ static const struct cppi_glue_infos *get_glue_info(struct device *dev)
        return of_id->data;
 }
 
+#define CPPI41_DMA_BUSWIDTHS   (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+                               BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+                               BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+                               BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
 static int cppi41_dma_probe(struct platform_device *pdev)
 {
        struct cppi41_dd *cdd;
@@ -926,6 +931,10 @@ static int cppi41_dma_probe(struct platform_device *pdev)
        cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
        cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
        cdd->ddev.device_terminate_all = cppi41_stop_chan;
+       cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
+       cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
+       cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
        cdd->ddev.dev = dev;
        INIT_LIST_HEAD(&cdd->ddev.channels);
        cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
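The new lines advertise the controller's slave capabilities generically: the
bus-width mask is BIT() of each supported enum dma_slave_buswidth value
(which, in dmaengine.h of this era, equal the widths in bytes), so dmaengine
clients can query support instead of hard-coding it. A compilable model of
building and testing such a mask:

        #include <stdio.h>

        #define BIT(n) (1UL << (n))
        enum buswidth { BW_1 = 1, BW_2 = 2, BW_3 = 3, BW_4 = 4 };  /* bytes */

        #define CPPI41_WIDTHS (BIT(BW_1) | BIT(BW_2) | BIT(BW_3) | BIT(BW_4))

        int main(void)
        {
                printf("3-byte ok: %d\n", !!(CPPI41_WIDTHS & BIT(BW_3)));  /* 1 */
                printf("8-byte ok: %d\n", !!(CPPI41_WIDTHS & BIT(8)));     /* 0 */
                return 0;
        }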
index f15712f2fec6c06949c23f845eeb8bbb776b8907..ac336a961dea97be01894027b9f4bd16d5566bad 100644 (file)
@@ -859,9 +859,6 @@ int dma_async_device_register(struct dma_device *device)
        BUG_ON(!device->device_issue_pending);
        BUG_ON(!device->dev);
 
-       WARN(dma_has_cap(DMA_SLAVE, device->cap_mask) && !device->directions,
-            "this driver doesn't support generic slave capabilities reporting\n");
-
        /* note: this only matters in the
         * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
         */
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
deleted file mode 100644 (file)
index 5aaead9..0000000
+++ /dev/null
@@ -1,1447 +0,0 @@
-/*
- *  intel_mid_dma.c - Intel Langwell DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  The driver design is based on dw_dmac driver
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/pm_runtime.h>
-#include <linux/intel_mid_dma.h>
-#include <linux/module.h>
-
-#include "dmaengine.h"
-
-#define MAX_CHAN       4 /*max ch across controllers*/
-#include "intel_mid_dma_regs.h"
-
-#define INTEL_MID_DMAC1_ID             0x0814
-#define INTEL_MID_DMAC2_ID             0x0813
-#define INTEL_MID_GP_DMAC2_ID          0x0827
-#define INTEL_MFLD_DMAC1_ID            0x0830
-#define LNW_PERIPHRAL_MASK_BASE                0xFFAE8008
-#define LNW_PERIPHRAL_MASK_SIZE                0x10
-#define LNW_PERIPHRAL_STATUS           0x0
-#define LNW_PERIPHRAL_MASK             0x8
-
-struct intel_mid_dma_probe_info {
-       u8 max_chan;
-       u8 ch_base;
-       u16 block_size;
-       u32 pimr_mask;
-};
-
-#define INFO(_max_chan, _ch_base, _block_size, _pimr_mask) \
-       ((kernel_ulong_t)&(struct intel_mid_dma_probe_info) {   \
-               .max_chan = (_max_chan),                        \
-               .ch_base = (_ch_base),                          \
-               .block_size = (_block_size),                    \
-               .pimr_mask = (_pimr_mask),                      \
-       })
-
-/*****************************************************************************
-Utility Functions*/
-/**
- * get_ch_index        -       convert status to channel
- * @status: status mask
- * @base: dma ch base value
- *
- * Modify the status mask and return the channel index needing
- * attention (or -1 if neither)
- */
-static int get_ch_index(int *status, unsigned int base)
-{
-       int i;
-       for (i = 0; i < MAX_CHAN; i++) {
-               if (*status & (1 << (i + base))) {
-                       *status = *status & ~(1 << (i + base));
-                       pr_debug("MDMA: index %d New status %x\n", i, *status);
-                       return i;
-               }
-       }
-       return -1;
-}
-
-/**
- * get_block_ts        -       calculates dma transaction length
- * @len: dma transfer length
- * @tx_width: dma transfer src width
- * @block_size: dma controller max block size
- *
- * Based on src width calculate the DMA trsaction length in data items
- * return data items or FFFF if exceeds max length for block
- */
-static int get_block_ts(int len, int tx_width, int block_size)
-{
-       int byte_width = 0, block_ts = 0;
-
-       switch (tx_width) {
-       case DMA_SLAVE_BUSWIDTH_1_BYTE:
-               byte_width = 1;
-               break;
-       case DMA_SLAVE_BUSWIDTH_2_BYTES:
-               byte_width = 2;
-               break;
-       case DMA_SLAVE_BUSWIDTH_4_BYTES:
-       default:
-               byte_width = 4;
-               break;
-       }
-
-       block_ts = len/byte_width;
-       if (block_ts > block_size)
-               block_ts = 0xFFFF;
-       return block_ts;
-}
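For the record of what is removed here: get_block_ts() turned a byte count
into controller "data items" for the programmed width, saturating at 0xFFFF
past the controller's block limit. A compilable model:

        #include <stdio.h>

        static int block_ts(int len, int byte_width, int block_size)
        {
                int items = len / byte_width;

                return items > block_size ? 0xFFFF : items;
        }

        int main(void)
        {
                printf("%d\n",  block_ts(4096, 4, 2047));       /* 1024 */
                printf("%#x\n", block_ts(1 << 20, 1, 4095));    /* 0xffff */
                return 0;
        }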
-
-/*****************************************************************************
-DMAC1 interrupt Functions*/
-
-/**
- * dmac1_mask_periphral_intr - mask the periphral interrupt
- * @mid: dma device for which masking is required
- *
- * Masks the DMA periphral interrupt
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void dmac1_mask_periphral_intr(struct middma_device *mid)
-{
-       u32 pimr;
-
-       if (mid->pimr_mask) {
-               pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
-               pimr |= mid->pimr_mask;
-               writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
-       }
-       return;
-}
-
-/**
- * dmac1_unmask_periphral_intr -       unmask the periphral interrupt
- * @midc: dma channel for which masking is required
- *
- * UnMasks the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void dmac1_unmask_periphral_intr(struct intel_mid_dma_chan *midc)
-{
-       u32 pimr;
-       struct middma_device *mid = to_middma_device(midc->chan.device);
-
-       if (mid->pimr_mask) {
-               pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
-               pimr &= ~mid->pimr_mask;
-               writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
-       }
-       return;
-}
-
-/**
- * enable_dma_interrupt -      enable the periphral interrupt
- * @midc: dma channel for which enable interrupt is required
- *
- * Enable the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void enable_dma_interrupt(struct intel_mid_dma_chan *midc)
-{
-       dmac1_unmask_periphral_intr(midc);
-
-       /*en ch interrupts*/
-       iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
-       iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
-       return;
-}
-
-/**
- * disable_dma_interrupt -     disable the periphral interrupt
- * @midc: dma channel for which disable interrupt is required
- *
- * Disable the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void disable_dma_interrupt(struct intel_mid_dma_chan *midc)
-{
-       /*Check LPE PISR, make sure fwd is disabled*/
-       iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_BLOCK);
-       iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
-       iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
-       return;
-}
-
-/*****************************************************************************
-DMA channel helper Functions*/
-/**
- * mid_desc_get                -       get a descriptor
- * @midc: dma channel for which descriptor is required
- *
- * Obtain a descriptor for the channel. Returns NULL if none are free.
- * Once the descriptor is returned it is private until put on another
- * list or freed
- */
-static struct intel_mid_dma_desc *midc_desc_get(struct intel_mid_dma_chan *midc)
-{
-       struct intel_mid_dma_desc *desc, *_desc;
-       struct intel_mid_dma_desc *ret = NULL;
-
-       spin_lock_bh(&midc->lock);
-       list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
-               if (async_tx_test_ack(&desc->txd)) {
-                       list_del(&desc->desc_node);
-                       ret = desc;
-                       break;
-               }
-       }
-       spin_unlock_bh(&midc->lock);
-       return ret;
-}
-
-/**
- * mid_desc_put                -       put a descriptor
- * @midc: dma channel for which descriptor is required
- * @desc: descriptor to put
- *
- * Return a descriptor from lwn_desc_get back to the free pool
- */
-static void midc_desc_put(struct intel_mid_dma_chan *midc,
-                       struct intel_mid_dma_desc *desc)
-{
-       if (desc) {
-               spin_lock_bh(&midc->lock);
-               list_add_tail(&desc->desc_node, &midc->free_list);
-               spin_unlock_bh(&midc->lock);
-       }
-}
-/**
- * midc_dostart                -               begin a DMA transaction
- * @midc: channel for which txn is to be started
- * @first: first descriptor of series
- *
- * Load a transaction into the engine. This must be called with midc->lock
- * held and bh disabled.
- */
-static void midc_dostart(struct intel_mid_dma_chan *midc,
-                       struct intel_mid_dma_desc *first)
-{
-       struct middma_device *mid = to_middma_device(midc->chan.device);
-
-       /*  channel is idle */
-       if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) {
-               /*error*/
-               pr_err("ERR_MDMA: channel is busy in start\n");
-               /* The tasklet will hopefully advance the queue... */
-               return;
-       }
-       midc->busy = true;
-       /*write registers and en*/
-       iowrite32(first->sar, midc->ch_regs + SAR);
-       iowrite32(first->dar, midc->ch_regs + DAR);
-       iowrite32(first->lli_phys, midc->ch_regs + LLP);
-       iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH);
-       iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW);
-       iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW);
-       iowrite32(first->ctl_hi, midc->ch_regs + CTL_HIGH);
-       pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n",
-               (int)first->sar, (int)first->dar, first->cfg_hi,
-               first->cfg_lo, first->ctl_hi, first->ctl_lo);
-       first->status = DMA_IN_PROGRESS;
-
-       iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-}
-
-/**
- * midc_descriptor_complete    -       process completed descriptor
- * @midc: channel owning the descriptor
- * @desc: the descriptor itself
- *
- * Process a completed descriptor and perform any callbacks upon
- * the completion. The completion handling drops the lock during the
- * callbacks but must be called with the lock held.
- */
-static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
-               struct intel_mid_dma_desc *desc)
-               __releases(&midc->lock) __acquires(&midc->lock)
-{
-       struct dma_async_tx_descriptor  *txd = &desc->txd;
-       dma_async_tx_callback callback_txd = NULL;
-       struct intel_mid_dma_lli        *llitem;
-       void *param_txd = NULL;
-
-       dma_cookie_complete(txd);
-       callback_txd = txd->callback;
-       param_txd = txd->callback_param;
-
-       if (desc->lli != NULL) {
-               /*clear the DONE bit of completed LLI in memory*/
-               llitem = desc->lli + desc->current_lli;
-               llitem->ctl_hi &= CLEAR_DONE;
-               if (desc->current_lli < desc->lli_length-1)
-                       (desc->current_lli)++;
-               else
-                       desc->current_lli = 0;
-       }
-       spin_unlock_bh(&midc->lock);
-       if (callback_txd) {
-               pr_debug("MDMA: TXD callback set ... calling\n");
-               callback_txd(param_txd);
-       }
-       if (midc->raw_tfr) {
-               desc->status = DMA_COMPLETE;
-               if (desc->lli != NULL) {
-                       pci_pool_free(desc->lli_pool, desc->lli,
-                                               desc->lli_phys);
-                       pci_pool_destroy(desc->lli_pool);
-                       desc->lli = NULL;
-               }
-               list_move(&desc->desc_node, &midc->free_list);
-               midc->busy = false;
-       }
-       spin_lock_bh(&midc->lock);
-
-}
-/**
- * midc_scan_descriptors -             check the descriptors in channel
- *                                     mark completed when tx is completete
- * @mid: device
- * @midc: channel to scan
- *
- * Walk the descriptor chain for the device and process any entries
- * that are complete.
- */
-static void midc_scan_descriptors(struct middma_device *mid,
-                               struct intel_mid_dma_chan *midc)
-{
-       struct intel_mid_dma_desc *desc = NULL, *_desc = NULL;
-
-       /*tx is complete*/
-       list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-               if (desc->status == DMA_IN_PROGRESS)
-                       midc_descriptor_complete(midc, desc);
-       }
-       return;
-       }
-/**
- * midc_lli_fill_sg -          Helper function to convert
- *                             SG list to Linked List Items.
- *@midc: Channel
- *@desc: DMA descriptor
- *@sglist: Pointer to SG list
- *@sglen: SG list length
- *@flags: DMA transaction flags
- *
- * Walk through the SG list and convert the SG list into Linked
- * List Items (LLI).
- */
-static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
-                               struct intel_mid_dma_desc *desc,
-                               struct scatterlist *sglist,
-                               unsigned int sglen,
-                               unsigned int flags)
-{
-       struct intel_mid_dma_slave *mids;
-       struct scatterlist  *sg;
-       dma_addr_t lli_next, sg_phy_addr;
-       struct intel_mid_dma_lli *lli_bloc_desc;
-       union intel_mid_dma_ctl_lo ctl_lo;
-       union intel_mid_dma_ctl_hi ctl_hi;
-       int i;
-
-       pr_debug("MDMA: Entered midc_lli_fill_sg\n");
-       mids = midc->mid_slave;
-
-       lli_bloc_desc = desc->lli;
-       lli_next = desc->lli_phys;
-
-       ctl_lo.ctl_lo = desc->ctl_lo;
-       ctl_hi.ctl_hi = desc->ctl_hi;
-       for_each_sg(sglist, sg, sglen, i) {
-               /*Populate CTL_LOW and LLI values*/
-               if (i != sglen - 1) {
-                       lli_next = lli_next +
-                               sizeof(struct intel_mid_dma_lli);
-               } else {
-               /*Check for circular list, otherwise terminate LLI to ZERO*/
-                       if (flags & DMA_PREP_CIRCULAR_LIST) {
-                               pr_debug("MDMA: LLI is configured in circular mode\n");
-                               lli_next = desc->lli_phys;
-                       } else {
-                               lli_next = 0;
-                               ctl_lo.ctlx.llp_dst_en = 0;
-                               ctl_lo.ctlx.llp_src_en = 0;
-                       }
-               }
-               /*Populate CTL_HI values*/
-               ctl_hi.ctlx.block_ts = get_block_ts(sg_dma_len(sg),
-                                                       desc->width,
-                                                       midc->dma->block_size);
-               /*Populate SAR and DAR values*/
-               sg_phy_addr = sg_dma_address(sg);
-               if (desc->dirn ==  DMA_MEM_TO_DEV) {
-                       lli_bloc_desc->sar  = sg_phy_addr;
-                       lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
-               } else if (desc->dirn ==  DMA_DEV_TO_MEM) {
-                       lli_bloc_desc->sar  = mids->dma_slave.src_addr;
-                       lli_bloc_desc->dar  = sg_phy_addr;
-               }
-               /*Copy values into block descriptor in system memroy*/
-               lli_bloc_desc->llp = lli_next;
-               lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo;
-               lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi;
-
-               lli_bloc_desc++;
-       }
-       /*Copy very first LLI values to descriptor*/
-       desc->ctl_lo = desc->lli->ctl_lo;
-       desc->ctl_hi = desc->lli->ctl_hi;
-       desc->sar = desc->lli->sar;
-       desc->dar = desc->lli->dar;
-
-       return 0;
-}
-/*****************************************************************************
-DMA engine callback Functions*/
-/**
- * intel_mid_dma_tx_submit -   callback to submit DMA transaction
- * @tx: dma engine descriptor
- *
- * Submit the DMA transaction for this descriptor, start if ch idle
- */
-static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-       struct intel_mid_dma_desc       *desc = to_intel_mid_dma_desc(tx);
-       struct intel_mid_dma_chan       *midc = to_intel_mid_dma_chan(tx->chan);
-       dma_cookie_t            cookie;
-
-       spin_lock_bh(&midc->lock);
-       cookie = dma_cookie_assign(tx);
-
-       if (list_empty(&midc->active_list))
-               list_add_tail(&desc->desc_node, &midc->active_list);
-       else
-               list_add_tail(&desc->desc_node, &midc->queue);
-
-       midc_dostart(midc, desc);
-       spin_unlock_bh(&midc->lock);
-
-       return cookie;
-}
-
-/**
- * intel_mid_dma_issue_pending -       callback to issue pending txn
- * @chan: chan where pending trascation needs to be checked and submitted
- *
- * Call for scan to issue pending descriptors
- */
-static void intel_mid_dma_issue_pending(struct dma_chan *chan)
-{
-       struct intel_mid_dma_chan       *midc = to_intel_mid_dma_chan(chan);
-
-       spin_lock_bh(&midc->lock);
-       if (!list_empty(&midc->queue))
-               midc_scan_descriptors(to_middma_device(chan->device), midc);
-       spin_unlock_bh(&midc->lock);
-}
-
-/**
- * intel_mid_dma_tx_status -   Return status of txn
- * @chan: chan for where status needs to be checked
- * @cookie: cookie for txn
- * @txstate: DMA txn state
- *
- * Return status of DMA txn
- */
-static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
-                                               dma_cookie_t cookie,
-                                               struct dma_tx_state *txstate)
-{
-       struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan);
-       enum dma_status ret;
-
-       ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret != DMA_COMPLETE) {
-               spin_lock_bh(&midc->lock);
-               midc_scan_descriptors(to_middma_device(chan->device), midc);
-               spin_unlock_bh(&midc->lock);
-
-               ret = dma_cookie_status(chan, cookie, txstate);
-       }
-
-       return ret;
-}
-
-static int intel_mid_dma_config(struct dma_chan *chan,
-                               struct dma_slave_config *slave)
-{
-       struct intel_mid_dma_chan       *midc = to_intel_mid_dma_chan(chan);
-       struct intel_mid_dma_slave *mid_slave;
-
-       BUG_ON(!midc);
-       BUG_ON(!slave);
-       pr_debug("MDMA: slave control called\n");
-
-       mid_slave = to_intel_mid_dma_slave(slave);
-
-       BUG_ON(!mid_slave);
-
-       midc->mid_slave = mid_slave;
-       return 0;
-}
-
-static int intel_mid_dma_terminate_all(struct dma_chan *chan)
-{
-       struct intel_mid_dma_chan       *midc = to_intel_mid_dma_chan(chan);
-       struct middma_device    *mid = to_middma_device(chan->device);
-       struct intel_mid_dma_desc       *desc, *_desc;
-       union intel_mid_dma_cfg_lo cfg_lo;
-
-       spin_lock_bh(&midc->lock);
-       if (midc->busy == false) {
-               spin_unlock_bh(&midc->lock);
-               return 0;
-       }
-       /*Suspend and disable the channel*/
-       cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW);
-       cfg_lo.cfgx.ch_susp = 1;
-       iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW);
-       iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-       midc->busy = false;
-       /* Disable interrupts */
-       disable_dma_interrupt(midc);
-       midc->descs_allocated = 0;
-
-       spin_unlock_bh(&midc->lock);
-       list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-               if (desc->lli != NULL) {
-                       pci_pool_free(desc->lli_pool, desc->lli,
-                                               desc->lli_phys);
-                       pci_pool_destroy(desc->lli_pool);
-                       desc->lli = NULL;
-               }
-               list_move(&desc->desc_node, &midc->free_list);
-       }
-       return 0;
-}
-
-
-/**
- * intel_mid_dma_prep_memcpy - Prep memcpy txn
- * @chan: chan for DMA transfer
- * @dest: destn address
- * @src: src address
- * @len: DMA transfer len
- * @flags: DMA flags
- *
- * Perform a DMA memcpy. Note we support slave periphral DMA transfers only
- * The periphral txn details should be filled in slave structure properly
- * Returns the descriptor for this txn
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
-                       struct dma_chan *chan, dma_addr_t dest,
-                       dma_addr_t src, size_t len, unsigned long flags)
-{
-       struct intel_mid_dma_chan *midc;
-       struct intel_mid_dma_desc *desc = NULL;
-       struct intel_mid_dma_slave *mids;
-       union intel_mid_dma_ctl_lo ctl_lo;
-       union intel_mid_dma_ctl_hi ctl_hi;
-       union intel_mid_dma_cfg_lo cfg_lo;
-       union intel_mid_dma_cfg_hi cfg_hi;
-       enum dma_slave_buswidth width;
-
-       pr_debug("MDMA: Prep for memcpy\n");
-       BUG_ON(!chan);
-       if (!len)
-               return NULL;
-
-       midc = to_intel_mid_dma_chan(chan);
-       BUG_ON(!midc);
-
-       mids = midc->mid_slave;
-       BUG_ON(!mids);
-
-       pr_debug("MDMA:called for DMA %x CH %d Length %zu\n",
-                               midc->dma->pci_id, midc->ch_id, len);
-       pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n",
-                       mids->cfg_mode, mids->dma_slave.direction,
-                       mids->hs_mode, mids->dma_slave.src_addr_width);
-
-       /*calculate CFG_LO*/
-       if (mids->hs_mode == LNW_DMA_SW_HS) {
-               cfg_lo.cfg_lo = 0;
-               cfg_lo.cfgx.hs_sel_dst = 1;
-               cfg_lo.cfgx.hs_sel_src = 1;
-       } else if (mids->hs_mode == LNW_DMA_HW_HS)
-               cfg_lo.cfg_lo = 0x00000;
-
-       /*calculate CFG_HI*/
-       if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
-               /*SW HS only*/
-               cfg_hi.cfg_hi = 0;
-       } else {
-               cfg_hi.cfg_hi = 0;
-               if (midc->dma->pimr_mask) {
-                       cfg_hi.cfgx.protctl = 0x0; /*default value*/
-                       cfg_hi.cfgx.fifo_mode = 1;
-                       if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
-                               cfg_hi.cfgx.src_per = 0;
-                               if (mids->device_instance == 0)
-                                       cfg_hi.cfgx.dst_per = 3;
-                               if (mids->device_instance == 1)
-                                       cfg_hi.cfgx.dst_per = 1;
-                       } else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
-                               if (mids->device_instance == 0)
-                                       cfg_hi.cfgx.src_per = 2;
-                               if (mids->device_instance == 1)
-                                       cfg_hi.cfgx.src_per = 0;
-                               cfg_hi.cfgx.dst_per = 0;
-                       }
-               } else {
-                       cfg_hi.cfgx.protctl = 0x1; /*default value*/
-                       cfg_hi.cfgx.src_per = cfg_hi.cfgx.dst_per =
-                                       midc->ch_id - midc->dma->chan_base;
-               }
-       }
-
-       /*calculate CTL_HI*/
-       ctl_hi.ctlx.reser = 0;
-       ctl_hi.ctlx.done  = 0;
-       width = mids->dma_slave.src_addr_width;
-
-       ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size);
-       pr_debug("MDMA:calc len %d for block size %d\n",
-                               ctl_hi.ctlx.block_ts, midc->dma->block_size);
-       /*calculate CTL_LO*/
-       ctl_lo.ctl_lo = 0;
-       ctl_lo.ctlx.int_en = 1;
-       ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst;
-       ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst;
-
-       /*
-        * Here we need some translation from "enum dma_slave_buswidth"
-        * to the format for our dma controller
-        *              standard        intel_mid_dmac's format
-        *               1 Byte                 0b000
-        *               2 Bytes                0b001
-        *               4 Bytes                0b010
-        */
-       ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2;
-       ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2;
-
-       if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
-               ctl_lo.ctlx.tt_fc = 0;
-               ctl_lo.ctlx.sinc = 0;
-               ctl_lo.ctlx.dinc = 0;
-       } else {
-               if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
-                       ctl_lo.ctlx.sinc = 0;
-                       ctl_lo.ctlx.dinc = 2;
-                       ctl_lo.ctlx.tt_fc = 1;
-               } else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
-                       ctl_lo.ctlx.sinc = 2;
-                       ctl_lo.ctlx.dinc = 0;
-                       ctl_lo.ctlx.tt_fc = 2;
-               }
-       }
-
-       pr_debug("MDMA:Calc CTL LO %x, CTL HI %x, CFG LO %x, CFG HI %x\n",
-               ctl_lo.ctl_lo, ctl_hi.ctl_hi, cfg_lo.cfg_lo, cfg_hi.cfg_hi);
-
-       enable_dma_interrupt(midc);
-
-       desc = midc_desc_get(midc);
-       if (desc == NULL)
-               goto err_desc_get;
-       desc->sar = src;
-       desc->dar = dest;
-       desc->len = len;
-       desc->cfg_hi = cfg_hi.cfg_hi;
-       desc->cfg_lo = cfg_lo.cfg_lo;
-       desc->ctl_lo = ctl_lo.ctl_lo;
-       desc->ctl_hi = ctl_hi.ctl_hi;
-       desc->width = width;
-       desc->dirn = mids->dma_slave.direction;
-       desc->lli_phys = 0;
-       desc->lli = NULL;
-       desc->lli_pool = NULL;
-       return &desc->txd;
-
-err_desc_get:
-       pr_err("ERR_MDMA: Failed to get desc\n");
-       midc_desc_put(midc, desc);
-       return NULL;
-}
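
The division by two used above for dst_tr_width and src_tr_width works because the enum dma_slave_buswidth values are the byte counts themselves (1, 2 and 4), which divide down exactly to the controller's 3-bit encoding (0b000, 0b001, 0b010). A minimal sketch of the same mapping as a standalone helper; the helper name is hypothetical:

    #include <linux/dmaengine.h>

    /* Hypothetical helper restating the width table in the comment above:
     * DMA_SLAVE_BUSWIDTH_1_BYTE  (1) -> 0b000
     * DMA_SLAVE_BUSWIDTH_2_BYTES (2) -> 0b001
     * DMA_SLAVE_BUSWIDTH_4_BYTES (4) -> 0b010
     */
    static inline u32 buswidth_to_tr_width(enum dma_slave_buswidth width)
    {
            return width / 2;       /* 1/2 = 0, 2/2 = 1, 4/2 = 2 */
    }
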
-/**
- * intel_mid_dma_prep_slave_sg -       Prep slave sg txn
- * @chan: chan for DMA transfer
- * @sgl: scatter gather list
- * @sg_len: length of sg txn
- * @direction: DMA transfer direction
- * @flags: DMA flags
- * @context: transfer context (ignored)
- *
- * Prepares an LLI-based peripheral transfer
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
-                       struct dma_chan *chan, struct scatterlist *sgl,
-                       unsigned int sg_len, enum dma_transfer_direction direction,
-                       unsigned long flags, void *context)
-{
-       struct intel_mid_dma_chan *midc = NULL;
-       struct intel_mid_dma_slave *mids = NULL;
-       struct intel_mid_dma_desc *desc = NULL;
-       struct dma_async_tx_descriptor *txd = NULL;
-       union intel_mid_dma_ctl_lo ctl_lo;
-
-       pr_debug("MDMA: Prep for slave SG\n");
-
-       if (!sg_len) {
-               pr_err("MDMA: Invalid SG length\n");
-               return NULL;
-       }
-       midc = to_intel_mid_dma_chan(chan);
-       BUG_ON(!midc);
-
-       mids = midc->mid_slave;
-       BUG_ON(!mids);
-
-       if (!midc->dma->pimr_mask) {
-               /* We can still handle sg list with only one item */
-               if (sg_len == 1) {
-                       txd = intel_mid_dma_prep_memcpy(chan,
-                                               mids->dma_slave.dst_addr,
-                                               mids->dma_slave.src_addr,
-                                               sg_dma_len(sgl),
-                                               flags);
-                       return txd;
-               } else {
-                       pr_warn("MDMA: SG list is not supported by this controller\n");
-                       return  NULL;
-               }
-       }
-
-       pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n",
-                       sg_len, direction, flags);
-
-       txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sg_dma_len(sgl), flags);
-       if (NULL == txd) {
-               pr_err("MDMA: Prep memcpy failed\n");
-               return NULL;
-       }
-
-       desc = to_intel_mid_dma_desc(txd);
-       desc->dirn = direction;
-       ctl_lo.ctl_lo = desc->ctl_lo;
-       ctl_lo.ctlx.llp_dst_en = 1;
-       ctl_lo.ctlx.llp_src_en = 1;
-       desc->ctl_lo = ctl_lo.ctl_lo;
-       desc->lli_length = sg_len;
-       desc->current_lli = 0;
-       /* DMA coherent memory pool for LLI descriptors*/
-       desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool",
-                               midc->dma->pdev,
-                               (sizeof(struct intel_mid_dma_lli)*sg_len),
-                               32, 0);
-       if (NULL == desc->lli_pool) {
-               pr_err("MID_DMA:LLI pool create failed\n");
-               return NULL;
-       }
-
-       desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys);
-       if (!desc->lli) {
-               pr_err("MID_DMA: LLI alloc failed\n");
-               pci_pool_destroy(desc->lli_pool);
-               return NULL;
-       }
-
-       midc_lli_fill_sg(midc, desc, sgl, sg_len, flags);
-       if (flags & DMA_PREP_INTERRUPT) {
-               iowrite32(UNMASK_INTR_REG(midc->ch_id),
-                               midc->dma_base + MASK_BLOCK);
-               pr_debug("MDMA:Enabled Block interrupt\n");
-       }
-       return &desc->txd;
-}
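
For multi-segment transfers the code above allocates one hardware LLI entry per scatterlist segment from a dedicated pci_pool and lets midc_lli_fill_sg() chain them. The chaining reduces to each entry's llp field carrying the bus address of its successor, with 0 terminating the list; a sketch under that assumption, using the struct intel_mid_dma_lli layout from intel_mid_dma_regs.h:

    /* Illustrative sketch only: link n LLI entries allocated contiguously
     * starting at bus address lli_phys. */
    static void sketch_chain_lli(struct intel_mid_dma_lli *lli,
                                 dma_addr_t lli_phys, unsigned int n)
    {
            unsigned int i;

            for (i = 0; i < n; i++)
                    /* the last entry gets llp = 0 to stop the engine */
                    lli[i].llp = (i + 1 < n) ?
                            lli_phys + (i + 1) * sizeof(*lli) : 0;
    }
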
-
-/**
- * intel_mid_dma_free_chan_resources - Frees dma resources
- * @chan: chan requiring attention
- *
- * Frees the allocated resources on this DMA chan
- */
-static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
-{
-       struct intel_mid_dma_chan       *midc = to_intel_mid_dma_chan(chan);
-       struct middma_device    *mid = to_middma_device(chan->device);
-       struct intel_mid_dma_desc       *desc, *_desc;
-
-       if (true == midc->busy) {
-               /* trying to free a channel that is still in use */
-               pr_err("ERR_MDMA: trying to free ch in use\n");
-       }
-       spin_lock_bh(&midc->lock);
-       midc->descs_allocated = 0;
-       list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-               list_del(&desc->desc_node);
-               pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-       }
-       list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
-               list_del(&desc->desc_node);
-               pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-       }
-       list_for_each_entry_safe(desc, _desc, &midc->queue, desc_node) {
-               list_del(&desc->desc_node);
-               pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-       }
-       spin_unlock_bh(&midc->lock);
-       midc->in_use = false;
-       midc->busy = false;
-       /* Disable CH interrupts */
-       iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
-       iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
-       pm_runtime_put(&mid->pdev->dev);
-}
-
-/**
- * intel_mid_dma_alloc_chan_resources -        Allocate dma resources
- * @chan: chan requiring attention
- *
- * Allocates DMA resources on this chan
- * Returns the number of descriptors allocated
- */
-static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       struct intel_mid_dma_chan       *midc = to_intel_mid_dma_chan(chan);
-       struct middma_device    *mid = to_middma_device(chan->device);
-       struct intel_mid_dma_desc       *desc;
-       dma_addr_t              phys;
-       int     i = 0;
-
-       pm_runtime_get_sync(&mid->pdev->dev);
-
-       if (mid->state == SUSPENDED) {
-               if (dma_resume(&mid->pdev->dev)) {
-                       pr_err("ERR_MDMA: resume failed\n");
-                       return -EFAULT;
-               }
-       }
-
-       /* ASSERT:  channel is idle */
-       if (test_ch_en(mid->dma_base, midc->ch_id)) {
-               /*ch is not idle*/
-               pr_err("ERR_MDMA: ch not idle\n");
-               pm_runtime_put(&mid->pdev->dev);
-               return -EIO;
-       }
-       dma_cookie_init(chan);
-
-       spin_lock_bh(&midc->lock);
-       while (midc->descs_allocated < DESCS_PER_CHANNEL) {
-               spin_unlock_bh(&midc->lock);
-               desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys);
-               if (!desc) {
-                       pr_err("ERR_MDMA: desc failed\n");
-                       pm_runtime_put(&mid->pdev->dev);
-                       return -ENOMEM;
-                       /*check*/
-               }
-               dma_async_tx_descriptor_init(&desc->txd, chan);
-               desc->txd.tx_submit = intel_mid_dma_tx_submit;
-               desc->txd.flags = DMA_CTRL_ACK;
-               desc->txd.phys = phys;
-               spin_lock_bh(&midc->lock);
-               i = ++midc->descs_allocated;
-               list_add_tail(&desc->desc_node, &midc->free_list);
-       }
-       spin_unlock_bh(&midc->lock);
-       midc->in_use = true;
-       midc->busy = false;
-       pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i);
-       return i;
-}
-
-/**
- * midc_handle_error - Handle DMA txn error
- * @mid: controller where error occurred
- * @midc: chan where error occurred
- *
- * Scan the descriptor for error
- */
-static void midc_handle_error(struct middma_device *mid,
-               struct intel_mid_dma_chan *midc)
-{
-       midc_scan_descriptors(mid, midc);
-}
-
-/**
- * dma_tasklet -       DMA interrupt tasklet
- * @data: tasklet arg (the controller structure)
- *
- * Scan the controller for interrupts for completion/error
- * Clear the interrupt and call for handling completion/error
- */
-static void dma_tasklet(unsigned long data)
-{
-       struct middma_device *mid = NULL;
-       struct intel_mid_dma_chan *midc = NULL;
-       u32 status, raw_tfr, raw_block;
-       int i;
-
-       mid = (struct middma_device *)data;
-       if (mid == NULL) {
-               pr_err("ERR_MDMA: tasklet Null param\n");
-               return;
-       }
-       pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id);
-       raw_tfr = ioread32(mid->dma_base + RAW_TFR);
-       raw_block = ioread32(mid->dma_base + RAW_BLOCK);
-       status = raw_tfr | raw_block;
-       status &= mid->intr_mask;
-       while (status) {
-               /*txn interrupt*/
-               i = get_ch_index(&status, mid->chan_base);
-               if (i < 0) {
-                       pr_err("ERR_MDMA:Invalid ch index %x\n", i);
-                       return;
-               }
-               midc = &mid->ch[i];
-               if (midc == NULL) {
-                       pr_err("ERR_MDMA:Null param midc\n");
-                       return;
-               }
-               pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
-                               status, midc->ch_id, i);
-               midc->raw_tfr = raw_tfr;
-               midc->raw_block = raw_block;
-               spin_lock_bh(&midc->lock);
-               /* clear these interrupts first */
-               iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR);
-               if (raw_block) {
-                       iowrite32((1 << midc->ch_id),
-                               mid->dma_base + CLEAR_BLOCK);
-               }
-               midc_scan_descriptors(mid, midc);
-               pr_debug("MDMA:Scan of desc... complete, unmasking\n");
-               iowrite32(UNMASK_INTR_REG(midc->ch_id),
-                               mid->dma_base + MASK_TFR);
-               if (raw_block) {
-                       iowrite32(UNMASK_INTR_REG(midc->ch_id),
-                               mid->dma_base + MASK_BLOCK);
-               }
-               spin_unlock_bh(&midc->lock);
-       }
-
-       status = ioread32(mid->dma_base + RAW_ERR);
-       status &= mid->intr_mask;
-       while (status) {
-               /*err interrupt*/
-               i = get_ch_index(&status, mid->chan_base);
-               if (i < 0) {
-                       pr_err("ERR_MDMA:Invalid ch index %x\n", i);
-                       return;
-               }
-               midc = &mid->ch[i];
-               if (midc == NULL) {
-                       pr_err("ERR_MDMA:Null param midc\n");
-                       return;
-               }
-               pr_debug("MDMA:Error interrupt %x, Ch No %d Index %d\n",
-                               status, midc->ch_id, i);
-
-               iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_ERR);
-               spin_lock_bh(&midc->lock);
-               midc_handle_error(mid, midc);
-               iowrite32(UNMASK_INTR_REG(midc->ch_id),
-                               mid->dma_base + MASK_ERR);
-               spin_unlock_bh(&midc->lock);
-       }
-       pr_debug("MDMA:Exiting tasklet...\n");
-       return;
-}
-
-static void dma_tasklet1(unsigned long data)
-{
-       pr_debug("MDMA:in tasklet1...\n");
-       return dma_tasklet(data);
-}
-
-static void dma_tasklet2(unsigned long data)
-{
-       pr_debug("MDMA:in tasklet2...\n");
-       return dma_tasklet(data);
-}
-
-/**
- * intel_mid_dma_interrupt -   DMA ISR
- * @irq: IRQ where interrupt occurred
- * @data: ISR callback data (the controller structure)
- *
- * Check whether this is our interrupt; if so, schedule the tasklet,
- * otherwise ignore it
- */
-static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
-{
-       struct middma_device *mid = data;
-       u32 tfr_status, err_status;
-       int call_tasklet = 0;
-
-       tfr_status = ioread32(mid->dma_base + RAW_TFR);
-       err_status = ioread32(mid->dma_base + RAW_ERR);
-       if (!tfr_status && !err_status)
-               return IRQ_NONE;
-
-       /*DMA Interrupt*/
-       pr_debug("MDMA:Got an interrupt on irq %d\n", irq);
-       pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask);
-       tfr_status &= mid->intr_mask;
-       if (tfr_status) {
-               /*need to disable intr*/
-               iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR);
-               iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK);
-               pr_debug("MDMA: Calling tasklet %x\n", tfr_status);
-               call_tasklet = 1;
-       }
-       err_status &= mid->intr_mask;
-       if (err_status) {
-               iowrite32((err_status << INT_MASK_WE),
-                         mid->dma_base + MASK_ERR);
-               call_tasklet = 1;
-       }
-       if (call_tasklet)
-               tasklet_schedule(&mid->tasklet);
-
-       return IRQ_HANDLED;
-}
-
-static irqreturn_t intel_mid_dma_interrupt1(int irq, void *data)
-{
-       return intel_mid_dma_interrupt(irq, data);
-}
-
-static irqreturn_t intel_mid_dma_interrupt2(int irq, void *data)
-{
-       return intel_mid_dma_interrupt(irq, data);
-}
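
The interrupt path is the classic top-half/bottom-half split: the hard IRQ handler only verifies the interrupt is ours, masks the sources that fired and schedules a tasklet, which later clears, processes and unmasks them outside hard-IRQ context. A self-contained sketch of the pattern; the demo_* names are hypothetical:

    #include <linux/interrupt.h>
    #include <linux/io.h>

    struct demo_dev {                       /* hypothetical device */
            void __iomem *base;
            struct tasklet_struct tasklet;  /* wired up at probe time with
                                             * tasklet_init(&d->tasklet,
                                             * demo_tasklet, (unsigned long)d) */
    };

    static void demo_tasklet(unsigned long data)
    {
            struct demo_dev *d = (struct demo_dev *)data;

            /* heavy processing runs here, then sources are unmasked again */
            (void)d;
    }

    static irqreturn_t demo_isr(int irq, void *data)
    {
            struct demo_dev *d = data;
            u32 status = ioread32(d->base); /* raw interrupt status */

            if (!status)
                    return IRQ_NONE;        /* shared line, not ours */

            /* mask what fired, defer the real work to the tasklet */
            tasklet_schedule(&d->tasklet);
            return IRQ_HANDLED;
    }
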
-
-/**
- * mid_setup_dma -     Setup the DMA controller
- * @pdev: Controller PCI device structure
- *
- * Initialize the DMA controller and its channels, register with the
- * DMA engine, and set up the ISR.
- */
-static int mid_setup_dma(struct pci_dev *pdev)
-{
-       struct middma_device *dma = pci_get_drvdata(pdev);
-       int err, i;
-
-       /* DMA coherent memory pool for DMA descriptor allocations */
-       dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev,
-                                       sizeof(struct intel_mid_dma_desc),
-                                       32, 0);
-       if (NULL == dma->dma_pool) {
-               pr_err("ERR_MDMA:pci_pool_create failed\n");
-               err = -ENOMEM;
-               goto err_dma_pool;
-       }
-
-       INIT_LIST_HEAD(&dma->common.channels);
-       dma->pci_id = pdev->device;
-       if (dma->pimr_mask) {
-               dma->mask_reg = ioremap(LNW_PERIPHRAL_MASK_BASE,
-                                       LNW_PERIPHRAL_MASK_SIZE);
-               if (dma->mask_reg == NULL) {
-                       pr_err("ERR_MDMA:Can't map peripheral intr space!\n");
-                       err = -ENOMEM;
-                       goto err_ioremap;
-               }
-       } else
-               dma->mask_reg = NULL;
-
-       pr_debug("MDMA:Adding %d channels for this controller\n", dma->max_chan);
-       /*init CH structures*/
-       dma->intr_mask = 0;
-       dma->state = RUNNING;
-       for (i = 0; i < dma->max_chan; i++) {
-               struct intel_mid_dma_chan *midch = &dma->ch[i];
-
-               midch->chan.device = &dma->common;
-               dma_cookie_init(&midch->chan);
-               midch->ch_id = dma->chan_base + i;
-               pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id);
-
-               midch->dma_base = dma->dma_base;
-               midch->ch_regs = dma->dma_base + DMA_CH_SIZE * midch->ch_id;
-               midch->dma = dma;
-               dma->intr_mask |= 1 << (dma->chan_base + i);
-               spin_lock_init(&midch->lock);
-
-               INIT_LIST_HEAD(&midch->active_list);
-               INIT_LIST_HEAD(&midch->queue);
-               INIT_LIST_HEAD(&midch->free_list);
-               /*mask interrupts*/
-               iowrite32(MASK_INTR_REG(midch->ch_id),
-                       dma->dma_base + MASK_BLOCK);
-               iowrite32(MASK_INTR_REG(midch->ch_id),
-                       dma->dma_base + MASK_SRC_TRAN);
-               iowrite32(MASK_INTR_REG(midch->ch_id),
-                       dma->dma_base + MASK_DST_TRAN);
-               iowrite32(MASK_INTR_REG(midch->ch_id),
-                       dma->dma_base + MASK_ERR);
-               iowrite32(MASK_INTR_REG(midch->ch_id),
-                       dma->dma_base + MASK_TFR);
-
-               disable_dma_interrupt(midch);
-               list_add_tail(&midch->chan.device_node, &dma->common.channels);
-       }
-       pr_debug("MDMA: Calc Mask as %x for this controller\n", dma->intr_mask);
-
-       /*init dma structure*/
-       dma_cap_zero(dma->common.cap_mask);
-       dma_cap_set(DMA_MEMCPY, dma->common.cap_mask);
-       dma_cap_set(DMA_SLAVE, dma->common.cap_mask);
-       dma_cap_set(DMA_PRIVATE, dma->common.cap_mask);
-       dma->common.dev = &pdev->dev;
-
-       dma->common.device_alloc_chan_resources =
-                                       intel_mid_dma_alloc_chan_resources;
-       dma->common.device_free_chan_resources =
-                                       intel_mid_dma_free_chan_resources;
-
-       dma->common.device_tx_status = intel_mid_dma_tx_status;
-       dma->common.device_prep_dma_memcpy = intel_mid_dma_prep_memcpy;
-       dma->common.device_issue_pending = intel_mid_dma_issue_pending;
-       dma->common.device_prep_slave_sg = intel_mid_dma_prep_slave_sg;
-       dma->common.device_config = intel_mid_dma_config;
-       dma->common.device_terminate_all = intel_mid_dma_terminate_all;
-
-       /*enable dma cntrl*/
-       iowrite32(REG_BIT0, dma->dma_base + DMA_CFG);
-
-       /*register irq */
-       if (dma->pimr_mask) {
-               pr_debug("MDMA:Requesting irq shared for DMAC1\n");
-               err = request_irq(pdev->irq, intel_mid_dma_interrupt1,
-                       IRQF_SHARED, "INTEL_MID_DMAC1", dma);
-               if (0 != err)
-                       goto err_irq;
-       } else {
-               dma->intr_mask = 0x03;
-               pr_debug("MDMA:Requesting irq for DMAC2\n");
-               err = request_irq(pdev->irq, intel_mid_dma_interrupt2,
-                       IRQF_SHARED, "INTEL_MID_DMAC2", dma);
-               if (0 != err)
-                       goto err_irq;
-       }
-       /*register device w/ engine*/
-       err = dma_async_device_register(&dma->common);
-       if (0 != err) {
-               pr_err("ERR_MDMA:device_register failed: %d\n", err);
-               goto err_engine;
-       }
-       if (dma->pimr_mask) {
-               pr_debug("setting up tasklet1 for DMAC1\n");
-               tasklet_init(&dma->tasklet, dma_tasklet1, (unsigned long)dma);
-       } else {
-               pr_debug("setting up tasklet2 for DMAC2\n");
-               tasklet_init(&dma->tasklet, dma_tasklet2, (unsigned long)dma);
-       }
-       return 0;
-
-err_engine:
-       free_irq(pdev->irq, dma);
-err_irq:
-       if (dma->mask_reg)
-               iounmap(dma->mask_reg);
-err_ioremap:
-       pci_pool_destroy(dma->dma_pool);
-err_dma_pool:
-       pr_err("ERR_MDMA:setup_dma failed: %d\n", err);
-       return err;
-
-}
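
mid_setup_dma() also shows the kernel's inverted-goto error unwinding: each acquisition that can fail jumps to a label that releases, in reverse order, only what was acquired before it. The skeleton of the idiom, with hypothetical stub resources:

    static int demo_acquire_a(void) { return 0; }   /* hypothetical stubs */
    static int demo_acquire_b(void) { return 0; }
    static void demo_release_a(void) { }

    static int demo_setup(void)
    {
            int err;

            err = demo_acquire_a();
            if (err)
                    goto err_a;

            err = demo_acquire_b();
            if (err)
                    goto err_b;

            return 0;

    err_b:
            demo_release_a();       /* undo in reverse acquisition order */
    err_a:
            return err;
    }
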
-
-/**
- * middma_shutdown -   Shutdown the DMA controller
- * @pdev: Controller PCI device structure
- *
- * Called by remove
- * Unregisters the DMA controller, clears all structures and frees the interrupt
- */
-static void middma_shutdown(struct pci_dev *pdev)
-{
-       struct middma_device *device = pci_get_drvdata(pdev);
-
-       dma_async_device_unregister(&device->common);
-       pci_pool_destroy(device->dma_pool);
-       if (device->mask_reg)
-               iounmap(device->mask_reg);
-       if (device->dma_base)
-               iounmap(device->dma_base);
-       free_irq(pdev->irq, device);
-       return;
-}
-
-/**
- * intel_mid_dma_probe -       PCI Probe
- * @pdev: Controller PCI device structure
- * @id: pci device id structure
- *
- * Initialize the PCI device, map BARs, query driver data.
- * Call mid_setup_dma to complete controller and chan initialization
- */
-static int intel_mid_dma_probe(struct pci_dev *pdev,
-                                       const struct pci_device_id *id)
-{
-       struct middma_device *device;
-       u32 base_addr, bar_size;
-       struct intel_mid_dma_probe_info *info;
-       int err;
-
-       pr_debug("MDMA: probe for %x\n", pdev->device);
-       info = (void *)id->driver_data;
-       pr_debug("MDMA: CH %d, base %d, block len %d, Peripheral mask %x\n",
-                               info->max_chan, info->ch_base,
-                               info->block_size, info->pimr_mask);
-
-       err = pci_enable_device(pdev);
-       if (err)
-               goto err_enable_device;
-
-       err = pci_request_regions(pdev, "intel_mid_dmac");
-       if (err)
-               goto err_request_regions;
-
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (err)
-               goto err_set_dma_mask;
-
-       err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (err)
-               goto err_set_dma_mask;
-
-       device = kzalloc(sizeof(*device), GFP_KERNEL);
-       if (!device) {
-               pr_err("ERR_MDMA:kzalloc failed probe\n");
-               err = -ENOMEM;
-               goto err_kzalloc;
-       }
-       device->pdev = pci_dev_get(pdev);
-
-       base_addr = pci_resource_start(pdev, 0);
-       bar_size  = pci_resource_len(pdev, 0);
-       device->dma_base = ioremap_nocache(base_addr, DMA_REG_SIZE);
-       if (!device->dma_base) {
-               pr_err("ERR_MDMA:ioremap failed\n");
-               err = -ENOMEM;
-               goto err_ioremap;
-       }
-       pci_set_drvdata(pdev, device);
-       pci_set_master(pdev);
-       device->max_chan = info->max_chan;
-       device->chan_base = info->ch_base;
-       device->block_size = info->block_size;
-       device->pimr_mask = info->pimr_mask;
-
-       err = mid_setup_dma(pdev);
-       if (err)
-               goto err_dma;
-
-       pm_runtime_put_noidle(&pdev->dev);
-       pm_runtime_allow(&pdev->dev);
-       return 0;
-
-err_dma:
-       iounmap(device->dma_base);
-err_ioremap:
-       pci_dev_put(pdev);
-       kfree(device);
-err_kzalloc:
-err_set_dma_mask:
-       pci_release_regions(pdev);
-       pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
-       pr_err("ERR_MDMA:Probe failed %d\n", err);
-       return err;
-}
-
-/**
- * intel_mid_dma_remove -      PCI remove
- * @pdev: Controller PCI device structure
- *
- * Free up all resources and data
- * Call middma_shutdown to complete controller and chan cleanup
- */
-static void intel_mid_dma_remove(struct pci_dev *pdev)
-{
-       struct middma_device *device = pci_get_drvdata(pdev);
-
-       pm_runtime_get_noresume(&pdev->dev);
-       pm_runtime_forbid(&pdev->dev);
-       middma_shutdown(pdev);
-       pci_dev_put(pdev);
-       kfree(device);
-       pci_release_regions(pdev);
-       pci_disable_device(pdev);
-}
-
-/* Power Management */
-/*
-* dma_suspend - PCI suspend function
-*
-* @dev: device structure
-*
-* This function is called by the OS when a power event occurs
-*/
-static int dma_suspend(struct device *dev)
-{
-       struct pci_dev *pci = to_pci_dev(dev);
-       int i;
-       struct middma_device *device = pci_get_drvdata(pci);
-       pr_debug("MDMA: dma_suspend called\n");
-
-       for (i = 0; i < device->max_chan; i++) {
-               if (device->ch[i].in_use)
-                       return -EAGAIN;
-       }
-       dmac1_mask_periphral_intr(device);
-       device->state = SUSPENDED;
-       pci_save_state(pci);
-       pci_disable_device(pci);
-       pci_set_power_state(pci, PCI_D3hot);
-       return 0;
-}
-
-/**
-* dma_resume - PCI resume function
-*
-* @dev: device structure
-*
-* This function is called by the OS when a power event occurs
-*/
-int dma_resume(struct device *dev)
-{
-       struct pci_dev *pci = to_pci_dev(dev);
-       int ret;
-       struct middma_device *device = pci_get_drvdata(pci);
-
-       pr_debug("MDMA: dma_resume called\n");
-       pci_set_power_state(pci, PCI_D0);
-       pci_restore_state(pci);
-       ret = pci_enable_device(pci);
-       if (ret) {
-               pr_err("MDMA: device can't be enabled for %x\n", pci->device);
-               return ret;
-       }
-       device->state = RUNNING;
-       iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-       return 0;
-}
-
-static int dma_runtime_suspend(struct device *dev)
-{
-       struct pci_dev *pci_dev = to_pci_dev(dev);
-       struct middma_device *device = pci_get_drvdata(pci_dev);
-
-       device->state = SUSPENDED;
-       return 0;
-}
-
-static int dma_runtime_resume(struct device *dev)
-{
-       struct pci_dev *pci_dev = to_pci_dev(dev);
-       struct middma_device *device = pci_get_drvdata(pci_dev);
-
-       device->state = RUNNING;
-       iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-       return 0;
-}
-
-static int dma_runtime_idle(struct device *dev)
-{
-       struct pci_dev *pdev = to_pci_dev(dev);
-       struct middma_device *device = pci_get_drvdata(pdev);
-       int i;
-
-       for (i = 0; i < device->max_chan; i++) {
-               if (device->ch[i].in_use)
-                       return -EAGAIN;
-       }
-
-       return 0;
-}
-
-/******************************************************************************
-* PCI stuff
-*/
-static struct pci_device_id intel_mid_dma_ids[] = {
-       { PCI_VDEVICE(INTEL, INTEL_MID_DMAC1_ID),       INFO(2, 6, 4095, 0x200020)},
-       { PCI_VDEVICE(INTEL, INTEL_MID_DMAC2_ID),       INFO(2, 0, 2047, 0)},
-       { PCI_VDEVICE(INTEL, INTEL_MID_GP_DMAC2_ID),    INFO(2, 0, 2047, 0)},
-       { PCI_VDEVICE(INTEL, INTEL_MFLD_DMAC1_ID),      INFO(4, 0, 4095, 0x400040)},
-       { 0, }
-};
-MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids);
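
Each entry above stashes an INFO() blob in the driver_data slot, which intel_mid_dma_probe() recovers with a cast to struct intel_mid_dma_probe_info to learn the channel count, channel base, block size and peripheral interrupt mask for that device. The general idiom, sketched with a hypothetical info type and a made-up device ID (0x0827):

    #include <linux/pci.h>

    struct demo_probe_info {                /* hypothetical */
            int max_chan;
            int ch_base;
    };

    static const struct demo_probe_info demo_info = { 2, 6 };

    static const struct pci_device_id demo_ids[] = {
            /* the pointer rides along in driver_data */
            { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&demo_info },
            { }
    };

    /* in probe: info = (void *)id->driver_data; */
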
-
-static const struct dev_pm_ops intel_mid_dma_pm = {
-       .runtime_suspend = dma_runtime_suspend,
-       .runtime_resume = dma_runtime_resume,
-       .runtime_idle = dma_runtime_idle,
-       .suspend = dma_suspend,
-       .resume = dma_resume,
-};
-
-static struct pci_driver intel_mid_dma_pci_driver = {
-       .name           =       "Intel MID DMA",
-       .id_table       =       intel_mid_dma_ids,
-       .probe          =       intel_mid_dma_probe,
-       .remove         =       intel_mid_dma_remove,
-#ifdef CONFIG_PM
-       .driver = {
-               .pm = &intel_mid_dma_pm,
-       },
-#endif
-};
-
-static int __init intel_mid_dma_init(void)
-{
-       pr_debug("INFO_MDMA: LNW DMA Driver Version %s\n",
-                       INTEL_MID_DMA_DRIVER_VERSION);
-       return pci_register_driver(&intel_mid_dma_pci_driver);
-}
-fs_initcall(intel_mid_dma_init);
-
-static void __exit intel_mid_dma_exit(void)
-{
-       pci_unregister_driver(&intel_mid_dma_pci_driver);
-}
-module_exit(intel_mid_dma_exit);
-
-MODULE_AUTHOR("Vinod Koul <vinod.koul@intel.com>");
-MODULE_DESCRIPTION("Intel (R) MID DMAC Driver");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(INTEL_MID_DMA_DRIVER_VERSION);
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
deleted file mode 100644 (file)
index 17b4219..0000000
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- *  intel_mid_dma_regs.h - Intel MID DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#ifndef __INTEL_MID_DMAC_REGS_H__
-#define __INTEL_MID_DMAC_REGS_H__
-
-#include <linux/dmaengine.h>
-#include <linux/dmapool.h>
-#include <linux/pci_ids.h>
-
-#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0"
-
-#define        REG_BIT0                0x00000001
-#define        REG_BIT8                0x00000100
-#define INT_MASK_WE            0x8
-#define CLEAR_DONE             0xFFFFEFFF
-#define UNMASK_INTR_REG(chan_num) \
-       ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
-#define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num)
-
-#define ENABLE_CHANNEL(chan_num) \
-       ((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
-
-#define DISABLE_CHANNEL(chan_num) \
-       (REG_BIT8 << chan_num)
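
In these registers the upper byte acts as a write-enable for the corresponding bit in the lower byte, which is why INT_MASK_WE is a shift of 8: unmasking or enabling writes both the value bit and its write-enable, while masking or disabling writes the write-enable alone, leaving the value bit clear. Worked through for channel 2:

    /*
     * UNMASK_INTR_REG(2) = (0x00000001 << 2) | (0x00000100 << 2) = 0x00000404
     * MASK_INTR_REG(2)   =                     (0x00000100 << 2) = 0x00000400
     * ENABLE_CHANNEL(2) and DISABLE_CHANNEL(2) follow the same scheme.
     */
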
-
-#define DESCS_PER_CHANNEL      16
-/*DMA Registers*/
-/*registers associated with channel programming*/
-#define DMA_REG_SIZE           0x400
-#define DMA_CH_SIZE            0x58
-
-/*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/
-#define SAR                    0x00 /* Source Address Register*/
-#define DAR                    0x08 /* Destination Address Register*/
-#define LLP                    0x10 /* Linked List Pointer Register*/
-#define CTL_LOW                        0x18 /* Control Register*/
-#define CTL_HIGH               0x1C /* Control Register*/
-#define CFG_LOW                        0x40 /* Configuration Register Low*/
-#define CFG_HIGH               0x44 /* Configuration Register high*/
-
-#define STATUS_TFR             0x2E8
-#define STATUS_BLOCK           0x2F0
-#define STATUS_ERR             0x308
-
-#define RAW_TFR                        0x2C0
-#define RAW_BLOCK              0x2C8
-#define RAW_ERR                        0x2E0
-
-#define MASK_TFR               0x310
-#define MASK_BLOCK             0x318
-#define MASK_SRC_TRAN          0x320
-#define MASK_DST_TRAN          0x328
-#define MASK_ERR               0x330
-
-#define CLEAR_TFR              0x338
-#define CLEAR_BLOCK            0x340
-#define CLEAR_SRC_TRAN         0x348
-#define CLEAR_DST_TRAN         0x350
-#define CLEAR_ERR              0x358
-
-#define INTR_STATUS            0x360
-#define DMA_CFG                        0x398
-#define DMA_CHAN_EN            0x3A0
-
-/*DMA channel control registers*/
-union intel_mid_dma_ctl_lo {
-       struct {
-               u32     int_en:1;       /*enable or disable interrupts*/
-                                       /*should be 0*/
-               u32     dst_tr_width:3; /*destination transfer width*/
-                                       /*usually 32 bits = 010*/
-               u32     src_tr_width:3; /*source transfer width*/
-                                       /*usually 32 bits = 010*/
-               u32     dinc:2;         /*destination address inc/dec*/
-                                       /*For mem:INC=00, Peripheral NoINC=11*/
-               u32     sinc:2;         /*source address inc or dec, as above*/
-               u32     dst_msize:3;    /*destination burst transaction length*/
-                                       /*always = 16 ie 011*/
-               u32     src_msize:3;    /*source burst transaction length*/
-                                       /*always = 16 ie 011*/
-               u32     reser1:3;
-               u32     tt_fc:3;        /*transfer type and flow controller*/
-                                       /*M-M = 000
-                                         P-M = 010
-                                         M-P = 001*/
-               u32     dms:2;          /*destination master select = 0*/
-               u32     sms:2;          /*source master select = 0*/
-               u32     llp_dst_en:1;   /*enable/disable destination LLP = 0*/
-               u32     llp_src_en:1;   /*enable/disable source LLP = 0*/
-               u32     reser2:3;
-       } ctlx;
-       u32     ctl_lo;
-};
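
The union lets the driver build a register image field by field through ctlx and then write the aggregate u32 in a single MMIO access. A minimal sketch of the idiom, mirroring what intel_mid_dma_prep_memcpy() does for a 32-bit memory-to-memory transfer (C bitfield ordering is implementation-defined, so such layouts are only safe on a known compiler and ABI):

    static u32 sketch_build_ctl_lo(void)
    {
            union intel_mid_dma_ctl_lo ctl_lo;

            ctl_lo.ctl_lo = 0;              /* clean register image */
            ctl_lo.ctlx.int_en = 1;         /* interrupt on completion */
            ctl_lo.ctlx.dst_tr_width = 2;   /* 32 bits = 0b010 */
            ctl_lo.ctlx.src_tr_width = 2;   /* 32 bits = 0b010 */
            ctl_lo.ctlx.tt_fc = 0;          /* M-M transfer = 000 */

            return ctl_lo.ctl_lo;           /* ready for iowrite32() */
    }
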
-
-union intel_mid_dma_ctl_hi {
-       struct {
-               u32     block_ts:12;    /*block transfer size*/
-               u32     done:1;         /*Done - updated by DMAC*/
-               u32     reser:19;       /*configured by DMAC*/
-       } ctlx;
-       u32     ctl_hi;
-
-};
-
-/*DMA channel configuration registers*/
-union intel_mid_dma_cfg_lo {
-       struct {
-               u32     reser1:5;
-               u32     ch_prior:3;     /*channel priority = 0*/
-               u32     ch_susp:1;      /*channel suspend = 0*/
-               u32     fifo_empty:1;   /*FIFO empty or not R bit = 0*/
-               u32     hs_sel_dst:1;   /*select HW/SW dst handshaking*/
-                                       /*HW = 0, SW = 1*/
-               u32     hs_sel_src:1;   /*select HW/SW src handshaking*/
-               u32     reser2:6;
-               u32     dst_hs_pol:1;   /*dest HS interface polarity*/
-               u32     src_hs_pol:1;   /*src HS interface polarity*/
-               u32     max_abrst:10;   /*max AMBA burst len = 0 (no sw limit)*/
-               u32     reload_src:1;   /*auto reload src addr =1 if src is P*/
-               u32     reload_dst:1;   /*auto reload dst addr =1 if dst is P*/
-       } cfgx;
-       u32     cfg_lo;
-};
-
-union intel_mid_dma_cfg_hi {
-       struct {
-               u32     fcmode:1;       /*flow control mode = 1*/
-               u32     fifo_mode:1;    /*FIFO mode select = 1*/
-               u32     protctl:3;      /*protection control = 0*/
-               u32     rsvd:2;
-               u32     src_per:4;      /*src hw HS interface*/
-               u32     dst_per:4;      /*dstn hw HS interface*/
-               u32     reser2:17;
-       } cfgx;
-       u32     cfg_hi;
-};
-
-
-/**
- * struct intel_mid_dma_chan - internal mid representation of a DMA channel
- * @chan: dma_chan structure representation for mid chan
- * @ch_regs: MMIO register space pointer to channel register
- * @dma_base: MMIO register space DMA engine base pointer
- * @ch_id: DMA channel id
- * @lock: channel spinlock
- * @active_list: current active descriptors
- * @queue: current queued up descriptors
- * @free_list: current free descriptors
- * @mid_slave: dma slave structure
- * @descs_allocated: total number of descriptors allocated
- * @dma: dma device structure pointer
- * @busy: bool representing if ch is busy (active txn) or not
- * @in_use: bool representing if ch is in use or not
- * @raw_tfr: raw tfr interrupt received
- * @raw_block: raw block interrupt received
- */
-struct intel_mid_dma_chan {
-       struct dma_chan         chan;
-       void __iomem            *ch_regs;
-       void __iomem            *dma_base;
-       int                     ch_id;
-       spinlock_t              lock;
-       struct list_head        active_list;
-       struct list_head        queue;
-       struct list_head        free_list;
-       unsigned int            descs_allocated;
-       struct middma_device    *dma;
-       bool                    busy;
-       bool                    in_use;
-       u32                     raw_tfr;
-       u32                     raw_block;
-       struct intel_mid_dma_slave *mid_slave;
-};
-
-static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
-                                               struct dma_chan *chan)
-{
-       return container_of(chan, struct intel_mid_dma_chan, chan);
-}
-
-enum intel_mid_dma_state {
-       RUNNING = 0,
-       SUSPENDED,
-};
-/**
- * struct middma_device - internal representation of a DMA device
- * @pdev: PCI device
- * @dma_base: MMIO register space pointer of DMA
- * @dma_pool: for allocating DMA descriptors
- * @common: embedded struct dma_device
- * @tasklet: dma tasklet for processing interrupts
- * @ch: per channel data
- * @pci_id: DMA device PCI ID
- * @intr_mask: Interrupt mask to be used
- * @mask_reg: MMIO register for peripheral mask
- * @chan_base: Base ch index (read from driver data)
- * @max_chan: max number of chs supported (from drv_data)
- * @block_size: Block size of DMA transfer supported (from drv_data)
- * @pimr_mask: MMIO register addr for peripheral interrupt (from drv_data)
- * @state: dma PM device state
- */
-struct middma_device {
-       struct pci_dev          *pdev;
-       void __iomem            *dma_base;
-       struct pci_pool         *dma_pool;
-       struct dma_device       common;
-       struct tasklet_struct   tasklet;
-       struct intel_mid_dma_chan ch[MAX_CHAN];
-       unsigned int            pci_id;
-       unsigned int            intr_mask;
-       void __iomem            *mask_reg;
-       int                     chan_base;
-       int                     max_chan;
-       int                     block_size;
-       unsigned int            pimr_mask;
-       enum intel_mid_dma_state state;
-};
-
-static inline struct middma_device *to_middma_device(struct dma_device *common)
-{
-       return container_of(common, struct middma_device, common);
-}
-
-struct intel_mid_dma_desc {
-       void __iomem                    *block; /*ch ptr*/
-       struct list_head                desc_node;
-       struct dma_async_tx_descriptor  txd;
-       size_t                          len;
-       dma_addr_t                      sar;
-       dma_addr_t                      dar;
-       u32                             cfg_hi;
-       u32                             cfg_lo;
-       u32                             ctl_lo;
-       u32                             ctl_hi;
-       struct pci_pool                 *lli_pool;
-       struct intel_mid_dma_lli        *lli;
-       dma_addr_t                      lli_phys;
-       unsigned int                    lli_length;
-       unsigned int                    current_lli;
-       dma_addr_t                      next;
-       enum dma_transfer_direction             dirn;
-       enum dma_status                 status;
-       enum dma_slave_buswidth         width; /*width of DMA txn*/
-       enum intel_mid_dma_mode         cfg_mode; /*mode configuration*/
-
-};
-
-struct intel_mid_dma_lli {
-       dma_addr_t                      sar;
-       dma_addr_t                      dar;
-       dma_addr_t                      llp;
-       u32                             ctl_lo;
-       u32                             ctl_hi;
-} __attribute__ ((packed));
-
-static inline int test_ch_en(void __iomem *dma, u32 ch_no)
-{
-       u32 en_reg = ioread32(dma + DMA_CHAN_EN);
-       return (en_reg >> ch_no) & 0x1;
-}
-
-static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc
-               (struct dma_async_tx_descriptor *txd)
-{
-       return container_of(txd, struct intel_mid_dma_desc, txd);
-}
-
-static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave
-               (struct dma_slave_config *slave)
-{
-       return container_of(slave, struct intel_mid_dma_slave, dma_slave);
-}
-
-
-int dma_resume(struct device *dev);
-
-#endif /*__INTEL_MID_DMAC_REGS_H__*/
index 5907c1718f8c74fbe4a7d2c3bceb21759ed07422..92772fffc52ff292fc38e8acd09bf0002d332c8b 100644 (file)
@@ -20,8 +20,7 @@ static struct msr __percpu *msrs;
  */
 static atomic_t drv_instances = ATOMIC_INIT(0);
 
-/* Per-node driver instances */
-static struct mem_ctl_info **mcis;
+/* Per-node ECC settings */
 static struct ecc_settings **ecc_stngs;
 
 /*
@@ -903,9 +902,17 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
 /* On F10h and later ErrAddr is MC4_ADDR[47:1] */
 static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
 {
-       u64 addr;
+       u16 mce_nid = amd_get_nb_id(m->extcpu);
+       struct mem_ctl_info *mci;
        u8 start_bit = 1;
        u8 end_bit   = 47;
+       u64 addr;
+
+       mci = edac_mc_find(mce_nid);
+       if (!mci)
+               return 0;
+
+       pvt = mci->pvt_info;
 
        if (pvt->fam == 0xf) {
                start_bit = 3;
@@ -918,17 +925,13 @@ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m)
         * Erratum 637 workaround
         */
        if (pvt->fam == 0x15) {
-               struct amd64_pvt *pvt;
                u64 cc6_base, tmp_addr;
                u32 tmp;
-               u16 mce_nid;
                u8 intlv_en;
 
                if ((addr & GENMASK_ULL(47, 24)) >> 24 != 0x00fdf7)
                        return addr;
 
-               mce_nid = amd_get_nb_id(m->extcpu);
-               pvt     = mcis[mce_nid]->pvt_info;
 
                amd64_read_pci_cfg(pvt->F1, DRAM_LOCAL_NODE_LIM, &tmp);
                intlv_en = tmp >> 21 & 0x7;
@@ -1511,7 +1514,7 @@ static int f1x_lookup_addr_in_dct(u64 in_addr, u8 nid, u8 dct)
        int cs_found = -EINVAL;
        int csrow;
 
-       mci = mcis[nid];
+       mci = edac_mc_find(nid);
        if (!mci)
                return cs_found;
 
@@ -2663,34 +2666,6 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid)
        return true;
 }
 
-static int set_mc_sysfs_attrs(struct mem_ctl_info *mci)
-{
-       struct amd64_pvt *pvt = mci->pvt_info;
-       int rc;
-
-       rc = amd64_create_sysfs_dbg_files(mci);
-       if (rc < 0)
-               return rc;
-
-       if (pvt->fam >= 0x10) {
-               rc = amd64_create_sysfs_inject_files(mci);
-               if (rc < 0)
-                       return rc;
-       }
-
-       return 0;
-}
-
-static void del_mc_sysfs_attrs(struct mem_ctl_info *mci)
-{
-       struct amd64_pvt *pvt = mci->pvt_info;
-
-       amd64_remove_sysfs_dbg_files(mci);
-
-       if (pvt->fam >= 0x10)
-               amd64_remove_sysfs_inject_files(mci);
-}
-
 static void setup_mci_misc_attrs(struct mem_ctl_info *mci,
                                 struct amd64_family_type *fam)
 {
@@ -2778,6 +2753,16 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
        return fam_type;
 }
 
+static const struct attribute_group *amd64_edac_attr_groups[] = {
+#ifdef CONFIG_EDAC_DEBUG
+       &amd64_edac_dbg_group,
+#endif
+#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
+       &amd64_edac_inj_group,
+#endif
+       NULL
+};
+
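This array is the heart of the conversion: instead of calling device_create_file() after the device is registered (which races with udev and needs the hand-rolled unwinding the deleted set_mc_sysfs_attrs() used), the driver hands a NULL-terminated array of attribute groups to the core, which then creates and removes the files atomically with the device. The generic device-side shape of the idiom, sketched with hypothetical names:

    #include <linux/device.h>
    #include <linux/sysfs.h>

    static ssize_t demo_show(struct device *dev,
                             struct device_attribute *attr, char *buf)
    {
            return sprintf(buf, "demo\n");  /* hypothetical attribute */
    }
    static DEVICE_ATTR(demo, S_IRUGO, demo_show, NULL);

    static struct attribute *demo_attrs[] = {
            &dev_attr_demo.attr,
            NULL                            /* must be NULL-terminated */
    };

    static const struct attribute_group demo_group = {
            .attrs = demo_attrs,
    };

    static const struct attribute_group *demo_groups[] = {
            &demo_group,
            NULL
    };

    /* Assign demo_groups to dev->groups before device_add(), or pass it
     * to a registration helper such as edac_mc_add_mc_with_groups(). */
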
 static int init_one_instance(struct pci_dev *F2)
 {
        struct amd64_pvt *pvt = NULL;
@@ -2844,14 +2829,10 @@ static int init_one_instance(struct pci_dev *F2)
                mci->edac_cap = EDAC_FLAG_NONE;
 
        ret = -ENODEV;
-       if (edac_mc_add_mc(mci)) {
+       if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) {
                edac_dbg(1, "failed edac_mc_add_mc()\n");
                goto err_add_mc;
        }
-       if (set_mc_sysfs_attrs(mci)) {
-               edac_dbg(1, "failed edac_mc_add_mc()\n");
-               goto err_add_sysfs;
-       }
 
        /* register stuff with EDAC MCE */
        if (report_gart_errors)
@@ -2859,14 +2840,10 @@ static int init_one_instance(struct pci_dev *F2)
 
        amd_register_ecc_decoder(decode_bus_error);
 
-       mcis[nid] = mci;
-
        atomic_inc(&drv_instances);
 
        return 0;
 
-err_add_sysfs:
-       edac_mc_del_mc(mci->pdev);
 err_add_mc:
        edac_mc_free(mci);
 
@@ -2940,7 +2917,6 @@ static void remove_one_instance(struct pci_dev *pdev)
        mci = find_mci_by_dev(&pdev->dev);
        WARN_ON(!mci);
 
-       del_mc_sysfs_attrs(mci);
        /* Remove from EDAC CORE tracking list */
        mci = edac_mc_del_mc(&pdev->dev);
        if (!mci)
@@ -2961,7 +2937,6 @@ static void remove_one_instance(struct pci_dev *pdev)
 
        /* Free the EDAC CORE resources */
        mci->pvt_info = NULL;
-       mcis[nid] = NULL;
 
        kfree(pvt);
        edac_mc_free(mci);
@@ -2999,7 +2974,7 @@ static void setup_pci_device(void)
        if (pci_ctl)
                return;
 
-       mci = mcis[0];
+       mci = edac_mc_find(0);
        if (!mci)
                return;
 
@@ -3023,9 +2998,8 @@ static int __init amd64_edac_init(void)
                goto err_ret;
 
        err = -ENOMEM;
-       mcis      = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
        ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
-       if (!(mcis && ecc_stngs))
+       if (!ecc_stngs)
                goto err_free;
 
        msrs = msrs_alloc();
@@ -3056,9 +3030,6 @@ err_pci:
        msrs = NULL;
 
 err_free:
-       kfree(mcis);
-       mcis = NULL;
-
        kfree(ecc_stngs);
        ecc_stngs = NULL;
 
@@ -3076,9 +3047,6 @@ static void __exit amd64_edac_exit(void)
        kfree(ecc_stngs);
        ecc_stngs = NULL;
 
-       kfree(mcis);
-       mcis = NULL;
-
        msrs_free(msrs);
        msrs = NULL;
 }
index d8468c6679257b26b0157da63fd1d6bdd0633ba4..4bdec752d33096bfdfa9c5c201c81dba3c217af8 100644 (file)
@@ -453,31 +453,11 @@ struct ecc_settings {
 };
 
 #ifdef CONFIG_EDAC_DEBUG
-int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci);
-void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci);
-
-#else
-static inline int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-       return 0;
-}
-static void inline amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-}
+extern const struct attribute_group amd64_edac_dbg_group;
 #endif
 
 #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
-int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci);
-void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci);
-
-#else
-static inline int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
-{
-       return 0;
-}
-static inline void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
-{
-}
+extern const struct attribute_group amd64_edac_inj_group;
 #endif
 
 /*
index 2c1bbf7406058f4f80e1edb170db160c488cdcbc..4709c60798482e1e0b276ef35d810a30801111d4 100644 (file)
@@ -40,34 +40,15 @@ static DEVICE_ATTR(topmem, S_IRUGO, amd64_top_mem_show, NULL);
 static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL);
 static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL);
 
-int amd64_create_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-       int rc;
-
-       rc = device_create_file(&mci->dev, &dev_attr_dhar);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_dbam);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_topmem);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_topmem2);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_dram_hole);
-       if (rc < 0)
-               return rc;
-
-       return 0;
-}
-
-void amd64_remove_sysfs_dbg_files(struct mem_ctl_info *mci)
-{
-       device_remove_file(&mci->dev, &dev_attr_dhar);
-       device_remove_file(&mci->dev, &dev_attr_dbam);
-       device_remove_file(&mci->dev, &dev_attr_topmem);
-       device_remove_file(&mci->dev, &dev_attr_topmem2);
-       device_remove_file(&mci->dev, &dev_attr_dram_hole);
-}
+static struct attribute *amd64_edac_dbg_attrs[] = {
+       &dev_attr_dhar.attr,
+       &dev_attr_dbam.attr,
+       &dev_attr_topmem.attr,
+       &dev_attr_topmem2.attr,
+       &dev_attr_dram_hole.attr,
+       NULL
+};
+
+const struct attribute_group amd64_edac_dbg_group = {
+       .attrs = amd64_edac_dbg_attrs,
+};
index 0d66ae68d4681a9bb1ceedfca97fcafbde144d2c..e14977ff95dbb49dcd1038018091f37561cf86d4 100644 (file)
@@ -207,35 +207,28 @@ static DEVICE_ATTR(inject_write, S_IWUSR,
 static DEVICE_ATTR(inject_read,  S_IWUSR,
                   NULL, amd64_inject_read_store);
 
-
-int amd64_create_sysfs_inject_files(struct mem_ctl_info *mci)
+static struct attribute *amd64_edac_inj_attrs[] = {
+       &dev_attr_inject_section.attr,
+       &dev_attr_inject_word.attr,
+       &dev_attr_inject_ecc_vector.attr,
+       &dev_attr_inject_write.attr,
+       &dev_attr_inject_read.attr,
+       NULL
+};
+
+static umode_t amd64_edac_inj_is_visible(struct kobject *kobj,
+                                        struct attribute *attr, int idx)
 {
-       int rc;
-
-       rc = device_create_file(&mci->dev, &dev_attr_inject_section);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_word);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_ecc_vector);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_write);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_read);
-       if (rc < 0)
-               return rc;
-
-       return 0;
-}
+       struct device *dev = kobj_to_dev(kobj);
+       struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
+       struct amd64_pvt *pvt = mci->pvt_info;
 
-void amd64_remove_sysfs_inject_files(struct mem_ctl_info *mci)
-{
-       device_remove_file(&mci->dev, &dev_attr_inject_section);
-       device_remove_file(&mci->dev, &dev_attr_inject_word);
-       device_remove_file(&mci->dev, &dev_attr_inject_ecc_vector);
-       device_remove_file(&mci->dev, &dev_attr_inject_write);
-       device_remove_file(&mci->dev, &dev_attr_inject_read);
+       if (pvt->fam < 0x10)
+               return 0;
+       return attr->mode;
 }
+
+const struct attribute_group amd64_edac_inj_group = {
+       .attrs = amd64_edac_inj_attrs,
+       .is_visible = amd64_edac_inj_is_visible,
+};
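
Note the is_visible contract used here: the callback runs once per attribute while the group is created; returning 0 hides the file and returning attr->mode exposes it unchanged. That is how the family check that used to guard amd64_create_sysfs_inject_files() (error injection exists only on family 0x10 and later) survives the conversion without any imperative setup code.
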
index 6c9f381e8fe6731d0c9c02bce1c793b0f0c302b7..ad42587c3f4d6e60d25ccb49fd9550b3341e6984 100644 (file)
@@ -446,7 +446,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
                                   unsigned n_layers,
                                   struct edac_mc_layer *layers,
                                   unsigned sz_pvt);
-extern int edac_mc_add_mc(struct mem_ctl_info *mci);
+extern int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
+                                     const struct attribute_group **groups);
+#define edac_mc_add_mc(mci)    edac_mc_add_mc_with_groups(mci, NULL)
 extern void edac_mc_free(struct mem_ctl_info *mci);
 extern struct mem_ctl_info *edac_mc_find(int idx);
 extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
index 1747906f10cedcd2f9b528065b4ae139fc34fdb4..af3be1914dbb8f85496473c4c3c5977edd0ed17a 100644 (file)
@@ -710,9 +710,10 @@ struct mem_ctl_info *edac_mc_find(int idx)
 EXPORT_SYMBOL(edac_mc_find);
 
 /**
- * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
- *                 create sysfs entries associated with mci structure
+ * edac_mc_add_mc_with_groups: Insert the 'mci' structure into the mci
+ *     global list and create sysfs entries associated with mci structure
  * @mci: pointer to the mci structure to be added to the list
+ * @groups: optional attribute groups for the driver-specific sysfs entries
  *
  * Return:
  *     0       Success
@@ -720,7 +721,8 @@ EXPORT_SYMBOL(edac_mc_find);
  */
 
 /* FIXME - should a warning be printed if no error detection? correction? */
-int edac_mc_add_mc(struct mem_ctl_info *mci)
+int edac_mc_add_mc_with_groups(struct mem_ctl_info *mci,
+                              const struct attribute_group **groups)
 {
        int ret = -EINVAL;
        edac_dbg(0, "\n");
@@ -771,7 +773,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 
        mci->bus = &mc_bus[mci->mc_idx];
 
-       if (edac_create_sysfs_mci_device(mci)) {
+       if (edac_create_sysfs_mci_device(mci, groups)) {
                edac_mc_printk(mci, KERN_WARNING,
                        "failed to create sysfs device\n");
                goto fail1;
@@ -805,7 +807,7 @@ fail0:
        mutex_unlock(&mem_ctls_mutex);
        return ret;
 }
-EXPORT_SYMBOL_GPL(edac_mc_add_mc);
+EXPORT_SYMBOL_GPL(edac_mc_add_mc_with_groups);
 
 /**
  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
index c84eecb191ef2a14364a57370bb1b6952bb9bcb6..112d63ad115470529c18e3c6d876cfdda000a9e5 100644 (file)
@@ -323,13 +323,14 @@ DEVICE_CHANNEL(ch5_dimm_label, S_IRUGO | S_IWUSR,
        channel_dimm_label_show, channel_dimm_label_store, 5);
 
 /* Total possible dynamic DIMM Label attribute file table */
-static struct device_attribute *dynamic_csrow_dimm_attr[] = {
-       &dev_attr_legacy_ch0_dimm_label.attr,
-       &dev_attr_legacy_ch1_dimm_label.attr,
-       &dev_attr_legacy_ch2_dimm_label.attr,
-       &dev_attr_legacy_ch3_dimm_label.attr,
-       &dev_attr_legacy_ch4_dimm_label.attr,
-       &dev_attr_legacy_ch5_dimm_label.attr
+static struct attribute *dynamic_csrow_dimm_attr[] = {
+       &dev_attr_legacy_ch0_dimm_label.attr.attr,
+       &dev_attr_legacy_ch1_dimm_label.attr.attr,
+       &dev_attr_legacy_ch2_dimm_label.attr.attr,
+       &dev_attr_legacy_ch3_dimm_label.attr.attr,
+       &dev_attr_legacy_ch4_dimm_label.attr.attr,
+       &dev_attr_legacy_ch5_dimm_label.attr.attr,
+       NULL
 };
 
 /* possible dynamic channel ce_count attribute files */
@@ -347,13 +348,45 @@ DEVICE_CHANNEL(ch5_ce_count, S_IRUGO,
                   channel_ce_count_show, NULL, 5);
 
 /* Total possible dynamic ce_count attribute file table */
-static struct device_attribute *dynamic_csrow_ce_count_attr[] = {
-       &dev_attr_legacy_ch0_ce_count.attr,
-       &dev_attr_legacy_ch1_ce_count.attr,
-       &dev_attr_legacy_ch2_ce_count.attr,
-       &dev_attr_legacy_ch3_ce_count.attr,
-       &dev_attr_legacy_ch4_ce_count.attr,
-       &dev_attr_legacy_ch5_ce_count.attr
+static struct attribute *dynamic_csrow_ce_count_attr[] = {
+       &dev_attr_legacy_ch0_ce_count.attr.attr,
+       &dev_attr_legacy_ch1_ce_count.attr.attr,
+       &dev_attr_legacy_ch2_ce_count.attr.attr,
+       &dev_attr_legacy_ch3_ce_count.attr.attr,
+       &dev_attr_legacy_ch4_ce_count.attr.attr,
+       &dev_attr_legacy_ch5_ce_count.attr.attr,
+       NULL
+};
+
+static umode_t csrow_dev_is_visible(struct kobject *kobj,
+                                   struct attribute *attr, int idx)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct csrow_info *csrow = container_of(dev, struct csrow_info, dev);
+
+       if (idx >= csrow->nr_channels)
+               return 0;
+       /* Only expose populated DIMMs */
+       if (!csrow->channels[idx]->dimm->nr_pages)
+               return 0;
+       return attr->mode;
+}
+
+
+static const struct attribute_group csrow_dev_dimm_group = {
+       .attrs = dynamic_csrow_dimm_attr,
+       .is_visible = csrow_dev_is_visible,
+};
+
+static const struct attribute_group csrow_dev_ce_count_group = {
+       .attrs = dynamic_csrow_ce_count_attr,
+       .is_visible = csrow_dev_is_visible,
+};
+
+static const struct attribute_group *csrow_dev_groups[] = {
+       &csrow_dev_dimm_group,
+       &csrow_dev_ce_count_group,
+       NULL
 };
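
Here the idx argument of is_visible does the per-channel work: it is the attribute's index within the group's attrs array, and because both arrays above are laid out one entry per channel, comparing idx against csrow->nr_channels and the DIMM's nr_pages reproduces the old "only expose populated DIMMs" loop declaratively.
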
 
 static inline int nr_pages_per_csrow(struct csrow_info *csrow)
@@ -370,13 +403,12 @@ static inline int nr_pages_per_csrow(struct csrow_info *csrow)
 static int edac_create_csrow_object(struct mem_ctl_info *mci,
                                    struct csrow_info *csrow, int index)
 {
-       int err, chan;
-
        if (csrow->nr_channels > EDAC_NR_CHANNELS)
                return -ENODEV;
 
        csrow->dev.type = &csrow_attr_type;
        csrow->dev.bus = mci->bus;
+       csrow->dev.groups = csrow_dev_groups;
        device_initialize(&csrow->dev);
        csrow->dev.parent = &mci->dev;
        csrow->mci = mci;
@@ -386,45 +418,13 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci,
        edac_dbg(0, "creating (virtual) csrow node %s\n",
                 dev_name(&csrow->dev));
 
-       err = device_add(&csrow->dev);
-       if (err < 0)
-               return err;
-
-       for (chan = 0; chan < csrow->nr_channels; chan++) {
-               /* Only expose populated DIMMs */
-               if (!csrow->channels[chan]->dimm->nr_pages)
-                       continue;
-               err = device_create_file(&csrow->dev,
-                                        dynamic_csrow_dimm_attr[chan]);
-               if (err < 0)
-                       goto error;
-               err = device_create_file(&csrow->dev,
-                                        dynamic_csrow_ce_count_attr[chan]);
-               if (err < 0) {
-                       device_remove_file(&csrow->dev,
-                                          dynamic_csrow_dimm_attr[chan]);
-                       goto error;
-               }
-       }
-
-       return 0;
-
-error:
-       for (--chan; chan >= 0; chan--) {
-               device_remove_file(&csrow->dev,
-                                       dynamic_csrow_dimm_attr[chan]);
-               device_remove_file(&csrow->dev,
-                                          dynamic_csrow_ce_count_attr[chan]);
-       }
-       put_device(&csrow->dev);
-
-       return err;
+       return device_add(&csrow->dev);
 }
 
 /* Create a CSROW object under specified edac_mc_device */
 static int edac_create_csrow_objects(struct mem_ctl_info *mci)
 {
-       int err, i, chan;
+       int err, i;
        struct csrow_info *csrow;
 
        for (i = 0; i < mci->nr_csrows; i++) {
@@ -446,14 +446,6 @@ error:
                csrow = mci->csrows[i];
                if (!nr_pages_per_csrow(csrow))
                        continue;
-               for (chan = csrow->nr_channels - 1; chan >= 0; chan--) {
-                       if (!csrow->channels[chan]->dimm->nr_pages)
-                               continue;
-                       device_remove_file(&csrow->dev,
-                                               dynamic_csrow_dimm_attr[chan]);
-                       device_remove_file(&csrow->dev,
-                                               dynamic_csrow_ce_count_attr[chan]);
-               }
                put_device(&mci->csrows[i]->dev);
        }
 
@@ -462,23 +454,13 @@ error:
 
 static void edac_delete_csrow_objects(struct mem_ctl_info *mci)
 {
-       int i, chan;
+       int i;
        struct csrow_info *csrow;
 
        for (i = mci->nr_csrows - 1; i >= 0; i--) {
                csrow = mci->csrows[i];
                if (!nr_pages_per_csrow(csrow))
                        continue;
-               for (chan = csrow->nr_channels - 1; chan >= 0; chan--) {
-                       if (!csrow->channels[chan]->dimm->nr_pages)
-                               continue;
-                       edac_dbg(1, "Removing csrow %d channel %d sysfs nodes\n",
-                                i, chan);
-                       device_remove_file(&csrow->dev,
-                                               dynamic_csrow_dimm_attr[chan]);
-                       device_remove_file(&csrow->dev,
-                                               dynamic_csrow_ce_count_attr[chan]);
-               }
                device_unregister(&mci->csrows[i]->dev);
        }
 }
@@ -863,7 +845,8 @@ static DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
 static DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
 
 /* memory scrubber attribute file */
-static DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL);
+static DEVICE_ATTR(sdram_scrub_rate, 0, mci_sdram_scrub_rate_show,
+                   mci_sdram_scrub_rate_store); /* umode set later in is_visible */
 
 static struct attribute *mci_attrs[] = {
        &dev_attr_reset_counters.attr,
@@ -875,11 +858,29 @@ static struct attribute *mci_attrs[] = {
        &dev_attr_ue_count.attr,
        &dev_attr_ce_count.attr,
        &dev_attr_max_location.attr,
+       &dev_attr_sdram_scrub_rate.attr,
        NULL
 };
 
+static umode_t mci_attr_is_visible(struct kobject *kobj,
+                                  struct attribute *attr, int idx)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct mem_ctl_info *mci = to_mci(dev);
+       umode_t mode = 0;
+
+       if (attr != &dev_attr_sdram_scrub_rate.attr)
+               return attr->mode;
+       if (mci->get_sdram_scrub_rate)
+               mode |= S_IRUGO;
+       if (mci->set_sdram_scrub_rate)
+               mode |= S_IWUSR;
+       return mode;
+}
+
 static struct attribute_group mci_attr_grp = {
        .attrs  = mci_attrs,
+       .is_visible = mci_attr_is_visible,
 };
 
 static const struct attribute_group *mci_attr_groups[] = {
@@ -913,7 +914,7 @@ int __init edac_debugfs_init(void)
        return 0;
 }
 
-void __exit edac_debugfs_exit(void)
+void edac_debugfs_exit(void)
 {
        debugfs_remove(edac_debugfs);
 }
@@ -973,7 +974,8 @@ nomem:
  *     0       Success
  *     !0      Failure
  */
-int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
+int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
+                                const struct attribute_group **groups)
 {
        int i, err;
 
@@ -997,6 +999,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 
        mci->dev.parent = mci_pdev;
        mci->dev.bus = mci->bus;
+       mci->dev.groups = groups;
        dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
        dev_set_drvdata(&mci->dev, mci);
        pm_runtime_forbid(&mci->dev);
@@ -1008,23 +1011,6 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
                goto fail_unregister_bus;
        }
 
-       if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) {
-               if (mci->get_sdram_scrub_rate) {
-                       dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO;
-                       dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show;
-               }
-
-               if (mci->set_sdram_scrub_rate) {
-                       dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR;
-                       dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store;
-               }
-
-               err = device_create_file(&mci->dev, &dev_attr_sdram_scrub_rate);
-               if (err) {
-                       edac_dbg(1, "failure: create sdram_scrub_rate\n");
-                       goto fail_unregister_dev;
-               }
-       }
        /*
         * Create the dimm/rank devices
         */
@@ -1071,7 +1057,6 @@ fail_unregister_dimm:
 
                device_unregister(&dimm->dev);
        }
-fail_unregister_dev:
        device_unregister(&mci->dev);
 fail_unregister_bus:
        bus_unregister(mci->bus);
@@ -1170,7 +1155,7 @@ int __init edac_mc_sysfs_init(void)
        return err;
 }
 
-void __exit edac_mc_sysfs_exit(void)
+void edac_mc_sysfs_exit(void)
 {
        device_unregister(mci_pdev);
        edac_put_sysfs_subsys();
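
The conversion above replaces open-coded device_create_file()/device_remove_file()
calls with static attribute groups whose .is_visible callback decides, per
attribute and at device_add() time, whether a file is created and with which
mode. A minimal illustrative sketch of that sysfs pattern (the foo_* names are
hypothetical, not from this patch):

    #include <linux/device.h>
    #include <linux/sysfs.h>

    static ssize_t foo_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
    {
            return sprintf(buf, "42\n");
    }
    static DEVICE_ATTR(foo, S_IRUGO, foo_show, NULL);

    static struct attribute *foo_attrs[] = {
            &dev_attr_foo.attr,
            NULL
    };

    static umode_t foo_is_visible(struct kobject *kobj,
                                  struct attribute *attr, int idx)
    {
            /* return 0 to hide the file, or a mode to override attr->mode */
            return attr->mode;
    }

    static const struct attribute_group foo_group = {
            .attrs = foo_attrs,
            .is_visible = foo_is_visible,
    };

    static const struct attribute_group *foo_groups[] = {
            &foo_group,
            NULL
    };

    /* before device_add():  dev->groups = foo_groups;  */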
index e6d1691dfa45f310d7f7cbec0b8f391836daff22..9cb082a19d8a7ae2fbd7a2ae146bd3cd98a59618 100644 (file)
@@ -112,20 +112,23 @@ static int __init edac_init(void)
 
        err = edac_mc_sysfs_init();
        if (err)
-               goto error;
+               goto err_sysfs;
 
        edac_debugfs_init();
 
-       /* Setup/Initialize the workq for this core */
        err = edac_workqueue_setup();
        if (err) {
-               edac_printk(KERN_ERR, EDAC_MC, "init WorkQueue failure\n");
-               goto error;
+               edac_printk(KERN_ERR, EDAC_MC, "Failure initializing workqueue\n");
+               goto err_wq;
        }
 
        return 0;
 
-error:
+err_wq:
+       edac_debugfs_exit();
+       edac_mc_sysfs_exit();
+
+err_sysfs:
        return err;
 }
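
The edac_init() fix above replaces a single shared "error:" label with ordered
unwind labels, so a workqueue setup failure also tears down the debugfs and
sysfs state that was already created. A minimal, self-contained sketch of this
goto-unwind idiom (setup_a/setup_b/teardown_a are made-up stand-ins):

    #include <stdio.h>

    static int setup_a(void) { puts("A up"); return 0; }
    static void teardown_a(void) { puts("A down"); }
    static int setup_b(void) { puts("B failed"); return -1; }

    static int example_init(void)
    {
            int err;

            err = setup_a();
            if (err)
                    goto err_a;     /* nothing succeeded yet */

            err = setup_b();
            if (err)
                    goto err_b;     /* undo A, in reverse order */

            return 0;

    err_b:
            teardown_a();
    err_a:
            return err;
    }

    int main(void)
    {
            return example_init() ? 1 : 0;
    }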
 
index f2118bfcf8dfbd861d24754320ac0a0439cfb9ed..26ecc52e073d8b5dc246aa2897cd59fdf54700ad 100644 (file)
@@ -22,7 +22,8 @@
        /* on edac_mc_sysfs.c */
 int edac_mc_sysfs_init(void);
 void edac_mc_sysfs_exit(void);
-extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci);
+extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
+                                       const struct attribute_group **groups);
 extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
 void edac_unregister_sysfs(struct mem_ctl_info *mci);
 extern int edac_get_log_ue(void);
index f784de1dc7937f097265050b3336bcebf4534ef7..11260cc3360e44b8105f0351fd198026d205c685 100644 (file)
@@ -124,6 +124,13 @@ static ssize_t highbank_mc_inject_ctrl(struct device *dev,
 
 static DEVICE_ATTR(inject_ctrl, S_IWUSR, NULL, highbank_mc_inject_ctrl);
 
+static struct attribute *highbank_dev_attrs[] = {
+       &dev_attr_inject_ctrl.attr,
+       NULL
+};
+
+ATTRIBUTE_GROUPS(highbank_dev);
+
 struct hb_mc_settings {
        int     err_offset;
        int     int_offset;
@@ -139,7 +146,7 @@ static struct hb_mc_settings mw_settings = {
        .int_offset = MW_DDR_ECC_INT_BASE,
 };
 
-static struct of_device_id hb_ddr_ctrl_of_match[] = {
+static const struct of_device_id hb_ddr_ctrl_of_match[] = {
        { .compatible = "calxeda,hb-ddr-ctrl",          .data = &hb_settings },
        { .compatible = "calxeda,ecx-2000-ddr-ctrl",    .data = &mw_settings },
        {},
@@ -231,7 +238,7 @@ static int highbank_mc_probe(struct platform_device *pdev)
        dimm->mtype = MEM_DDR3;
        dimm->edac_mode = EDAC_SECDED;
 
-       res = edac_mc_add_mc(mci);
+       res = edac_mc_add_mc_with_groups(mci, highbank_dev_groups);
        if (res < 0)
                goto err;
 
@@ -243,8 +250,6 @@ static int highbank_mc_probe(struct platform_device *pdev)
                goto err2;
        }
 
-       device_create_file(&mci->dev, &dev_attr_inject_ctrl);
-
        devres_close_group(&pdev->dev, NULL);
        return 0;
 err2:
@@ -259,7 +264,6 @@ static int highbank_mc_remove(struct platform_device *pdev)
 {
        struct mem_ctl_info *mci = platform_get_drvdata(pdev);
 
-       device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
        edac_mc_del_mc(&pdev->dev);
        edac_mc_free(mci);
        return 0;
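
ATTRIBUTE_GROUPS(highbank_dev) is a helper macro from <linux/sysfs.h>; it
expands to roughly the following boilerplate (simplified here), producing the
highbank_dev_groups array that edac_mc_add_mc_with_groups() consumes:

    static const struct attribute_group highbank_dev_group = {
            .attrs = highbank_dev_attrs,
    };

    static const struct attribute_group *highbank_dev_groups[] = {
            &highbank_dev_group,
            NULL,
    };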
index 9cd0b301f81ba5a5a3fb52dfcc87ef337d7bcb41..01087a38da226d08bd7e08da5a183885fbf420b1 100644 (file)
@@ -1157,27 +1157,24 @@ static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
                   i7core_inject_enable_show, i7core_inject_enable_store);
 
+static struct attribute *i7core_dev_attrs[] = {
+       &dev_attr_inject_section.attr,
+       &dev_attr_inject_type.attr,
+       &dev_attr_inject_eccmask.attr,
+       &dev_attr_inject_enable.attr,
+       NULL
+};
+
+ATTRIBUTE_GROUPS(i7core_dev);
+
 static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
 {
        struct i7core_pvt *pvt = mci->pvt_info;
        int rc;
 
-       rc = device_create_file(&mci->dev, &dev_attr_inject_section);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_type);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_enable);
-       if (rc < 0)
-               return rc;
-
        pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
        if (!pvt->addrmatch_dev)
-               return rc;
+               return -ENOMEM;
 
        pvt->addrmatch_dev->type = &addrmatch_type;
        pvt->addrmatch_dev->bus = mci->dev.bus;
@@ -1198,7 +1195,7 @@ static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
                if (!pvt->chancounts_dev) {
                        put_device(pvt->addrmatch_dev);
                        device_del(pvt->addrmatch_dev);
-                       return rc;
+                       return -ENOMEM;
                }
 
                pvt->chancounts_dev->type = &all_channel_counts_type;
@@ -1223,11 +1220,6 @@ static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
 
        edac_dbg(1, "\n");
 
-       device_remove_file(&mci->dev, &dev_attr_inject_section);
-       device_remove_file(&mci->dev, &dev_attr_inject_type);
-       device_remove_file(&mci->dev, &dev_attr_inject_eccmask);
-       device_remove_file(&mci->dev, &dev_attr_inject_enable);
-
        if (!pvt->is_registered) {
                put_device(pvt->chancounts_dev);
                device_del(pvt->chancounts_dev);
@@ -2259,7 +2251,7 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
                enable_sdram_scrub_setting(mci);
 
        /* add this new MC control structure to EDAC's list of MCs */
-       if (unlikely(edac_mc_add_mc(mci))) {
+       if (unlikely(edac_mc_add_mc_with_groups(mci, i7core_dev_groups))) {
                edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
                /* FIXME: perhaps some code should go here that disables error
                 * reporting if we just enabled it
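
For context, edac_mc_add_mc_with_groups() is the new core entry point added by
this series; the plain edac_mc_add_mc() used by unconverted drivers becomes,
as far as this series reads, a thin wrapper that passes no extra groups:

    static inline int edac_mc_add_mc(struct mem_ctl_info *mci)
    {
            return edac_mc_add_mc_with_groups(mci, NULL);
    }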
index b4705d9366bf33d3c6223aacf8f47b8461b25a59..4d4110364f021cc77dc699d2384a60ce88067b40 100644 (file)
@@ -350,8 +350,6 @@ fail:
        return -ENODEV;
 }
 
-EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_probe1);
-
 /* returns count (>= 0), or negative on error */
 static int i82443bxgx_edacmc_init_one(struct pci_dev *pdev,
                                      const struct pci_device_id *ent)
@@ -384,8 +382,6 @@ static void i82443bxgx_edacmc_remove_one(struct pci_dev *pdev)
        edac_mc_free(mci);
 }
 
-EXPORT_SYMBOL_GPL(i82443bxgx_edacmc_remove_one);
-
 static const struct pci_device_id i82443bxgx_pci_tbl[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_0)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443BX_2)},
@@ -445,9 +441,7 @@ fail1:
        pci_unregister_driver(&i82443bxgx_edacmc_driver);
 
 fail0:
-       if (mci_pdev != NULL)
-               pci_dev_put(mci_pdev);
-
+       pci_dev_put(mci_pdev);
        return pci_rc;
 }
 
index 4382343a7c60ed3cc4d5d1ea4f7661c62b1fe5bc..ee1078cd3b966a1b473ca76b164947177cd9fdbc 100644 (file)
@@ -343,20 +343,15 @@ fail1:
        pci_unregister_driver(&i82860_driver);
 
 fail0:
-       if (mci_pdev != NULL)
-               pci_dev_put(mci_pdev);
-
+       pci_dev_put(mci_pdev);
        return pci_rc;
 }
 
 static void __exit i82860_exit(void)
 {
        edac_dbg(3, "\n");
-
        pci_unregister_driver(&i82860_driver);
-
-       if (mci_pdev != NULL)
-               pci_dev_put(mci_pdev);
+       pci_dev_put(mci_pdev);
 }
 
 module_init(i82860_init);
index 64b68320249f0e5e05db5ef8a0eccc0a2925dade..c26a513f88697bf690f34e66ada74ec091ac764d 100644 (file)
@@ -576,9 +576,7 @@ fail1:
        pci_unregister_driver(&i82875p_driver);
 
 fail0:
-       if (mci_pdev != NULL)
-               pci_dev_put(mci_pdev);
-
+       pci_dev_put(mci_pdev);
        return pci_rc;
 }
 
index 10b10521f62e19a550479755ad41247683d336e6..35ab66c623a3061e4ab03de7221b4852fbd59f63 100644 (file)
@@ -685,9 +685,7 @@ fail1:
        pci_unregister_driver(&i82975x_driver);
 
 fail0:
-       if (mci_pdev != NULL)
-               pci_dev_put(mci_pdev);
-
+       pci_dev_put(mci_pdev);
        return pci_rc;
 }
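
The four nearly identical cleanups above (i82443bxgx, i82860, i82875p,
i82975x) rely on pci_dev_put() being NULL-safe: like kfree(NULL), passing a
NULL pointer is a no-op, so the explicit "if (mci_pdev != NULL)" guard was
redundant and can simply be dropped.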
 
index 1fa76a588af31ebec60889566a0a67eb4daa5a49..68bf234bdfe66c84d3d88d0fb339bb9fcbd451df 100644 (file)
@@ -134,29 +134,14 @@ DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR,
 DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR,
            mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store);
 
-static int mpc85xx_create_sysfs_attributes(struct mem_ctl_info *mci)
-{
-       int rc;
-
-       rc = device_create_file(&mci->dev, &dev_attr_inject_data_hi);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_data_lo);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_inject_ctrl);
-       if (rc < 0)
-               return rc;
+static struct attribute *mpc85xx_dev_attrs[] = {
+       &dev_attr_inject_data_hi.attr,
+       &dev_attr_inject_data_lo.attr,
+       &dev_attr_inject_ctrl.attr,
+       NULL
+};
 
-       return 0;
-}
-
-static void mpc85xx_remove_sysfs_attributes(struct mem_ctl_info *mci)
-{
-       device_remove_file(&mci->dev, &dev_attr_inject_data_hi);
-       device_remove_file(&mci->dev, &dev_attr_inject_data_lo);
-       device_remove_file(&mci->dev, &dev_attr_inject_ctrl);
-}
+ATTRIBUTE_GROUPS(mpc85xx_dev);
 
 /**************************** PCI Err device ***************************/
 #ifdef CONFIG_PCI
@@ -685,7 +670,7 @@ static int mpc85xx_l2_err_remove(struct platform_device *op)
        return 0;
 }
 
-static struct of_device_id mpc85xx_l2_err_of_match[] = {
+static const struct of_device_id mpc85xx_l2_err_of_match[] = {
 /* deprecate the fsl,85.. forms in the future, 2.6.30? */
        { .compatible = "fsl,8540-l2-cache-controller", },
        { .compatible = "fsl,8541-l2-cache-controller", },
@@ -1106,13 +1091,7 @@ static int mpc85xx_mc_err_probe(struct platform_device *op)
        /* clear all error bits */
        out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0);
 
-       if (edac_mc_add_mc(mci)) {
-               edac_dbg(3, "failed edac_mc_add_mc()\n");
-               goto err;
-       }
-
-       if (mpc85xx_create_sysfs_attributes(mci)) {
-               edac_mc_del_mc(mci->pdev);
+       if (edac_mc_add_mc_with_groups(mci, mpc85xx_dev_groups)) {
                edac_dbg(3, "failed edac_mc_add_mc()\n");
                goto err;
        }
@@ -1176,13 +1155,12 @@ static int mpc85xx_mc_err_remove(struct platform_device *op)
                 orig_ddr_err_disable);
        out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe);
 
-       mpc85xx_remove_sysfs_attributes(mci);
        edac_mc_del_mc(&op->dev);
        edac_mc_free(mci);
        return 0;
 }
 
-static struct of_device_id mpc85xx_mc_err_of_match[] = {
+static const struct of_device_id mpc85xx_mc_err_of_match[] = {
 /* deprecate the fsl,85.. forms in the future, 2.6.30? */
        { .compatible = "fsl,8540-memory-controller", },
        { .compatible = "fsl,8541-memory-controller", },
index 4bd10f94f0683a3c14e44f0fb8e5f41bb92ab83e..bb19e0732681ce6a4af6c2e980c0c4bb382889f9 100644 (file)
@@ -209,35 +209,18 @@ static DEVICE_ATTR(row, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(col, S_IRUGO | S_IWUSR,
                   octeon_mc_inject_col_show, octeon_mc_inject_col_store);
 
+static struct attribute *octeon_dev_attrs[] = {
+       &dev_attr_inject.attr,
+       &dev_attr_error_type.attr,
+       &dev_attr_dimm.attr,
+       &dev_attr_rank.attr,
+       &dev_attr_bank.attr,
+       &dev_attr_row.attr,
+       &dev_attr_col.attr,
+       NULL
+};
 
-static int octeon_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
-{
-       int rc;
-
-       rc = device_create_file(&mci->dev, &dev_attr_inject);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_error_type);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_dimm);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_rank);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_bank);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_row);
-       if (rc < 0)
-               return rc;
-       rc = device_create_file(&mci->dev, &dev_attr_col);
-       if (rc < 0)
-               return rc;
-
-       return 0;
-}
+ATTRIBUTE_GROUPS(octeon_dev);
 
 static int octeon_lmc_edac_probe(struct platform_device *pdev)
 {
@@ -271,18 +254,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev)
                mci->ctl_name = "octeon-lmc-err";
                mci->edac_check = octeon_lmc_edac_poll;
 
-               if (edac_mc_add_mc(mci)) {
+               if (edac_mc_add_mc_with_groups(mci, octeon_dev_groups)) {
                        dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
                        edac_mc_free(mci);
                        return -ENXIO;
                }
 
-               if (octeon_set_mc_sysfs_attributes(mci)) {
-                       dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
-                       return -ENXIO;
-               }
-
-
                cfg0.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
                cfg0.s.intr_ded_ena = 0;        /* We poll */
                cfg0.s.intr_sec_ena = 0;
@@ -309,18 +286,12 @@ static int octeon_lmc_edac_probe(struct platform_device *pdev)
                mci->ctl_name = "co_lmc_err";
                mci->edac_check = octeon_lmc_edac_poll_o2;
 
-               if (edac_mc_add_mc(mci)) {
+               if (edac_mc_add_mc_with_groups(mci, octeon_dev_groups)) {
                        dev_err(&pdev->dev, "edac_mc_add_mc() failed\n");
                        edac_mc_free(mci);
                        return -ENXIO;
                }
 
-               if (octeon_set_mc_sysfs_attributes(mci)) {
-                       dev_err(&pdev->dev, "octeon_set_mc_sysfs_attributes() failed\n");
-                       return -ENXIO;
-               }
-
-
                en.u64 = cvmx_read_csr(CVMX_LMCX_MEM_CFG0(mc));
                en.s.intr_ded_ena = 0;  /* We poll */
                en.s.intr_sec_ena = 0;
index 1b64fd06082171be84ab2ce41e6d3e293bd0984e..3515b381c1312612f56953bc267ee7d5d23b0f84 100644 (file)
@@ -193,7 +193,7 @@ static int ppc4xx_edac_remove(struct platform_device *device);
  * Device tree node type and compatible tuples this driver can match
  * on.
  */
-static struct of_device_id ppc4xx_edac_match[] = {
+static const struct of_device_id ppc4xx_edac_match[] = {
        {
                .compatible     = "ibm,sdram-4xx-ddr2"
        },
index 1c9691535e13ff7fd66a81662c5f363f71a406b2..fc153aea2f6cff1ce6b5043c11d4e16f24176ae0 100644 (file)
@@ -512,7 +512,7 @@ static int synps_edac_mc_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct of_device_id synps_edac_match[] = {
+static const struct of_device_id synps_edac_match[] = {
        { .compatible = "xlnx,zynq-ddrc-a05", },
        { /* end of table */ }
 };
index 2eebd28b4c40af2789c32e0008f2b60006fc03ac..6e45a43ffe8476686bcaee1157a7acc641fc3e6b 100644 (file)
@@ -17,7 +17,9 @@
  */
 static const char dmi_empty_string[] = "        ";
 
-static u16 __initdata dmi_ver;
+static u32 dmi_ver __initdata;
+static u32 dmi_len;
+static u16 dmi_num;
 /*
  * Catch too early calls to dmi_check_system():
  */
@@ -78,7 +80,7 @@ static const char * __init dmi_string(const struct dmi_header *dm, u8 s)
  *     We have to be cautious here. We have seen BIOSes with DMI pointers
  *     pointing to completely the wrong place for example
  */
-static void dmi_table(u8 *buf, u32 len, int num,
+static void dmi_table(u8 *buf,
                      void (*decode)(const struct dmi_header *, void *),
                      void *private_data)
 {
@@ -91,8 +93,8 @@ static void dmi_table(u8 *buf, u32 len, int num,
         * off the end of the table (should never happen but sometimes does
         * on bogus implementations.)
         */
-       while ((!num || i < num) &&
-              (data - buf + sizeof(struct dmi_header)) <= len) {
+       while ((!dmi_num || i < dmi_num) &&
+              (data - buf + sizeof(struct dmi_header)) <= dmi_len) {
                const struct dmi_header *dm = (const struct dmi_header *)data;
 
                /*
@@ -101,9 +103,9 @@ static void dmi_table(u8 *buf, u32 len, int num,
                 *  table in dmi_decode or dmi_string
                 */
                data += dm->length;
-               while ((data - buf < len - 1) && (data[0] || data[1]))
+               while ((data - buf < dmi_len - 1) && (data[0] || data[1]))
                        data++;
-               if (data - buf < len - 1)
+               if (data - buf < dmi_len - 1)
                        decode(dm, private_data);
 
                /*
@@ -118,8 +120,6 @@ static void dmi_table(u8 *buf, u32 len, int num,
 }
 
 static phys_addr_t dmi_base;
-static u32 dmi_len;
-static u16 dmi_num;
 
 static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
                void *))
@@ -130,7 +130,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
        if (buf == NULL)
                return -1;
 
-       dmi_table(buf, dmi_len, dmi_num, decode, NULL);
+       dmi_table(buf, decode, NULL);
 
        add_device_randomness(buf, dmi_len);
 
@@ -201,7 +201,7 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
         * the UUID are supposed to be little-endian encoded.  The specification
         * says that this is the defacto standard.
         */
-       if (dmi_ver >= 0x0206)
+       if (dmi_ver >= 0x020600)
                sprintf(s, "%pUL", d);
        else
                sprintf(s, "%pUB", d);
@@ -473,7 +473,7 @@ static void __init dmi_format_ids(char *buf, size_t len)
  */
 static int __init dmi_present(const u8 *buf)
 {
-       int smbios_ver;
+       u32 smbios_ver;
 
        if (memcmp(buf, "_SM_", 4) == 0 &&
            buf[5] < 32 && dmi_checksum(buf, buf[5])) {
@@ -506,14 +506,16 @@ static int __init dmi_present(const u8 *buf)
                if (dmi_walk_early(dmi_decode) == 0) {
                        if (smbios_ver) {
                                dmi_ver = smbios_ver;
-                               pr_info("SMBIOS %d.%d present.\n",
-                                      dmi_ver >> 8, dmi_ver & 0xFF);
+                               pr_info("SMBIOS %d.%d%s present.\n",
+                                       dmi_ver >> 8, dmi_ver & 0xFF,
+                                       (dmi_ver < 0x0300) ? "" : ".x");
                        } else {
                                dmi_ver = (buf[14] & 0xF0) << 4 |
                                           (buf[14] & 0x0F);
                                pr_info("Legacy DMI %d.%d present.\n",
                                       dmi_ver >> 8, dmi_ver & 0xFF);
                        }
+                       dmi_ver <<= 8;
                        dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
                        printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string);
                        return 0;
@@ -531,14 +533,16 @@ static int __init dmi_smbios3_present(const u8 *buf)
 {
        if (memcmp(buf, "_SM3_", 5) == 0 &&
            buf[6] < 32 && dmi_checksum(buf, buf[6])) {
-               dmi_ver = get_unaligned_be16(buf + 7);
+               dmi_ver = get_unaligned_be32(buf + 6);
+               dmi_ver &= 0xFFFFFF;
                dmi_num = 0;                    /* No longer specified */
                dmi_len = get_unaligned_le32(buf + 12);
                dmi_base = get_unaligned_le64(buf + 16);
 
                if (dmi_walk_early(dmi_decode) == 0) {
-                       pr_info("SMBIOS %d.%d present.\n",
-                               dmi_ver >> 8, dmi_ver & 0xFF);
+                       pr_info("SMBIOS %d.%d.%d present.\n",
+                               dmi_ver >> 16, (dmi_ver >> 8) & 0xFF,
+                               dmi_ver & 0xFF);
                        dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
                        pr_debug("DMI: %s\n", dmi_ids_string);
                        return 0;
@@ -893,7 +897,7 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
        if (buf == NULL)
                return -1;
 
-       dmi_table(buf, dmi_len, dmi_num, decode, private_data);
+       dmi_table(buf, decode, private_data);
 
        dmi_unmap(buf);
        return 0;
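
After this change dmi_ver carries the full SMBIOS version in a single u32 as
0x00MMmmrr (major, minor, and - for SMBIOS 3.x entry points only - a release
revision); the 2.x and legacy paths shift their 16-bit value left by 8 so that
comparisons such as "dmi_ver >= 0x020600" work uniformly. A standalone,
runnable illustration of the encoding (not kernel code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t v2 = 0x0207 << 8;      /* SMBIOS 2.7  -> 0x020700 */
            uint32_t v3 = 0x030001;         /* SMBIOS 3.0.1            */

            printf("SMBIOS %u.%u present.\n", v2 >> 16, (v2 >> 8) & 0xFF);
            printf("SMBIOS %u.%u.%u present.\n",
                   v3 >> 16, (v3 >> 8) & 0xFF, v3 & 0xFF);
            return 0;
    }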
index dcae482a9a17c51a554e5928664e1b960b297588..e29560e6b40b0e5f28a141e7c88ceba1bdfa22ff 100644 (file)
@@ -175,7 +175,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
        unsigned long initrd_addr;
        u64 initrd_size = 0;
        unsigned long fdt_addr = 0;  /* Original DTB */
-       u64 fdt_size = 0;  /* We don't get size from configuration table */
+       unsigned long fdt_size = 0;
        char *cmdline_ptr = NULL;
        int cmdline_size = 0;
        unsigned long new_fdt_addr;
@@ -239,8 +239,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
        } else {
                status = handle_cmdline_files(sys_table, image, cmdline_ptr,
                                              "dtb=",
-                                             ~0UL, (unsigned long *)&fdt_addr,
-                                             (unsigned long *)&fdt_size);
+                                             ~0UL, &fdt_addr, &fdt_size);
 
                if (status != EFI_SUCCESS) {
                        pr_efi_err(sys_table, "Failed to load device tree!\n");
@@ -252,7 +251,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
                pr_efi(sys_table, "Using DTB from command line\n");
        } else {
                /* Look for a device tree configuration table entry. */
-               fdt_addr = (uintptr_t)get_fdt(sys_table);
+               fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size);
                if (fdt_addr)
                        pr_efi(sys_table, "Using DTB from configuration table\n");
        }
index 47437b16b18697c2d624eadd8513d9bdfa8f5299..e334a01cf92f8243392ddd66616c8563306ccf5d 100644 (file)
@@ -41,7 +41,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
                                            unsigned long fdt_addr,
                                            unsigned long fdt_size);
 
-void *get_fdt(efi_system_table_t *sys_table);
+void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size);
 
 void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
                     unsigned long desc_size, efi_memory_desc_t *runtime_map,
index 91da56c4fd540cb5d381848e582f3f4453568af3..ef5d764e2a27ea506775e7c117dc291b9749ded1 100644 (file)
@@ -323,7 +323,7 @@ fail:
        return EFI_LOAD_ERROR;
 }
 
-void *get_fdt(efi_system_table_t *sys_table)
+void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size)
 {
        efi_guid_t fdt_guid = DEVICE_TREE_GUID;
        efi_config_table_t *tables;
@@ -336,6 +336,11 @@ void *get_fdt(efi_system_table_t *sys_table)
        for (i = 0; i < sys_table->nr_tables; i++)
                if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) {
                        fdt = (void *) tables[i].table;
+                       if (fdt_check_header(fdt) != 0) {
+                               pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n");
+                               return NULL;
+                       }
+                       *fdt_size = fdt_totalsize(fdt);
                        break;
         }
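
get_fdt() now sanity-checks the configuration-table blob before trusting it
and reports its size to the caller. A hedged sketch of the same validation
using the libfdt helpers the stub relies on (fdt_check_header() returns 0 for
a blob with a valid magic and version; fdt_totalsize() reads the total-size
field from the header); validate_fdt() is an illustrative name:

    #include <libfdt.h>
    #include <stddef.h>

    static void *validate_fdt(void *blob, unsigned long *size)
    {
            if (!blob || fdt_check_header(blob) != 0)
                    return NULL;    /* reject a bogus table entry */

            *size = fdt_totalsize(blob);    /* callers need it to copy */
            return blob;
    }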
 
index 679b10e34fb545f23c827f9699bfef1c9f4268bd..b6f076b213bcfde496a61824335b68d1760b5613 100644 (file)
@@ -2121,7 +2121,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
        connector = drm_connector_find(dev, out_resp->connector_id);
        if (!connector) {
                ret = -ENOENT;
-               goto out;
+               goto out_unlock;
        }
 
        for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
@@ -2201,6 +2201,8 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 
 out:
        drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out_unlock:
        mutex_unlock(&dev->mode_config.mutex);
 
        return ret;
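
The drm_mode_getconnector() fix above addresses an unbalanced unlock: when the
connector lookup fails, connection_mutex has not been taken yet, so jumping to
the old "out" label would drop a modeset lock that was never acquired. The new
"out_unlock" label releases only mode_config.mutex on that path.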
index cc6ea53d2b81951553d4b135a1760cb127e574c2..5c66b568bb8162345046fcce0118c5bf74eddc2f 100644 (file)
@@ -1095,6 +1095,7 @@ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv)
        /* Gunit-Display CZ domain, 0x182028-0x1821CF */
        s->gu_ctl0              = I915_READ(VLV_GU_CTL0);
        s->gu_ctl1              = I915_READ(VLV_GU_CTL1);
+       s->pcbr                 = I915_READ(VLV_PCBR);
        s->clock_gate_dis2      = I915_READ(VLV_GUNIT_CLOCK_GATE2);
 
        /*
@@ -1189,6 +1190,7 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
        /* Gunit-Display CZ domain, 0x182028-0x1821CF */
        I915_WRITE(VLV_GU_CTL0,                 s->gu_ctl0);
        I915_WRITE(VLV_GU_CTL1,                 s->gu_ctl1);
+       I915_WRITE(VLV_PCBR,                    s->pcbr);
        I915_WRITE(VLV_GUNIT_CLOCK_GATE2,       s->clock_gate_dis2);
 }
 
@@ -1197,19 +1199,7 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
        u32 val;
        int err;
 
-       val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
-       WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on);
-
 #define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT)
-       /* Wait for a previous force-off to settle */
-       if (force_on) {
-               err = wait_for(!COND, 20);
-               if (err) {
-                       DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n",
-                                 I915_READ(VLV_GTLC_SURVIVABILITY_REG));
-                       return err;
-               }
-       }
 
        val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
        val &= ~VLV_GFX_CLK_FORCE_ON_BIT;
index 8727086cf48ccce9e6548df8cf4e1d0df59012e7..b4faa2df9d3d8151c4e7aff8853be20937414096 100644 (file)
@@ -1094,6 +1094,7 @@ struct vlv_s0ix_state {
        /* Display 2 CZ domain */
        u32 gu_ctl0;
        u32 gu_ctl1;
+       u32 pcbr;
        u32 clock_gate_dis2;
 };
 
index 110fade9cb74680f0f37115353fe166f7edeca3d..25d9e72627e9df97cb607de8038d2310d28c214b 100644 (file)
@@ -510,6 +510,7 @@ config SENSORS_G762
 config SENSORS_GPIO_FAN
        tristate "GPIO fan"
        depends on GPIOLIB
+       depends on THERMAL || THERMAL=n
        help
          If you say yes here you get support for fans connected to GPIO lines.
 
@@ -599,8 +600,8 @@ config SENSORS_IT87
        help
          If you say yes here you get support for ITE IT8705F, IT8712F,
          IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E,
-         IT8771E, IT8772E, IT8782F, IT8783E/F and IT8603E sensor chips,
-         and the SiS950 clone.
+         IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E,
+         IT8603E, IT8620E, and IT8623E sensor chips, and the SiS950 clone.
 
          This driver can also be built as a module.  If so, the module
          will be called it87.
@@ -624,7 +625,7 @@ config SENSORS_JC42
          mobile devices and servers.  Support will include, but not be limited
          to, ADT7408, AT30TS00, CAT34TS02, CAT6095, MAX6604, MCP9804, MCP9805,
          MCP98242, MCP98243, MCP98244, MCP9843, SE97, SE98, STTS424(E),
-         STTS2002, STTS3000, TSE2002B3, TSE2002GB2, TS3000B3, and TS3000GB2.
+         STTS2002, STTS3000, TSE2002, TSE2004, TS3000, and TS3001.
 
          This driver can also be built as a module.  If so, the module
          will be called jc42.
@@ -1145,6 +1146,16 @@ config SENSORS_NCT7802
          This driver can also be built as a module.  If so, the module
          will be called nct7802.
 
+config SENSORS_NCT7904
+       tristate "Nuvoton NCT7904"
+       depends on I2C
+       help
+         If you say yes here you get support for the Nuvoton NCT7904
+         hardware monitoring chip, including manual fan speed control.
+
+         This driver can also be built as a module.  If so, the module
+         will be called nct7904.
+
 config SENSORS_PCF8591
        tristate "Philips PCF8591 ADC/DAC"
        depends on I2C
@@ -1164,6 +1175,7 @@ source drivers/hwmon/pmbus/Kconfig
 config SENSORS_PWM_FAN
        tristate "PWM fan"
        depends on (PWM && OF) || COMPILE_TEST
+       depends on THERMAL || THERMAL=n
        help
          If you say yes here you get support for fans connected to PWM lines.
          The driver uses the generic PWM interface, thus it will work on a
index 6c941472e707a51b2dfb5f6889aea07bad4ff5b7..b4a40f17e2aa5211f767323f736cab872c16528d 100644 (file)
@@ -120,6 +120,7 @@ obj-$(CONFIG_SENSORS_MENF21BMC_HWMON) += menf21bmc_hwmon.o
 obj-$(CONFIG_SENSORS_NCT6683)  += nct6683.o
 obj-$(CONFIG_SENSORS_NCT6775)  += nct6775.o
 obj-$(CONFIG_SENSORS_NCT7802)  += nct7802.o
+obj-$(CONFIG_SENSORS_NCT7904)  += nct7904.o
 obj-$(CONFIG_SENSORS_NTC_THERMISTOR)   += ntc_thermistor.o
 obj-$(CONFIG_SENSORS_PC87360)  += pc87360.o
 obj-$(CONFIG_SENSORS_PC87427)  += pc87427.o
index 5b7fec824f10a0eafc8109d7e9632789d6017e69..ed303ba3a59393533a1f0d06b95abc53a5febcdd 100644 (file)
@@ -397,14 +397,13 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
                        struct device_attribute *devattr, char *buf) = {
                        show_label, show_crit_alarm, show_temp, show_tjmax,
                        show_ttarget };
-       static const char *const names[TOTAL_ATTRS] = {
-                                       "temp%d_label", "temp%d_crit_alarm",
-                                       "temp%d_input", "temp%d_crit",
-                                       "temp%d_max" };
+       static const char *const suffixes[TOTAL_ATTRS] = {
+               "label", "crit_alarm", "input", "crit", "max"
+       };
 
        for (i = 0; i < tdata->attr_size; i++) {
-               snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, names[i],
-                       attr_no);
+               snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH,
+                        "temp%d_%s", attr_no, suffixes[i]);
                sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr);
                tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
                tdata->sd_attrs[i].dev_attr.attr.mode = S_IRUGO;
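
The coretemp rename above trades per-attribute format strings for one literal
format plus a suffix table, which also lets the compiler type-check the
snprintf() call. A runnable userspace illustration of the same construction:

    #include <stdio.h>

    int main(void)
    {
            static const char *const suffixes[] = {
                    "label", "crit_alarm", "input", "crit", "max"
            };
            char name[32];

            for (int i = 0; i < 5; i++) {
                    snprintf(name, sizeof(name), "temp%d_%s",
                             1, suffixes[i]);
                    puts(name);     /* temp1_label, temp1_crit_alarm, ... */
            }
            return 0;
    }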
index 36abf814b8c77c57c0728beda6afac69b92cc802..a3dae6d0082a0d08e4183f63e27b79ae5510b863 100644 (file)
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
+#include <linux/thermal.h>
 
 struct gpio_fan_data {
        struct platform_device  *pdev;
        struct device           *hwmon_dev;
+       /* Cooling device if any */
+       struct thermal_cooling_device *cdev;
        struct mutex            lock; /* lock GPIOs operations. */
        int                     num_ctrl;
        unsigned                *ctrl;
@@ -387,6 +390,53 @@ static int fan_ctrl_init(struct gpio_fan_data *fan_data,
        return 0;
 }
 
+static int gpio_fan_get_max_state(struct thermal_cooling_device *cdev,
+                                 unsigned long *state)
+{
+       struct gpio_fan_data *fan_data = cdev->devdata;
+
+       if (!fan_data)
+               return -EINVAL;
+
+       *state = fan_data->num_speed - 1;
+       return 0;
+}
+
+static int gpio_fan_get_cur_state(struct thermal_cooling_device *cdev,
+                                 unsigned long *state)
+{
+       struct gpio_fan_data *fan_data = cdev->devdata;
+       int r;
+
+       if (!fan_data)
+               return -EINVAL;
+
+       r = get_fan_speed_index(fan_data);
+       if (r < 0)
+               return r;
+
+       *state = r;
+       return 0;
+}
+
+static int gpio_fan_set_cur_state(struct thermal_cooling_device *cdev,
+                                 unsigned long state)
+{
+       struct gpio_fan_data *fan_data = cdev->devdata;
+
+       if (!fan_data)
+               return -EINVAL;
+
+       set_fan_speed(fan_data, state);
+       return 0;
+}
+
+static const struct thermal_cooling_device_ops gpio_fan_cool_ops = {
+       .get_max_state = gpio_fan_get_max_state,
+       .get_cur_state = gpio_fan_get_cur_state,
+       .set_cur_state = gpio_fan_set_cur_state,
+};
+
 #ifdef CONFIG_OF_GPIO
 /*
  * Translate OpenFirmware node properties into platform_data
@@ -404,10 +454,32 @@ static int gpio_fan_get_of_pdata(struct device *dev,
 
        node = dev->of_node;
 
+       /* Alarm GPIO if one exists */
+       if (of_gpio_named_count(node, "alarm-gpios") > 0) {
+               struct gpio_fan_alarm *alarm;
+               int val;
+               enum of_gpio_flags flags;
+
+               alarm = devm_kzalloc(dev, sizeof(struct gpio_fan_alarm),
+                                       GFP_KERNEL);
+               if (!alarm)
+                       return -ENOMEM;
+
+               val = of_get_named_gpio_flags(node, "alarm-gpios", 0, &flags);
+               if (val < 0)
+                       return val;
+               alarm->gpio = val;
+               alarm->active_low = flags & OF_GPIO_ACTIVE_LOW;
+
+               pdata->alarm = alarm;
+       }
+
        /* Fill GPIO pin array */
        pdata->num_ctrl = of_gpio_count(node);
        if (pdata->num_ctrl <= 0) {
-               dev_err(dev, "gpios DT property empty / missing");
+               if (pdata->alarm)
+                       return 0;
+               dev_err(dev, "DT properties empty / missing");
                return -ENODEV;
        }
        ctrl = devm_kzalloc(dev, pdata->num_ctrl * sizeof(unsigned),
@@ -460,26 +532,6 @@ static int gpio_fan_get_of_pdata(struct device *dev,
        }
        pdata->speed = speed;
 
-       /* Alarm GPIO if one exists */
-       if (of_gpio_named_count(node, "alarm-gpios") > 0) {
-               struct gpio_fan_alarm *alarm;
-               int val;
-               enum of_gpio_flags flags;
-
-               alarm = devm_kzalloc(dev, sizeof(struct gpio_fan_alarm),
-                                       GFP_KERNEL);
-               if (!alarm)
-                       return -ENOMEM;
-
-               val = of_get_named_gpio_flags(node, "alarm-gpios", 0, &flags);
-               if (val < 0)
-                       return val;
-               alarm->gpio = val;
-               alarm->active_low = flags & OF_GPIO_ACTIVE_LOW;
-
-               pdata->alarm = alarm;
-       }
-
        return 0;
 }
 
@@ -495,6 +547,11 @@ static int gpio_fan_probe(struct platform_device *pdev)
        struct gpio_fan_data *fan_data;
        struct gpio_fan_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
+       fan_data = devm_kzalloc(&pdev->dev, sizeof(struct gpio_fan_data),
+                               GFP_KERNEL);
+       if (!fan_data)
+               return -ENOMEM;
+
 #ifdef CONFIG_OF_GPIO
        if (!pdata) {
                pdata = devm_kzalloc(&pdev->dev,
@@ -512,11 +569,6 @@ static int gpio_fan_probe(struct platform_device *pdev)
                return -EINVAL;
 #endif /* CONFIG_OF_GPIO */
 
-       fan_data = devm_kzalloc(&pdev->dev, sizeof(struct gpio_fan_data),
-                               GFP_KERNEL);
-       if (!fan_data)
-               return -ENOMEM;
-
        fan_data->pdev = pdev;
        platform_set_drvdata(pdev, fan_data);
        mutex_init(&fan_data->lock);
@@ -544,18 +596,39 @@ static int gpio_fan_probe(struct platform_device *pdev)
                                                       gpio_fan_groups);
        if (IS_ERR(fan_data->hwmon_dev))
                return PTR_ERR(fan_data->hwmon_dev);
+#ifdef CONFIG_OF_GPIO
+       /* Optional cooling device register for Device tree platforms */
+       fan_data->cdev = thermal_of_cooling_device_register(pdev->dev.of_node,
+                                                           "gpio-fan",
+                                                           fan_data,
+                                                           &gpio_fan_cool_ops);
+#else /* CONFIG_OF_GPIO */
+       /* Optional cooling device register for non Device tree platforms */
+       fan_data->cdev = thermal_cooling_device_register("gpio-fan", fan_data,
+                                                        &gpio_fan_cool_ops);
+#endif /* CONFIG_OF_GPIO */
 
        dev_info(&pdev->dev, "GPIO fan initialized\n");
 
        return 0;
 }
 
-static void gpio_fan_shutdown(struct platform_device *pdev)
+static int gpio_fan_remove(struct platform_device *pdev)
 {
-       struct gpio_fan_data *fan_data = dev_get_drvdata(&pdev->dev);
+       struct gpio_fan_data *fan_data = platform_get_drvdata(pdev);
+
+       if (!IS_ERR(fan_data->cdev))
+               thermal_cooling_device_unregister(fan_data->cdev);
 
        if (fan_data->ctrl)
                set_fan_speed(fan_data, 0);
+
+       return 0;
+}
+
+static void gpio_fan_shutdown(struct platform_device *pdev)
+{
+       gpio_fan_remove(pdev);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -589,6 +662,7 @@ static SIMPLE_DEV_PM_OPS(gpio_fan_pm, gpio_fan_suspend, gpio_fan_resume);
 
 static struct platform_driver gpio_fan_driver = {
        .probe          = gpio_fan_probe,
+       .remove         = gpio_fan_remove,
        .shutdown       = gpio_fan_shutdown,
        .driver = {
                .name   = "gpio-fan",
index 030e7ff589be9558033ad8283537133c9c72dcf0..21b9c72f16bd7423dda3870b7e6eaf1ee047a6b5 100644 (file)
@@ -56,15 +56,10 @@ static u8 const temp_sensor_sig[]  = {0x74, 0x65, 0x6D};
 static u8 const watt_sensor_sig[]  = {0x41, 0x43};
 
 #define PEX_NUM_SENSOR_FUNCS   3
-static char const * const power_sensor_name_templates[] = {
-       "%s%d_average",
-       "%s%d_average_lowest",
-       "%s%d_average_highest"
-};
-static char const * const temp_sensor_name_templates[] = {
-       "%s%d_input",
-       "%s%d_input_lowest",
-       "%s%d_input_highest"
+static const char * const sensor_name_suffixes[] = {
+       "",
+       "_lowest",
+       "_highest"
 };
 
 static void ibmpex_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data);
@@ -355,9 +350,11 @@ static int create_sensor(struct ibmpex_bmc_data *data, int type,
                return -ENOMEM;
 
        if (type == TEMP_SENSOR)
-               sprintf(n, temp_sensor_name_templates[func], "temp", counter);
+               sprintf(n, "temp%d_input%s",
+                       counter, sensor_name_suffixes[func]);
        else if (type == POWER_SENSOR)
-               sprintf(n, power_sensor_name_templates[func], "power", counter);
+               sprintf(n, "power%d_average%s",
+                       counter, sensor_name_suffixes[func]);
 
        sysfs_attr_init(&data->sensors[sensor].attr[func].dev_attr.attr);
        data->sensors[sensor].attr[func].dev_attr.attr.name = n;
index febe8175d36ced56b1894d663f3cf84f3b3f341b..4255514b2c72d917c2e2151f3d1fac405033e935 100644 (file)
 #include <linux/platform_device.h>
 #include <asm/opal.h>
 #include <linux/err.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
 
 #define MAX_ATTR_LEN   32
+#define MAX_LABEL_LEN  64
 
 /* Sensor suffix name from DT */
 #define DT_FAULT_ATTR_SUFFIX           "faulted"
  */
 enum sensors {
        FAN,
-       AMBIENT_TEMP,
+       TEMP,
        POWER_SUPPLY,
        POWER_INPUT,
        MAX_SENSOR_TYPE,
 };
 
+#define INVALID_INDEX (-1U)
+
 static struct sensor_group {
        const char *name;
        const char *compatible;
        struct attribute_group group;
        u32 attr_count;
+       u32 hwmon_index;
 } sensor_groups[] = {
        {"fan", "ibm,opal-sensor-cooling-fan"},
        {"temp", "ibm,opal-sensor-amb-temp"},
@@ -64,7 +70,10 @@ static struct sensor_group {
 
 struct sensor_data {
        u32 id; /* An opaque id of the firmware for each sensor */
+       u32 hwmon_index;
+       u32 opal_index;
        enum sensors type;
+       char label[MAX_LABEL_LEN];
        char name[MAX_ATTR_LEN];
        struct device_attribute dev_attr;
 };
@@ -87,7 +96,7 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
                return ret;
 
        /* Convert temperature to milli-degrees */
-       if (sdata->type == AMBIENT_TEMP)
+       if (sdata->type == TEMP)
                x *= 1000;
        /* Convert power to micro-watts */
        else if (sdata->type == POWER_INPUT)
@@ -96,8 +105,65 @@ static ssize_t show_sensor(struct device *dev, struct device_attribute *devattr,
        return sprintf(buf, "%u\n", x);
 }
 
-static int get_sensor_index_attr(const char *name, u32 *index,
-                                       char *attr)
+static ssize_t show_label(struct device *dev, struct device_attribute *devattr,
+                         char *buf)
+{
+       struct sensor_data *sdata = container_of(devattr, struct sensor_data,
+                                                dev_attr);
+
+       return sprintf(buf, "%s\n", sdata->label);
+}
+
+static int __init get_logical_cpu(int hwcpu)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu)
+               if (get_hard_smp_processor_id(cpu) == hwcpu)
+                       return cpu;
+
+       return -ENOENT;
+}
+
+static void __init make_sensor_label(struct device_node *np,
+                                    struct sensor_data *sdata,
+                                    const char *label)
+{
+       u32 id;
+       size_t n;
+
+       n = snprintf(sdata->label, sizeof(sdata->label), "%s", label);
+
+       /*
+        * Core temp pretty print
+        */
+       if (!of_property_read_u32(np, "ibm,pir", &id)) {
+               int cpuid = get_logical_cpu(id);
+
+               if (cpuid >= 0)
+                       /*
+                        * The digital thermal sensors are associated
+                        * with a core. Let's print out the range of
+                        * cpu ids corresponding to the hardware
+                        * threads of the core.
+                        */
+                       n += snprintf(sdata->label + n,
+                                     sizeof(sdata->label) - n, " %d-%d",
+                                     cpuid, cpuid + threads_per_core - 1);
+               else
+                       n += snprintf(sdata->label + n,
+                                     sizeof(sdata->label) - n, " phy%d", id);
+       }
+
+       /*
+        * Membuffer pretty print
+        */
+       if (!of_property_read_u32(np, "ibm,chip-id", &id))
+               n += snprintf(sdata->label + n, sizeof(sdata->label) - n,
+                             " %d", id & 0xffff);
+}
+
+static int get_sensor_index_attr(const char *name, u32 *index, char *attr)
 {
        char *hash_pos = strchr(name, '#');
        char buf[8] = { 0 };
@@ -127,46 +193,90 @@ static int get_sensor_index_attr(const char *name, u32 *index,
        return 0;
 }
 
+static const char *convert_opal_attr_name(enum sensors type,
+                                         const char *opal_attr)
+{
+       const char *attr_name = NULL;
+
+       if (!strcmp(opal_attr, DT_FAULT_ATTR_SUFFIX)) {
+               attr_name = "fault";
+       } else if (!strcmp(opal_attr, DT_DATA_ATTR_SUFFIX)) {
+               attr_name = "input";
+       } else if (!strcmp(opal_attr, DT_THRESHOLD_ATTR_SUFFIX)) {
+               if (type == TEMP)
+                       attr_name = "max";
+               else if (type == FAN)
+                       attr_name = "min";
+       }
+
+       return attr_name;
+}
+
 /*
  * This function translates the DT node name into the 'hwmon' attribute name.
  * IBMPOWERNV device node appear like cooling-fan#2-data, amb-temp#1-thrs etc.
  * which need to be mapped as fan2_input, temp1_max respectively before
  * populating them inside hwmon device class.
  */
-static int create_hwmon_attr_name(struct device *dev, enum sensors type,
-                                        const char *node_name,
-                                        char *hwmon_attr_name)
+static const char *parse_opal_node_name(const char *node_name,
+                                       enum sensors type, u32 *index)
 {
        char attr_suffix[MAX_ATTR_LEN];
-       char *attr_name;
-       u32 index;
+       const char *attr_name;
        int err;
 
-       err = get_sensor_index_attr(node_name, &index, attr_suffix);
-       if (err) {
-               dev_err(dev, "Sensor device node name '%s' is invalid\n",
-                       node_name);
-               return err;
-       }
+       err = get_sensor_index_attr(node_name, index, attr_suffix);
+       if (err)
+               return ERR_PTR(err);
 
-       if (!strcmp(attr_suffix, DT_FAULT_ATTR_SUFFIX)) {
-               attr_name = "fault";
-       } else if (!strcmp(attr_suffix, DT_DATA_ATTR_SUFFIX)) {
-               attr_name = "input";
-       } else if (!strcmp(attr_suffix, DT_THRESHOLD_ATTR_SUFFIX)) {
-               if (type == AMBIENT_TEMP)
-                       attr_name = "max";
-               else if (type == FAN)
-                       attr_name = "min";
-               else
-                       return -ENOENT;
-       } else {
-               return -ENOENT;
+       attr_name = convert_opal_attr_name(type, attr_suffix);
+       if (!attr_name)
+               return ERR_PTR(-ENOENT);
+
+       return attr_name;
+}
+
+static int get_sensor_type(struct device_node *np)
+{
+       enum sensors type;
+       const char *str;
+
+       for (type = 0; type < MAX_SENSOR_TYPE; type++) {
+               if (of_device_is_compatible(np, sensor_groups[type].compatible))
+                       return type;
        }
 
-       snprintf(hwmon_attr_name, MAX_ATTR_LEN, "%s%d_%s",
-                sensor_groups[type].name, index, attr_name);
-       return 0;
+       /*
+        * Let's check if we have a newer device tree
+        */
+       if (!of_device_is_compatible(np, "ibm,opal-sensor"))
+               return MAX_SENSOR_TYPE;
+
+       if (of_property_read_string(np, "sensor-type", &str))
+               return MAX_SENSOR_TYPE;
+
+       for (type = 0; type < MAX_SENSOR_TYPE; type++)
+               if (!strcmp(str, sensor_groups[type].name))
+                       return type;
+
+       return MAX_SENSOR_TYPE;
+}
+
+static u32 get_sensor_hwmon_index(struct sensor_data *sdata,
+                                 struct sensor_data *sdata_table, int count)
+{
+       int i;
+
+       /*
+        * We don't use the OPAL index on newer device trees
+        */
+       if (sdata->opal_index != INVALID_INDEX) {
+               for (i = 0; i < count; i++)
+                       if (sdata_table[i].opal_index == sdata->opal_index &&
+                           sdata_table[i].type == sdata->type)
+                               return sdata_table[i].hwmon_index;
+       }
+       return ++sensor_groups[sdata->type].hwmon_index;
 }
 
 static int populate_attr_groups(struct platform_device *pdev)
@@ -178,15 +288,22 @@ static int populate_attr_groups(struct platform_device *pdev)
 
        opal = of_find_node_by_path("/ibm,opal/sensors");
        for_each_child_of_node(opal, np) {
+               const char *label;
+
                if (np->name == NULL)
                        continue;
 
-               for (type = 0; type < MAX_SENSOR_TYPE; type++)
-                       if (of_device_is_compatible(np,
-                                       sensor_groups[type].compatible)) {
-                               sensor_groups[type].attr_count++;
-                               break;
-                       }
+               type = get_sensor_type(np);
+               if (type == MAX_SENSOR_TYPE)
+                       continue;
+
+               sensor_groups[type].attr_count++;
+
+               /*
+                * add a new attribute for labels
+                */
+               if (!of_property_read_string(np, "label", &label))
+                       sensor_groups[type].attr_count++;
        }
 
        of_node_put(opal);
@@ -207,6 +324,21 @@ static int populate_attr_groups(struct platform_device *pdev)
        return 0;
 }
 
+static void create_hwmon_attr(struct sensor_data *sdata, const char *attr_name,
+                             ssize_t (*show)(struct device *dev,
+                                             struct device_attribute *attr,
+                                             char *buf))
+{
+       snprintf(sdata->name, MAX_ATTR_LEN, "%s%d_%s",
+                sensor_groups[sdata->type].name, sdata->hwmon_index,
+                attr_name);
+
+       sysfs_attr_init(&sdata->dev_attr.attr);
+       sdata->dev_attr.attr.name = sdata->name;
+       sdata->dev_attr.attr.mode = S_IRUGO;
+       sdata->dev_attr.show = show;
+}
+
 /*
  * Iterate through the device tree for each child of 'sensors' node, create
  * a sysfs attribute file, the file is named by translating the DT node name
@@ -233,18 +365,23 @@ static int create_device_attrs(struct platform_device *pdev)
        }
 
        for_each_child_of_node(opal, np) {
+               const char *attr_name;
+               u32 opal_index;
+               const char *label;
+
                if (np->name == NULL)
                        continue;
 
-               for (type = 0; type < MAX_SENSOR_TYPE; type++)
-                       if (of_device_is_compatible(np,
-                                       sensor_groups[type].compatible))
-                               break;
-
+               type = get_sensor_type(np);
                if (type == MAX_SENSOR_TYPE)
                        continue;
 
-               if (of_property_read_u32(np, "sensor-id", &sensor_id)) {
+               /*
+                * Newer device trees use a "sensor-data" property
+                * name for input.
+                */
+               if (of_property_read_u32(np, "sensor-id", &sensor_id) &&
+                   of_property_read_u32(np, "sensor-data", &sensor_id)) {
                        dev_info(&pdev->dev,
                                 "'sensor-id' missing in the node '%s'\n",
                                 np->name);
@@ -253,18 +390,46 @@ static int create_device_attrs(struct platform_device *pdev)
 
                sdata[count].id = sensor_id;
                sdata[count].type = type;
-               err = create_hwmon_attr_name(&pdev->dev, type, np->name,
-                                            sdata[count].name);
-               if (err)
-                       goto exit_put_node;
 
-               sysfs_attr_init(&sdata[count].dev_attr.attr);
-               sdata[count].dev_attr.attr.name = sdata[count].name;
-               sdata[count].dev_attr.attr.mode = S_IRUGO;
-               sdata[count].dev_attr.show = show_sensor;
+               /*
+                * If we cannot parse the node name, it means we are
+                * running on a newer device tree. We can just forget
+                * about the OPAL index and use a default value for
+                * the hwmon attribute name.
+                */
+               attr_name = parse_opal_node_name(np->name, type, &opal_index);
+               if (IS_ERR(attr_name)) {
+                       attr_name = "input";
+                       opal_index = INVALID_INDEX;
+               }
+
+               sdata[count].opal_index = opal_index;
+               sdata[count].hwmon_index =
+                       get_sensor_hwmon_index(&sdata[count], sdata, count);
+
+               create_hwmon_attr(&sdata[count], attr_name, show_sensor);
 
                pgroups[type]->attrs[sensor_groups[type].attr_count++] =
                                &sdata[count++].dev_attr.attr;
+
+               if (!of_property_read_string(np, "label", &label)) {
+                       /*
+                        * For the label attribute, reuse the properties
+                        * of the previous "input" attribute, as both
+                        * relate to the same sensor.
+                        */
+                       sdata[count].type = type;
+                       sdata[count].opal_index = sdata[count - 1].opal_index;
+                       sdata[count].hwmon_index = sdata[count - 1].hwmon_index;
+
+                       make_sensor_label(np, &sdata[count], label);
+
+                       create_hwmon_attr(&sdata[count], "label", show_label);
+
+                       pgroups[type]->attrs[sensor_groups[type].attr_count++] =
+                               &sdata[count++].dev_attr.attr;
+               }
        }
 
 exit_put_node:
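
parse_opal_node_name() is introduced elsewhere in this patch; judging from the fallback above, legacy OPAL node names are assumed to encode both an attribute suffix and the OPAL index. A hedged illustration of the apparent convention (the node name is hypothetical):

	/* "cooling-fan#8-data" -> attr suffix "data" (presumably mapped
	 *                         to "input"), opal_index = 8
	 * unparsable name      -> attr_name "input", opal_index INVALID_INDEX
	 */
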
index 409116c52cc57f650c8939fc6c7b7c50d3a1daea..d0ee556e8ce00447d9d16d8c3adb81de4160bfae 100644 (file)
@@ -11,6 +11,7 @@
  *  similar parts.  The other devices are supported by different drivers.
  *
  *  Supports: IT8603E  Super I/O chip w/LPC interface
+ *            IT8620E  Super I/O chip w/LPC interface
  *            IT8623E  Super I/O chip w/LPC interface
  *            IT8705F  Super I/O chip w/LPC interface
  *            IT8712F  Super I/O chip w/LPC interface
  *            IT8758E  Super I/O chip w/LPC interface
  *            IT8771E  Super I/O chip w/LPC interface
  *            IT8772E  Super I/O chip w/LPC interface
+ *            IT8781F  Super I/O chip w/LPC interface
  *            IT8782F  Super I/O chip w/LPC interface
  *            IT8783E/F Super I/O chip w/LPC interface
+ *            IT8786E  Super I/O chip w/LPC interface
+ *            IT8790E  Super I/O chip w/LPC interface
  *            Sis950   A clone of the IT8705F
  *
  *  Copyright (C) 2001 Chris Gauthron
@@ -66,7 +70,7 @@
 #define DRVNAME "it87"
 
 enum chips { it87, it8712, it8716, it8718, it8720, it8721, it8728, it8771,
-            it8772, it8782, it8783, it8603 };
+            it8772, it8781, it8782, it8783, it8786, it8790, it8603, it8620 };
 
 static unsigned short force_id;
 module_param(force_id, ushort, 0);
@@ -146,15 +150,20 @@ static inline void superio_exit(void)
 #define IT8728F_DEVID 0x8728
 #define IT8771E_DEVID 0x8771
 #define IT8772E_DEVID 0x8772
+#define IT8781F_DEVID 0x8781
 #define IT8782F_DEVID 0x8782
 #define IT8783E_DEVID 0x8783
+#define IT8786E_DEVID 0x8786
+#define IT8790E_DEVID 0x8790
 #define IT8603E_DEVID 0x8603
+#define IT8620E_DEVID 0x8620
 #define IT8623E_DEVID 0x8623
 #define IT87_ACT_REG  0x30
 #define IT87_BASE_REG 0x60
 
 /* Logical device 7 registers (IT8712F and later) */
 #define IT87_SIO_GPIO1_REG     0x25
+#define IT87_SIO_GPIO2_REG     0x26
 #define IT87_SIO_GPIO3_REG     0x27
 #define IT87_SIO_GPIO5_REG     0x29
 #define IT87_SIO_PINX1_REG     0x2a    /* Pin selection */
@@ -207,11 +216,11 @@ static bool fix_pwm_polarity;
 
 /* Monitors: 9 voltage (0 to 7, battery), 3 temp (1 to 3), 3 fan (1 to 3) */
 
-static const u8 IT87_REG_FAN[]         = { 0x0d, 0x0e, 0x0f, 0x80, 0x82 };
-static const u8 IT87_REG_FAN_MIN[]     = { 0x10, 0x11, 0x12, 0x84, 0x86 };
-static const u8 IT87_REG_FANX[]                = { 0x18, 0x19, 0x1a, 0x81, 0x83 };
-static const u8 IT87_REG_FANX_MIN[]    = { 0x1b, 0x1c, 0x1d, 0x85, 0x87 };
-static const u8 IT87_REG_TEMP_OFFSET[] = { 0x56, 0x57, 0x59 };
+static const u8 IT87_REG_FAN[]         = { 0x0d, 0x0e, 0x0f, 0x80, 0x82, 0x4c };
+static const u8 IT87_REG_FAN_MIN[]     = { 0x10, 0x11, 0x12, 0x84, 0x86, 0x4e };
+static const u8 IT87_REG_FANX[]        = { 0x18, 0x19, 0x1a, 0x81, 0x83, 0x4d };
+static const u8 IT87_REG_FANX_MIN[]    = { 0x1b, 0x1c, 0x1d, 0x85, 0x87, 0x4f };
+static const u8 IT87_REG_TEMP_OFFSET[] = { 0x56, 0x57, 0x59 };
 
 #define IT87_REG_FAN_MAIN_CTRL 0x13
 #define IT87_REG_FAN_CTL       0x14
@@ -238,6 +247,7 @@ static const u8 IT87_REG_TEMP_OFFSET[]      = { 0x56, 0x57, 0x59 };
 
 struct it87_devices {
        const char *name;
+       const char * const suffix;
        u16 features;
        u8 peci_mask;
        u8 old_peci_mask;
@@ -250,79 +260,131 @@ struct it87_devices {
 #define FEAT_TEMP_OFFSET       (1 << 4)
 #define FEAT_TEMP_PECI         (1 << 5)
 #define FEAT_TEMP_OLD_PECI     (1 << 6)
+#define FEAT_FAN16_CONFIG      (1 << 7)        /* Need to enable 16-bit fans */
+#define FEAT_FIVE_FANS         (1 << 8)        /* Supports five fans */
+#define FEAT_VID               (1 << 9)        /* Set if chip supports VID */
+#define FEAT_IN7_INTERNAL      (1 << 10)       /* Set if in7 is internal */
+#define FEAT_SIX_FANS          (1 << 11)       /* Supports six fans */
 
 static const struct it87_devices it87_devices[] = {
        [it87] = {
                .name = "it87",
+               .suffix = "F",
                .features = FEAT_OLD_AUTOPWM,   /* may need to overwrite */
        },
        [it8712] = {
                .name = "it8712",
-               .features = FEAT_OLD_AUTOPWM,   /* may need to overwrite */
+               .suffix = "F",
+               .features = FEAT_OLD_AUTOPWM | FEAT_VID,
+                                               /* may need to overwrite */
        },
        [it8716] = {
                .name = "it8716",
-               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET,
+               .suffix = "F",
+               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
+                 | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
        },
        [it8718] = {
                .name = "it8718",
-               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-                 | FEAT_TEMP_OLD_PECI,
+               .suffix = "F",
+               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
+                 | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
                .old_peci_mask = 0x4,
        },
        [it8720] = {
                .name = "it8720",
-               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-                 | FEAT_TEMP_OLD_PECI,
+               .suffix = "F",
+               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET | FEAT_VID
+                 | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS,
                .old_peci_mask = 0x4,
        },
        [it8721] = {
                .name = "it8721",
+               .suffix = "F",
                .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-                 | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI,
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_OLD_PECI | FEAT_TEMP_PECI
+                 | FEAT_FAN16_CONFIG | FEAT_FIVE_FANS | FEAT_IN7_INTERNAL,
                .peci_mask = 0x05,
                .old_peci_mask = 0x02,  /* Actually reports PCH */
        },
        [it8728] = {
                .name = "it8728",
+               .suffix = "F",
                .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_FIVE_FANS
+                 | FEAT_IN7_INTERNAL,
                .peci_mask = 0x07,
        },
        [it8771] = {
                .name = "it8771",
+               .suffix = "E",
                .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
-                                       /* PECI: guesswork */
-                                       /* 12mV ADC (OHM) */
-                                       /* 16 bit fans (OHM) */
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+                               /* PECI: guesswork */
+                               /* 12mV ADC (OHM) */
+                               /* 16 bit fans (OHM) */
+                               /* three fans, always 16 bit (guesswork) */
                .peci_mask = 0x07,
        },
        [it8772] = {
                .name = "it8772",
+               .suffix = "E",
                .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
-                                       /* PECI (coreboot) */
-                                       /* 12mV ADC (HWSensors4, OHM) */
-                                       /* 16 bit fans (HWSensors4, OHM) */
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+                               /* PECI (coreboot) */
+                               /* 12mV ADC (HWSensors4, OHM) */
+                               /* 16 bit fans (HWSensors4, OHM) */
+                               /* three fans, always 16 bit (datasheet) */
                .peci_mask = 0x07,
        },
+       [it8781] = {
+               .name = "it8781",
+               .suffix = "F",
+               .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
+                 | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
+               .old_peci_mask = 0x4,
+       },
        [it8782] = {
                .name = "it8782",
+               .suffix = "F",
                .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-                 | FEAT_TEMP_OLD_PECI,
+                 | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
                .old_peci_mask = 0x4,
        },
        [it8783] = {
                .name = "it8783",
+               .suffix = "E/F",
                .features = FEAT_16BIT_FANS | FEAT_TEMP_OFFSET
-                 | FEAT_TEMP_OLD_PECI,
+                 | FEAT_TEMP_OLD_PECI | FEAT_FAN16_CONFIG,
                .old_peci_mask = 0x4,
        },
+       [it8786] = {
+               .name = "it8786",
+               .suffix = "E",
+               .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+               .peci_mask = 0x07,
+       },
+       [it8790] = {
+               .name = "it8790",
+               .suffix = "E",
+               .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+               .peci_mask = 0x07,
+       },
        [it8603] = {
                .name = "it8603",
+               .suffix = "E",
                .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
-                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI,
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_IN7_INTERNAL,
+               .peci_mask = 0x07,
+       },
+       [it8620] = {
+               .name = "it8620",
+               .suffix = "E",
+               .features = FEAT_NEWER_AUTOPWM | FEAT_12MV_ADC | FEAT_16BIT_FANS
+                 | FEAT_TEMP_OFFSET | FEAT_TEMP_PECI | FEAT_SIX_FANS
+                 | FEAT_IN7_INTERNAL,
                .peci_mask = 0x07,
        },
 };
@@ -337,6 +399,12 @@ static const struct it87_devices it87_devices[] = {
 #define has_temp_old_peci(data, nr) \
                                (((data)->features & FEAT_TEMP_OLD_PECI) && \
                                 ((data)->old_peci_mask & (1 << nr)))
+#define has_fan16_config(data) ((data)->features & FEAT_FAN16_CONFIG)
+#define has_five_fans(data)    ((data)->features & (FEAT_FIVE_FANS | \
+                                                    FEAT_SIX_FANS))
+#define has_vid(data)          ((data)->features & FEAT_VID)
+#define has_in7_internal(data) ((data)->features & FEAT_IN7_INTERNAL)
+#define has_six_fans(data)     ((data)->features & FEAT_SIX_FANS)
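
Note the deliberate asymmetry in the predicates above: has_five_fans() also matches FEAT_SIX_FANS, since a six-fan chip necessarily has at least five fans. For the IT8620E, whose features include FEAT_SIX_FANS but not FEAT_FIVE_FANS, both tests therefore succeed:

	/* it8620: .features includes FEAT_SIX_FANS */
	has_five_fans(data);	/* true: tests FEAT_FIVE_FANS | FEAT_SIX_FANS */
	has_six_fans(data);	/* true: tests FEAT_SIX_FANS alone */
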
 
 struct it87_sio_data {
        enum chips type;
@@ -373,7 +441,7 @@ struct it87_data {
        u16 in_scaled;          /* Internal voltage sensors are scaled */
        u8 in[10][3];           /* [nr][0]=in, [1]=min, [2]=max */
        u8 has_fan;             /* Bitfield, fans enabled */
-       u16 fan[5][2];          /* Register values, [nr][0]=fan, [1]=min */
+       u16 fan[6][2];          /* Register values, [nr][0]=fan, [1]=min */
        u8 has_temp;            /* Bitfield, temp sensors enabled */
        s8 temp[3][4];          /* [nr][0]=temp, [1]=min, [2]=max, [3]=offset */
        u8 sensor;              /* Register value (IT87_REG_TEMP_ENABLE) */
@@ -475,15 +543,25 @@ static int DIV_TO_REG(int val)
 }
 #define DIV_FROM_REG(val) (1 << (val))
 
+/*
+ * PWM base frequencies. The frequency has to be divided by either 128 or 256,
+ * depending on the chip type, to calculate the actual PWM frequency.
+ *
+ * Some of the chip datasheets list a slowest base frequency of 51 kHz
+ * instead of 750 kHz, which would result in a PWM frequency of about
+ * 200 Hz. Sometimes both PWM frequency select registers are affected,
+ * sometimes just one. It is unknown if this is a datasheet error or real,
+ * so this is ignored for now.
+ */
 static const unsigned int pwm_freq[8] = {
-       48000000 / 128,
-       24000000 / 128,
-       12000000 / 128,
-       8000000 / 128,
-       6000000 / 128,
-       3000000 / 128,
-       1500000 / 128,
-       750000 / 128,
+       48000000,
+       24000000,
+       12000000,
+       8000000,
+       6000000,
+       3000000,
+       1500000,
+       750000,
 };
 
 static int it87_probe(struct platform_device *pdev);
@@ -801,8 +879,11 @@ static ssize_t show_pwm_freq(struct device *dev, struct device_attribute *attr,
 {
        struct it87_data *data = it87_update_device(dev);
        int index = (data->fan_ctl >> 4) & 0x07;
+       unsigned int freq;
 
-       return sprintf(buf, "%u\n", pwm_freq[index]);
+       freq = pwm_freq[index] / (has_newer_autopwm(data) ? 256 : 128);
+
+       return sprintf(buf, "%u\n", freq);
 }
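
With the table now holding raw base clocks, the divider is applied at display time. A quick arithmetic check for the slowest base clock:

	/* pwm_freq[7] = 750000 Hz:
	 *   older chips   (divider 128): 750000 / 128 = 5859 Hz
	 *   newer autopwm (divider 256): 750000 / 256 = 2929 Hz
	 */
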
 
 static ssize_t set_fan(struct device *dev, struct device_attribute *attr,
@@ -1024,6 +1105,9 @@ static ssize_t set_pwm_freq(struct device *dev,
        if (kstrtoul(buf, 10, &val) < 0)
                return -EINVAL;
 
+       val = clamp_val(val, 0, 1000000);
+       val *= has_newer_autopwm(data) ? 256 : 128;
+
        /* Search for the nearest available frequency */
        for (i = 0; i < 7; i++) {
                if (val > (pwm_freq[i] + pwm_freq[i+1]) / 2)
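
The written value is likewise clamped and rescaled before the nearest-match search (the break and the pwm_freq[i] selection follow in the unshown remainder of the loop). A worked example, assuming a chip with the 256 divider and a write of 25000:

	/* val = 25000 * 256 = 6400000
	 * i = 0..3: midpoints 36M, 18M, 10M, 7M -> 6.4M is not greater
	 * i = 4:    midpoint (6M + 3M) / 2 = 4.5M -> break, select 6 MHz
	 * reported back: 6000000 / 256 = 23437 Hz
	 */
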
@@ -1196,6 +1280,10 @@ static SENSOR_DEVICE_ATTR_2(fan5_input, S_IRUGO, show_fan, NULL, 4, 0);
 static SENSOR_DEVICE_ATTR_2(fan5_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
                            4, 1);
 
+static SENSOR_DEVICE_ATTR_2(fan6_input, S_IRUGO, show_fan, NULL, 5, 0);
+static SENSOR_DEVICE_ATTR_2(fan6_min, S_IRUGO | S_IWUSR, show_fan, set_fan,
+                           5, 1);
+
 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
                          show_pwm_enable, set_pwm_enable, 0);
 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, 0);
@@ -1326,6 +1414,7 @@ static SENSOR_DEVICE_ATTR(fan2_alarm, S_IRUGO, show_alarm, NULL, 1);
 static SENSOR_DEVICE_ATTR(fan3_alarm, S_IRUGO, show_alarm, NULL, 2);
 static SENSOR_DEVICE_ATTR(fan4_alarm, S_IRUGO, show_alarm, NULL, 3);
 static SENSOR_DEVICE_ATTR(fan5_alarm, S_IRUGO, show_alarm, NULL, 6);
+static SENSOR_DEVICE_ATTR(fan6_alarm, S_IRUGO, show_alarm, NULL, 7);
 static SENSOR_DEVICE_ATTR(temp1_alarm, S_IRUGO, show_alarm, NULL, 16);
 static SENSOR_DEVICE_ATTR(temp2_alarm, S_IRUGO, show_alarm, NULL, 17);
 static SENSOR_DEVICE_ATTR(temp3_alarm, S_IRUGO, show_alarm, NULL, 18);
@@ -1376,6 +1465,7 @@ static SENSOR_DEVICE_ATTR(fan2_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(fan3_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(fan4_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(fan5_beep, S_IRUGO, show_beep, set_beep, 0);
+static SENSOR_DEVICE_ATTR(fan6_beep, S_IRUGO, show_beep, set_beep, 0);
 static SENSOR_DEVICE_ATTR(temp1_beep, S_IRUGO | S_IWUSR,
                          show_beep, set_beep, 2);
 static SENSOR_DEVICE_ATTR(temp2_beep, S_IRUGO, show_beep, NULL, 2);
@@ -1579,7 +1669,7 @@ static struct attribute *it87_attributes_temp_beep[] = {
        &sensor_dev_attr_temp3_beep.dev_attr.attr,
 };
 
-static struct attribute *it87_attributes_fan[5][3+1] = { {
+static struct attribute *it87_attributes_fan[6][3+1] = { {
        &sensor_dev_attr_fan1_input.dev_attr.attr,
        &sensor_dev_attr_fan1_min.dev_attr.attr,
        &sensor_dev_attr_fan1_alarm.dev_attr.attr,
@@ -1604,14 +1694,20 @@ static struct attribute *it87_attributes_fan[5][3+1] = { {
        &sensor_dev_attr_fan5_min.dev_attr.attr,
        &sensor_dev_attr_fan5_alarm.dev_attr.attr,
        NULL
+}, {
+       &sensor_dev_attr_fan6_input.dev_attr.attr,
+       &sensor_dev_attr_fan6_min.dev_attr.attr,
+       &sensor_dev_attr_fan6_alarm.dev_attr.attr,
+       NULL
 } };
 
-static const struct attribute_group it87_group_fan[5] = {
+static const struct attribute_group it87_group_fan[6] = {
        { .attrs = it87_attributes_fan[0] },
        { .attrs = it87_attributes_fan[1] },
        { .attrs = it87_attributes_fan[2] },
        { .attrs = it87_attributes_fan[3] },
        { .attrs = it87_attributes_fan[4] },
+       { .attrs = it87_attributes_fan[5] },
 };
 
 static const struct attribute *it87_attributes_fan_div[] = {
@@ -1693,6 +1789,7 @@ static struct attribute *it87_attributes_fan_beep[] = {
        &sensor_dev_attr_fan3_beep.dev_attr.attr,
        &sensor_dev_attr_fan4_beep.dev_attr.attr,
        &sensor_dev_attr_fan5_beep.dev_attr.attr,
+       &sensor_dev_attr_fan6_beep.dev_attr.attr,
 };
 
 static struct attribute *it87_attributes_vid[] = {
@@ -1724,6 +1821,7 @@ static int __init it87_find(unsigned short *address,
        int err;
        u16 chip_type;
        const char *board_vendor, *board_name;
+       const struct it87_devices *config;
 
        err = superio_enter();
        if (err)
@@ -1761,16 +1859,28 @@ static int __init it87_find(unsigned short *address,
        case IT8772E_DEVID:
                sio_data->type = it8772;
                break;
+       case IT8781F_DEVID:
+               sio_data->type = it8781;
+               break;
        case IT8782F_DEVID:
                sio_data->type = it8782;
                break;
        case IT8783E_DEVID:
                sio_data->type = it8783;
                break;
+       case IT8786E_DEVID:
+               sio_data->type = it8786;
+               break;
+       case IT8790E_DEVID:
+               sio_data->type = it8790;
+               break;
        case IT8603E_DEVID:
        case IT8623E_DEVID:
                sio_data->type = it8603;
                break;
+       case IT8620E_DEVID:
+               sio_data->type = it8620;
+               break;
        case 0xffff:    /* No device at all */
                goto exit;
        default:
@@ -1792,30 +1902,34 @@ static int __init it87_find(unsigned short *address,
 
        err = 0;
        sio_data->revision = superio_inb(DEVREV) & 0x0f;
-       pr_info("Found IT%04x%c chip at 0x%x, revision %d\n", chip_type,
-               chip_type == 0x8771 || chip_type == 0x8772 ||
-               chip_type == 0x8603 ? 'E' : 'F', *address,
-               sio_data->revision);
+       pr_info("Found IT%04x%s chip at 0x%x, revision %d\n", chip_type,
+               it87_devices[sio_data->type].suffix,
+               *address, sio_data->revision);
+
+       config = &it87_devices[sio_data->type];
+
+       /* in7 (VSB or VCCH5V) is always internal on some chips */
+       if (has_in7_internal(config))
+               sio_data->internal |= (1 << 1);
 
        /* in8 (Vbat) is always internal */
-       sio_data->internal = (1 << 2);
+       sio_data->internal |= (1 << 2);
+
        /* Only the IT8603E has in9 */
        if (sio_data->type != it8603)
                sio_data->skip_in |= (1 << 9);
 
-       /* Read GPIO config and VID value from LDN 7 (GPIO) */
-       if (sio_data->type == it87) {
-               /* The IT8705F doesn't have VID pins at all */
+       if (!has_vid(config))
                sio_data->skip_vid = 1;
 
+       /* Read GPIO config and VID value from LDN 7 (GPIO) */
+       if (sio_data->type == it87) {
                /* The IT8705F has a different LD number for GPIO */
                superio_select(5);
                sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
        } else if (sio_data->type == it8783) {
                int reg25, reg27, reg2a, reg2c, regef;
 
-               sio_data->skip_vid = 1; /* No VID */
-
                superio_select(GPIO);
 
                reg25 = superio_inb(IT87_SIO_GPIO1_REG);
@@ -1881,7 +1995,6 @@ static int __init it87_find(unsigned short *address,
        } else if (sio_data->type == it8603) {
                int reg27, reg29;
 
-               sio_data->skip_vid = 1; /* No VID */
                superio_select(GPIO);
 
                reg27 = superio_inb(IT87_SIO_GPIO3_REG);
@@ -1902,13 +2015,35 @@ static int __init it87_find(unsigned short *address,
                sio_data->skip_in |= (1 << 5); /* No VIN5 */
                sio_data->skip_in |= (1 << 6); /* No VIN6 */
 
-               /* no fan4 */
-               sio_data->skip_pwm |= (1 << 3);
-               sio_data->skip_fan |= (1 << 3);
-
-               sio_data->internal |= (1 << 1); /* in7 is VSB */
                sio_data->internal |= (1 << 3); /* in9 is AVCC */
 
+               sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
+       } else if (sio_data->type == it8620) {
+               int reg;
+
+               superio_select(GPIO);
+
+               /* Check for fan4, fan5 */
+               reg = superio_inb(IT87_SIO_GPIO2_REG);
+               if (!(reg & (1 << 5)))
+                       sio_data->skip_fan |= (1 << 3);
+               if (!(reg & (1 << 4)))
+                       sio_data->skip_fan |= (1 << 4);
+
+               /* Check for pwm3, fan3 */
+               reg = superio_inb(IT87_SIO_GPIO3_REG);
+               if (reg & (1 << 6))
+                       sio_data->skip_pwm |= (1 << 2);
+               if (reg & (1 << 7))
+                       sio_data->skip_fan |= (1 << 2);
+
+               /* Check for pwm2, fan2 */
+               reg = superio_inb(IT87_SIO_GPIO5_REG);
+               if (reg & (1 << 1))
+                       sio_data->skip_pwm |= (1 << 1);
+               if (reg & (1 << 2))
+                       sio_data->skip_fan |= (1 << 1);
+
                sio_data->beep_pin = superio_inb(IT87_SIO_BEEP_PIN_REG) & 0x3f;
        } else {
                int reg;
@@ -1917,15 +2052,7 @@ static int __init it87_find(unsigned short *address,
                superio_select(GPIO);
 
                reg = superio_inb(IT87_SIO_GPIO3_REG);
-               if (sio_data->type == it8721 || sio_data->type == it8728 ||
-                   sio_data->type == it8771 || sio_data->type == it8772 ||
-                   sio_data->type == it8782) {
-                       /*
-                        * IT8721F/IT8758E, and IT8782F don't have VID pins
-                        * at all, not sure about the IT8728F and compatibles.
-                        */
-                       sio_data->skip_vid = 1;
-               } else {
+               if (!sio_data->skip_vid) {
                        /* We need at least 4 VID pins */
                        if (reg & 0x0f) {
                                pr_info("VID is disabled (pins used for GPIO)\n");
@@ -1975,10 +2102,7 @@ static int __init it87_find(unsigned short *address,
                }
                if (reg & (1 << 0))
                        sio_data->internal |= (1 << 0);
-               if ((reg & (1 << 1)) || sio_data->type == it8721 ||
-                   sio_data->type == it8728 ||
-                   sio_data->type == it8771 ||
-                   sio_data->type == it8772)
+               if (reg & (1 << 1))
                        sio_data->internal |= (1 << 1);
 
                /*
@@ -2050,7 +2174,7 @@ static void it87_remove_files(struct device *dev)
                        sysfs_remove_file(&dev->kobj,
                                          it87_attributes_temp_beep[i]);
        }
-       for (i = 0; i < 5; i++) {
+       for (i = 0; i < 6; i++) {
                if (!(data->has_fan & (1 << i)))
                        continue;
                sysfs_remove_group(&dev->kobj, &it87_group_fan[i]);
@@ -2062,7 +2186,7 @@ static void it87_remove_files(struct device *dev)
                                          it87_attributes_fan_div[i]);
        }
        for (i = 0; i < 3; i++) {
-               if (sio_data->skip_pwm & (1 << 0))
+               if (sio_data->skip_pwm & (1 << i))
                        continue;
                sysfs_remove_group(&dev->kobj, &it87_group_pwm[i]);
                if (has_old_autopwm(data))
@@ -2112,13 +2236,14 @@ static int it87_probe(struct platform_device *pdev)
        case it87:
                if (sio_data->revision >= 0x03) {
                        data->features &= ~FEAT_OLD_AUTOPWM;
-                       data->features |= FEAT_16BIT_FANS;
+                       data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS;
                }
                break;
        case it8712:
                if (sio_data->revision >= 0x08) {
                        data->features &= ~FEAT_OLD_AUTOPWM;
-                       data->features |= FEAT_16BIT_FANS;
+                       data->features |= FEAT_FAN16_CONFIG | FEAT_16BIT_FANS |
+                                         FEAT_FIVE_FANS;
                }
                break;
        default:
@@ -2147,7 +2272,8 @@ static int it87_probe(struct platform_device *pdev)
                        data->in_scaled |= (1 << 8);    /* in8 is Vbat */
                if (sio_data->internal & (1 << 3))
                        data->in_scaled |= (1 << 9);    /* in9 is AVCC */
-       } else if (sio_data->type == it8782 || sio_data->type == it8783) {
+       } else if (sio_data->type == it8781 || sio_data->type == it8782 ||
+                  sio_data->type == it8783) {
                if (sio_data->internal & (1 << 0))
                        data->in_scaled |= (1 << 3);    /* in3 is VCC5V */
                if (sio_data->internal & (1 << 1))
@@ -2205,7 +2331,7 @@ static int it87_probe(struct platform_device *pdev)
 
        /* Do not create fan files for disabled fans */
        fan_beep_need_rw = 1;
-       for (i = 0; i < 5; i++) {
+       for (i = 0; i < 6; i++) {
                if (!(data->has_fan & (1 << i)))
                        continue;
                err = sysfs_create_group(&dev->kobj, &it87_group_fan[i]);
@@ -2450,24 +2576,26 @@ static void it87_init_device(struct platform_device *pdev)
        }
        data->has_fan = (data->fan_main_ctrl >> 4) & 0x07;
 
-       /* Set tachometers to 16-bit mode if needed, IT8603E (and IT8728F?)
-        * has it by default */
-       if (has_16bit_fans(data) && data->type != it8603) {
-               tmp = it87_read_value(data, IT87_REG_FAN_16BIT);
+       tmp = it87_read_value(data, IT87_REG_FAN_16BIT);
+
+       /* Set tachometers to 16-bit mode if needed */
+       if (has_fan16_config(data)) {
                if (~tmp & 0x07 & data->has_fan) {
                        dev_dbg(&pdev->dev,
                                "Setting fan1-3 to 16-bit mode\n");
                        it87_write_value(data, IT87_REG_FAN_16BIT,
                                         tmp | 0x07);
                }
-               /* IT8705F, IT8782F, and IT8783E/F only support three fans. */
-               if (data->type != it87 && data->type != it8782 &&
-                   data->type != it8783) {
-                       if (tmp & (1 << 4))
-                               data->has_fan |= (1 << 3); /* fan4 enabled */
-                       if (tmp & (1 << 5))
-                               data->has_fan |= (1 << 4); /* fan5 enabled */
-               }
+       }
+
+       /* Check for additional fans */
+       if (has_five_fans(data)) {
+               if (tmp & (1 << 4))
+                       data->has_fan |= (1 << 3); /* fan4 enabled */
+               if (tmp & (1 << 5))
+                       data->has_fan |= (1 << 4); /* fan5 enabled */
+               if (has_six_fans(data) && (tmp & (1 << 2)))
+                       data->has_fan |= (1 << 5); /* fan6 enabled */
        }
 
        /* Fan input pins may be used for alternative functions */
@@ -2535,7 +2663,7 @@ static struct it87_data *it87_update_device(struct device *dev)
                if (data->type == it8603)
                        data->in[9][0] = it87_read_value(data, 0x2f);
 
-               for (i = 0; i < 5; i++) {
+               for (i = 0; i < 6; i++) {
                        /* Skip disabled fans */
                        if (!(data->has_fan & (1 << i)))
                                continue;
index 996bdfd5cf25f93679857407951253d1f1e5b5ca..9887d3224a865e9aa0de33e8d38894600cc0be3b 100644 (file)
@@ -87,11 +87,14 @@ static const unsigned short normal_i2c[] = {
 #define AT30TSE004_DEVID_MASK  0xffff
 
 /* IDT */
-#define TS3000B3_DEVID         0x2903  /* Also matches TSE2002B3 */
-#define TS3000B3_DEVID_MASK    0xffff
+#define TSE2004_DEVID          0x2200
+#define TSE2004_DEVID_MASK     0xff00
 
-#define TS3000GB2_DEVID                0x2912  /* Also matches TSE2002GB2 */
-#define TS3000GB2_DEVID_MASK   0xffff
+#define TS3000_DEVID           0x2900  /* Also matches TSE2002 */
+#define TS3000_DEVID_MASK      0xff00
+
+#define TS3001_DEVID           0x3000
+#define TS3001_DEVID_MASK      0xff00
 
 /* Maxim */
 #define MAX6604_DEVID          0x3e00
@@ -152,8 +155,9 @@ static struct jc42_chips jc42_chips[] = {
        { ADT_MANID, ADT7408_DEVID, ADT7408_DEVID_MASK },
        { ATMEL_MANID, AT30TS00_DEVID, AT30TS00_DEVID_MASK },
        { ATMEL_MANID2, AT30TSE004_DEVID, AT30TSE004_DEVID_MASK },
-       { IDT_MANID, TS3000B3_DEVID, TS3000B3_DEVID_MASK },
-       { IDT_MANID, TS3000GB2_DEVID, TS3000GB2_DEVID_MASK },
+       { IDT_MANID, TSE2004_DEVID, TSE2004_DEVID_MASK },
+       { IDT_MANID, TS3000_DEVID, TS3000_DEVID_MASK },
+       { IDT_MANID, TS3001_DEVID, TS3001_DEVID_MASK },
        { MAX_MANID, MAX6604_DEVID, MAX6604_DEVID_MASK },
        { MCP_MANID, MCP9804_DEVID, MCP9804_DEVID_MASK },
        { MCP_MANID, MCP98242_DEVID, MCP98242_DEVID_MASK },
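
Relaxing the IDT device-id masks from 0xffff to 0xff00 lets a single table entry match a whole device family rather than one stepping. For example, the TSE2002B3 (device id 0x2903, previously matched by its own TS3000B3 entry) now falls under the TS3000 row:

	/* match rule: (devid & entry.devid_mask) == entry.devid        */
	/* TSE2002B3: 0x2903 & 0xff00 = 0x2900 -> TS3000_DEVID  (match) */
	/* TSE2004:   0x22xx & 0xff00 = 0x2200 -> TSE2004_DEVID (match) */
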
index 1be41177b620f5179f254f0e8936a74bfb79fe94..4fcb481032992f475e8d196dc3a9dbcfa2407b30 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <linux/io.h>
 #include "lm75.h"
 
@@ -880,12 +881,12 @@ struct nct6775_data {
        u16 have_temp;
        u16 have_temp_fixed;
        u16 have_in;
-#ifdef CONFIG_PM
+
        /* Remember extra register values over suspend/resume */
        u8 vbat;
        u8 fandiv1;
        u8 fandiv2;
-#endif
+       u8 sio_reg_enable;
 };
 
 struct nct6775_sio_data {
@@ -3178,6 +3179,10 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
        int sioreg = data->sioreg;
        int regval;
 
+       /* Store SIO_REG_ENABLE for use during resume */
+       superio_select(sioreg, NCT6775_LD_HWM);
+       data->sio_reg_enable = superio_inb(sioreg, SIO_REG_ENABLE);
+
        /* fan4 and fan5 share some pins with the GPIO and serial flash */
        if (data->kind == nct6775) {
                regval = superio_inb(sioreg, 0x2c);
@@ -3195,21 +3200,38 @@ nct6775_check_fan_inputs(struct nct6775_data *data)
                pwm6pin = false;
        } else if (data->kind == nct6776) {
                bool gpok = superio_inb(sioreg, 0x27) & 0x80;
+               const char *board_vendor, *board_name;
 
-               superio_select(sioreg, NCT6775_LD_HWM);
-               regval = superio_inb(sioreg, SIO_REG_ENABLE);
+               board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+               board_name = dmi_get_system_info(DMI_BOARD_NAME);
+
+               if (board_name && board_vendor &&
+                   !strcmp(board_vendor, "ASRock")) {
+                       /*
+                        * Auxiliary fan monitoring is not enabled on ASRock
+                        * Z77 Pro4-M if booted in UEFI Ultra-FastBoot mode.
+                        * Observed with BIOS version 2.00.
+                        */
+                       if (!strcmp(board_name, "Z77 Pro4-M")) {
+                               if ((data->sio_reg_enable & 0xe0) != 0xe0) {
+                                       data->sio_reg_enable |= 0xe0;
+                                       superio_outb(sioreg, SIO_REG_ENABLE,
+                                                    data->sio_reg_enable);
+                               }
+                       }
+               }
 
-               if (regval & 0x80)
+               if (data->sio_reg_enable & 0x80)
                        fan3pin = gpok;
                else
                        fan3pin = !(superio_inb(sioreg, 0x24) & 0x40);
 
-               if (regval & 0x40)
+               if (data->sio_reg_enable & 0x40)
                        fan4pin = gpok;
                else
                        fan4pin = superio_inb(sioreg, 0x1C) & 0x01;
 
-               if (regval & 0x20)
+               if (data->sio_reg_enable & 0x20)
                        fan5pin = gpok;
                else
                        fan5pin = superio_inb(sioreg, 0x1C) & 0x02;
@@ -3989,8 +4011,7 @@ static void nct6791_enable_io_mapping(int sioaddr)
        }
 }
 
-#ifdef CONFIG_PM
-static int nct6775_suspend(struct device *dev)
+static int __maybe_unused nct6775_suspend(struct device *dev)
 {
        struct nct6775_data *data = nct6775_update_device(dev);
 
@@ -4005,22 +4026,29 @@ static int nct6775_suspend(struct device *dev)
        return 0;
 }
 
-static int nct6775_resume(struct device *dev)
+static int __maybe_unused nct6775_resume(struct device *dev)
 {
        struct nct6775_data *data = dev_get_drvdata(dev);
+       int sioreg = data->sioreg;
        int i, j, err = 0;
+       u8 reg;
 
        mutex_lock(&data->update_lock);
        data->bank = 0xff;              /* Force initial bank selection */
 
-       if (data->kind == nct6791 || data->kind == nct6792) {
-               err = superio_enter(data->sioreg);
-               if (err)
-                       goto abort;
+       err = superio_enter(sioreg);
+       if (err)
+               goto abort;
 
-               nct6791_enable_io_mapping(data->sioreg);
-               superio_exit(data->sioreg);
-       }
+       superio_select(sioreg, NCT6775_LD_HWM);
+       reg = superio_inb(sioreg, SIO_REG_ENABLE);
+       if (reg != data->sio_reg_enable)
+               superio_outb(sioreg, SIO_REG_ENABLE, data->sio_reg_enable);
+
+       if (data->kind == nct6791 || data->kind == nct6792)
+               nct6791_enable_io_mapping(sioreg);
+
+       superio_exit(sioreg);
 
        /* Restore limits */
        for (i = 0; i < data->in_num; i++) {
@@ -4066,22 +4094,12 @@ abort:
        return err;
 }
 
-static const struct dev_pm_ops nct6775_dev_pm_ops = {
-       .suspend = nct6775_suspend,
-       .resume = nct6775_resume,
-       .freeze = nct6775_suspend,
-       .restore = nct6775_resume,
-};
-
-#define NCT6775_DEV_PM_OPS     (&nct6775_dev_pm_ops)
-#else
-#define NCT6775_DEV_PM_OPS     NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(nct6775_dev_pm_ops, nct6775_suspend, nct6775_resume);
 
 static struct platform_driver nct6775_driver = {
        .driver = {
                .name   = DRVNAME,
-               .pm     = NCT6775_DEV_PM_OPS,
+               .pm     = &nct6775_dev_pm_ops,
        },
        .probe          = nct6775_probe,
 };
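
Replacing the open-coded #ifdef CONFIG_PM block with SIMPLE_DEV_PM_OPS() plus __maybe_unused is the standard idiom: the macro wires up the system-sleep callbacks only when CONFIG_PM_SLEEP is enabled, and __maybe_unused silences the unused-function warning (and lets the compiler discard the code) in builds without it. A simplified sketch of what the macro expands to when CONFIG_PM_SLEEP is set (all fields stay NULL otherwise):

	static const struct dev_pm_ops nct6775_dev_pm_ops = {
		.suspend  = nct6775_suspend,
		.resume   = nct6775_resume,
		.freeze   = nct6775_suspend,
		.thaw     = nct6775_resume,
		.poweroff = nct6775_suspend,
		.restore  = nct6775_resume,
	};
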
diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c
new file mode 100644 (file)
index 0000000..b77b82f
--- /dev/null
@@ -0,0 +1,593 @@
+/*
+ * nct7904.c - driver for Nuvoton NCT7904D.
+ *
+ * Copyright (c) 2015 Kontron
+ * Author: Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+
+#define VENDOR_ID_REG          0x7A    /* Any bank */
+#define NUVOTON_ID             0x50
+#define CHIP_ID_REG            0x7B    /* Any bank */
+#define NCT7904_ID             0xC5
+#define DEVICE_ID_REG          0x7C    /* Any bank */
+
+#define BANK_SEL_REG           0xFF
+#define BANK_0                 0x00
+#define BANK_1                 0x01
+#define BANK_2                 0x02
+#define BANK_3                 0x03
+#define BANK_4                 0x04
+#define BANK_MAX               0x04
+
+#define FANIN_MAX              12      /* Counted from 1 */
+#define VSEN_MAX               21      /* VSEN1..14, 3VDD, VBAT, V3VSB,
+                                          LTD (not a voltage), VSEN17..19 */
+#define FANCTL_MAX             4       /* Counted from 1 */
+#define TCPU_MAX               8       /* Counted from 1 */
+#define TEMP_MAX               4       /* Counted from 1 */
+
+#define VT_ADC_CTRL0_REG       0x20    /* Bank 0 */
+#define VT_ADC_CTRL1_REG       0x21    /* Bank 0 */
+#define VT_ADC_CTRL2_REG       0x22    /* Bank 0 */
+#define FANIN_CTRL0_REG                0x24
+#define FANIN_CTRL1_REG                0x25
+#define DTS_T_CTRL0_REG                0x26
+#define DTS_T_CTRL1_REG                0x27
+#define VT_ADC_MD_REG          0x2E
+
+#define VSEN1_HV_REG           0x40    /* Bank 0; 2 regs (HV/LV) per sensor */
+#define TEMP_CH1_HV_REG                0x42    /* Bank 0; same as VSEN2_HV */
+#define LTD_HV_REG             0x62    /* Bank 0; 2 regs in VSEN range */
+#define FANIN1_HV_REG          0x80    /* Bank 0; 2 regs (HV/LV) per sensor */
+#define T_CPU1_HV_REG          0xA0    /* Bank 0; 2 regs (HV/LV) per sensor */
+
+#define PRTS_REG               0x03    /* Bank 2 */
+#define FANCTL1_FMR_REG                0x00    /* Bank 3; 1 reg per channel */
+#define FANCTL1_OUT_REG                0x10    /* Bank 3; 1 reg per channel */
+
+static const unsigned short normal_i2c[] = {
+       0x2d, 0x2e, I2C_CLIENT_END
+};
+
+struct nct7904_data {
+       struct i2c_client *client;
+       struct mutex bank_lock;
+       int bank_sel;
+       u32 fanin_mask;
+       u32 vsen_mask;
+       u32 tcpu_mask;
+       u8 fan_mode[FANCTL_MAX];
+};
+
+/* Access functions */
+static int nct7904_bank_lock(struct nct7904_data *data, unsigned bank)
+{
+       int ret;
+
+       mutex_lock(&data->bank_lock);
+       if (data->bank_sel == bank)
+               return 0;
+       ret = i2c_smbus_write_byte_data(data->client, BANK_SEL_REG, bank);
+       if (ret == 0)
+               data->bank_sel = bank;
+       else
+               data->bank_sel = -1;
+       return ret;
+}
+
+static inline void nct7904_bank_release(struct nct7904_data *data)
+{
+       mutex_unlock(&data->bank_lock);
+}
+
+/* Read 1-byte register. Returns unsigned reg or -ERRNO on error. */
+static int nct7904_read_reg(struct nct7904_data *data,
+                           unsigned bank, unsigned reg)
+{
+       struct i2c_client *client = data->client;
+       int ret;
+
+       ret = nct7904_bank_lock(data, bank);
+       if (ret == 0)
+               ret = i2c_smbus_read_byte_data(client, reg);
+
+       nct7904_bank_release(data);
+       return ret;
+}
+
+/*
+ * Read a 2-byte register pair (high byte first). Returns the combined
+ * value or -ERRNO on error.
+ */
+static int nct7904_read_reg16(struct nct7904_data *data,
+                             unsigned bank, unsigned reg)
+{
+       struct i2c_client *client = data->client;
+       int ret, hi;
+
+       ret = nct7904_bank_lock(data, bank);
+       if (ret == 0) {
+               ret = i2c_smbus_read_byte_data(client, reg);
+               if (ret >= 0) {
+                       hi = ret;
+                       ret = i2c_smbus_read_byte_data(client, reg + 1);
+                       if (ret >= 0)
+                               ret |= hi << 8;
+               }
+       }
+
+       nct7904_bank_release(data);
+       return ret;
+}
+
+/* Write 1-byte register. Returns 0 or -ERRNO on error. */
+static int nct7904_write_reg(struct nct7904_data *data,
+                            unsigned bank, unsigned reg, u8 val)
+{
+       struct i2c_client *client = data->client;
+       int ret;
+
+       ret = nct7904_bank_lock(data, bank);
+       if (ret == 0)
+               ret = i2c_smbus_write_byte_data(client, reg, val);
+
+       nct7904_bank_release(data);
+       return ret;
+}
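
Note the locking scheme in these accessors: nct7904_bank_lock() returns with bank_lock held whether or not the bank switch succeeded, and each accessor drops it through nct7904_bank_release(), so the bank-select write and the subsequent data access stay atomic with respect to concurrent callers. Typical use, as in the attribute handlers below:

	/* e.g. read the 16-bit fan-count register pair for fan 'index' */
	ret = nct7904_read_reg16(data, BANK_0, FANIN1_HV_REG + index * 2);
	if (ret < 0)
		return ret;	/* bank switch or SMBus access failed */
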
+
+/* FANIN ATTR */
+static ssize_t show_fan(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       int ret;
+       unsigned cnt, rpm;
+
+       ret = nct7904_read_reg16(data, BANK_0, FANIN1_HV_REG + index * 2);
+       if (ret < 0)
+               return ret;
+       cnt = ((ret & 0xff00) >> 3) | (ret & 0x1f);
+       if (cnt == 0x1fff)
+               rpm = 0;
+       else
+               rpm = 1350000 / cnt;
+       return sprintf(buf, "%u\n", rpm);
+}
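
The fan count is 13 bits split across the register pair (8 high bits above 5 low bits), with 0x1fff meaning no valid reading. Worked numbers for an assumed raw readback of 0x1234:

	/* cnt = ((0x1234 & 0xff00) >> 3) | (0x1234 & 0x1f)
	 *     = 0x240 | 0x14 = 0x254 = 596
	 * rpm = 1350000 / 596 = 2265
	 */
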
+
+static umode_t nct7904_fanin_is_visible(struct kobject *kobj,
+                                       struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nct7904_data *data = dev_get_drvdata(dev);
+
+       if (data->fanin_mask & (1 << n))
+               return a->mode;
+       return 0;
+}
+
+static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, show_fan, NULL, 0);
+static SENSOR_DEVICE_ATTR(fan2_input, S_IRUGO, show_fan, NULL, 1);
+static SENSOR_DEVICE_ATTR(fan3_input, S_IRUGO, show_fan, NULL, 2);
+static SENSOR_DEVICE_ATTR(fan4_input, S_IRUGO, show_fan, NULL, 3);
+static SENSOR_DEVICE_ATTR(fan5_input, S_IRUGO, show_fan, NULL, 4);
+static SENSOR_DEVICE_ATTR(fan6_input, S_IRUGO, show_fan, NULL, 5);
+static SENSOR_DEVICE_ATTR(fan7_input, S_IRUGO, show_fan, NULL, 6);
+static SENSOR_DEVICE_ATTR(fan8_input, S_IRUGO, show_fan, NULL, 7);
+static SENSOR_DEVICE_ATTR(fan9_input, S_IRUGO, show_fan, NULL, 8);
+static SENSOR_DEVICE_ATTR(fan10_input, S_IRUGO, show_fan, NULL, 9);
+static SENSOR_DEVICE_ATTR(fan11_input, S_IRUGO, show_fan, NULL, 10);
+static SENSOR_DEVICE_ATTR(fan12_input, S_IRUGO, show_fan, NULL, 11);
+
+static struct attribute *nct7904_fanin_attrs[] = {
+       &sensor_dev_attr_fan1_input.dev_attr.attr,
+       &sensor_dev_attr_fan2_input.dev_attr.attr,
+       &sensor_dev_attr_fan3_input.dev_attr.attr,
+       &sensor_dev_attr_fan4_input.dev_attr.attr,
+       &sensor_dev_attr_fan5_input.dev_attr.attr,
+       &sensor_dev_attr_fan6_input.dev_attr.attr,
+       &sensor_dev_attr_fan7_input.dev_attr.attr,
+       &sensor_dev_attr_fan8_input.dev_attr.attr,
+       &sensor_dev_attr_fan9_input.dev_attr.attr,
+       &sensor_dev_attr_fan10_input.dev_attr.attr,
+       &sensor_dev_attr_fan11_input.dev_attr.attr,
+       &sensor_dev_attr_fan12_input.dev_attr.attr,
+       NULL
+};
+
+static const struct attribute_group nct7904_fanin_group = {
+       .attrs = nct7904_fanin_attrs,
+       .is_visible = nct7904_fanin_is_visible,
+};
+
+/* VSEN ATTR */
+static ssize_t show_voltage(struct device *dev,
+                           struct device_attribute *devattr, char *buf)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       int ret;
+       int volt;
+
+       ret = nct7904_read_reg16(data, BANK_0, VSEN1_HV_REG + index * 2);
+       if (ret < 0)
+               return ret;
+       volt = ((ret & 0xff00) >> 5) | (ret & 0x7);
+       if (index < 14)
+               volt *= 2; /* 0.002V scale */
+       else
+               volt *= 6; /* 0.006V scale */
+
+       return sprintf(buf, "%d\n", volt);
+}
+
+static ssize_t show_ltemp(struct device *dev,
+                         struct device_attribute *devattr, char *buf)
+{
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       int ret;
+       int temp;
+
+       ret = nct7904_read_reg16(data, BANK_0, LTD_HV_REG);
+       if (ret < 0)
+               return ret;
+       temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
+       temp = sign_extend32(temp, 10) * 125;
+
+       return sprintf(buf, "%d\n", temp);
+}
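
Voltage and temperature readings share an 11-bit packing (8 high bits above 3 low bits); temperatures are sign-extended from bit 10 and scaled by 125 (0.125 degrees C per LSB, reported in millidegrees), while voltages scale by 2 mV or 6 mV per LSB depending on the channel. Assumed sample values:

	/* temp field 0x190 = 400 -> 400 * 125 = 50000 (50.000 degrees C)
	 * temp field 0x7ff      -> sign_extend32() = -1 -> -125
	 * volt field 1650 on a 2 mV/LSB channel (VSEN1..14) -> 3300 mV
	 */
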
+
+static umode_t nct7904_vsen_is_visible(struct kobject *kobj,
+                                      struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nct7904_data *data = dev_get_drvdata(dev);
+
+       if (data->vsen_mask & (1 << n))
+               return a->mode;
+       return 0;
+}
+
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_voltage, NULL, 0);
+static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_voltage, NULL, 1);
+static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, show_voltage, NULL, 2);
+static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, show_voltage, NULL, 3);
+static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, show_voltage, NULL, 4);
+static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, show_voltage, NULL, 5);
+static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, show_voltage, NULL, 6);
+static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, show_voltage, NULL, 7);
+static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, show_voltage, NULL, 8);
+static SENSOR_DEVICE_ATTR(in10_input, S_IRUGO, show_voltage, NULL, 9);
+static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, show_voltage, NULL, 10);
+static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, show_voltage, NULL, 11);
+static SENSOR_DEVICE_ATTR(in13_input, S_IRUGO, show_voltage, NULL, 12);
+static SENSOR_DEVICE_ATTR(in14_input, S_IRUGO, show_voltage, NULL, 13);
+/*
+ * The next three voltage sensors have dedicated names in the Nuvoton
+ * documentation (3VDD, VBAT, 3VSB), but we map them to vacant in*
+ * numbers.
+ */
+static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, show_voltage, NULL, 14);
+static SENSOR_DEVICE_ATTR(in16_input, S_IRUGO, show_voltage, NULL, 15);
+static SENSOR_DEVICE_ATTR(in20_input, S_IRUGO, show_voltage, NULL, 16);
+/* This is not a voltage, but a local temperature sensor. */
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_ltemp, NULL, 0);
+static SENSOR_DEVICE_ATTR(in17_input, S_IRUGO, show_voltage, NULL, 18);
+static SENSOR_DEVICE_ATTR(in18_input, S_IRUGO, show_voltage, NULL, 19);
+static SENSOR_DEVICE_ATTR(in19_input, S_IRUGO, show_voltage, NULL, 20);
+
+static struct attribute *nct7904_vsen_attrs[] = {
+       &sensor_dev_attr_in1_input.dev_attr.attr,
+       &sensor_dev_attr_in2_input.dev_attr.attr,
+       &sensor_dev_attr_in3_input.dev_attr.attr,
+       &sensor_dev_attr_in4_input.dev_attr.attr,
+       &sensor_dev_attr_in5_input.dev_attr.attr,
+       &sensor_dev_attr_in6_input.dev_attr.attr,
+       &sensor_dev_attr_in7_input.dev_attr.attr,
+       &sensor_dev_attr_in8_input.dev_attr.attr,
+       &sensor_dev_attr_in9_input.dev_attr.attr,
+       &sensor_dev_attr_in10_input.dev_attr.attr,
+       &sensor_dev_attr_in11_input.dev_attr.attr,
+       &sensor_dev_attr_in12_input.dev_attr.attr,
+       &sensor_dev_attr_in13_input.dev_attr.attr,
+       &sensor_dev_attr_in14_input.dev_attr.attr,
+       &sensor_dev_attr_in15_input.dev_attr.attr,
+       &sensor_dev_attr_in16_input.dev_attr.attr,
+       &sensor_dev_attr_in20_input.dev_attr.attr,
+       &sensor_dev_attr_temp1_input.dev_attr.attr,
+       &sensor_dev_attr_in17_input.dev_attr.attr,
+       &sensor_dev_attr_in18_input.dev_attr.attr,
+       &sensor_dev_attr_in19_input.dev_attr.attr,
+       NULL
+};
+
+static const struct attribute_group nct7904_vsen_group = {
+       .attrs = nct7904_vsen_attrs,
+       .is_visible = nct7904_vsen_is_visible,
+};
+
+/* CPU_TEMP ATTR */
+static ssize_t show_tcpu(struct device *dev,
+                        struct device_attribute *devattr, char *buf)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       int ret;
+       int temp;
+
+       ret = nct7904_read_reg16(data, BANK_0, T_CPU1_HV_REG + index * 2);
+       if (ret < 0)
+               return ret;
+
+       temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
+       temp = sign_extend32(temp, 10) * 125;
+       return sprintf(buf, "%d\n", temp);
+}
+
+static umode_t nct7904_tcpu_is_visible(struct kobject *kobj,
+                                      struct attribute *a, int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct nct7904_data *data = dev_get_drvdata(dev);
+
+       if (data->tcpu_mask & (1 << n))
+               return a->mode;
+       return 0;
+}
+
+/* "temp1_input" reserved for local temp */
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_tcpu, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_tcpu, NULL, 1);
+static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_tcpu, NULL, 2);
+static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, show_tcpu, NULL, 3);
+static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, show_tcpu, NULL, 4);
+static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, show_tcpu, NULL, 5);
+static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, show_tcpu, NULL, 6);
+static SENSOR_DEVICE_ATTR(temp9_input, S_IRUGO, show_tcpu, NULL, 7);
+
+static struct attribute *nct7904_tcpu_attrs[] = {
+       &sensor_dev_attr_temp2_input.dev_attr.attr,
+       &sensor_dev_attr_temp3_input.dev_attr.attr,
+       &sensor_dev_attr_temp4_input.dev_attr.attr,
+       &sensor_dev_attr_temp5_input.dev_attr.attr,
+       &sensor_dev_attr_temp6_input.dev_attr.attr,
+       &sensor_dev_attr_temp7_input.dev_attr.attr,
+       &sensor_dev_attr_temp8_input.dev_attr.attr,
+       &sensor_dev_attr_temp9_input.dev_attr.attr,
+       NULL
+};
+
+static const struct attribute_group nct7904_tcpu_group = {
+       .attrs = nct7904_tcpu_attrs,
+       .is_visible = nct7904_tcpu_is_visible,
+};
+
+/* PWM ATTR */
+static ssize_t store_pwm(struct device *dev, struct device_attribute *devattr,
+                        const char *buf, size_t count)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       unsigned long val;
+       int ret;
+
+       if (kstrtoul(buf, 10, &val) < 0)
+               return -EINVAL;
+       if (val > 255)
+               return -EINVAL;
+
+       ret = nct7904_write_reg(data, BANK_3, FANCTL1_OUT_REG + index, val);
+
+       return ret ? ret : count;
+}
+
+static ssize_t show_pwm(struct device *dev,
+                       struct device_attribute *devattr, char *buf)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       int val;
+
+       val = nct7904_read_reg(data, BANK_3, FANCTL1_OUT_REG + index);
+       if (val < 0)
+               return val;
+
+       return sprintf(buf, "%d\n", val);
+}
+
+static ssize_t store_mode(struct device *dev, struct device_attribute *devattr,
+                         const char *buf, size_t count)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       unsigned long val;
+       int ret;
+
+       if (kstrtoul(buf, 10, &val) < 0)
+               return -EINVAL;
+       if (val > 1 || (val && !data->fan_mode[index]))
+               return -EINVAL;
+
+       ret = nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + index,
+                               val ? data->fan_mode[index] : 0);
+
+       return ret ? ret : count;
+}
+
+/* Return 0 for manual mode or 1 for SmartFan mode */
+static ssize_t show_mode(struct device *dev,
+                        struct device_attribute *devattr, char *buf)
+{
+       int index = to_sensor_dev_attr(devattr)->index;
+       struct nct7904_data *data = dev_get_drvdata(dev);
+       int val;
+
+       val = nct7904_read_reg(data, BANK_3, FANCTL1_FMR_REG + index);
+       if (val < 0)
+               return val;
+
+       return sprintf(buf, "%d\n", val ? 1 : 0);
+}
+
+/* 2 attributes per channel: pwm and mode */
+static SENSOR_DEVICE_ATTR(fan1_pwm, S_IRUGO | S_IWUSR,
+                       show_pwm, store_pwm, 0);
+static SENSOR_DEVICE_ATTR(fan1_mode, S_IRUGO | S_IWUSR,
+                       show_mode, store_mode, 0);
+static SENSOR_DEVICE_ATTR(fan2_pwm, S_IRUGO | S_IWUSR,
+                       show_pwm, store_pwm, 1);
+static SENSOR_DEVICE_ATTR(fan2_mode, S_IRUGO | S_IWUSR,
+                       show_mode, store_mode, 1);
+static SENSOR_DEVICE_ATTR(fan3_pwm, S_IRUGO | S_IWUSR,
+                       show_pwm, store_pwm, 2);
+static SENSOR_DEVICE_ATTR(fan3_mode, S_IRUGO | S_IWUSR,
+                       show_mode, store_mode, 2);
+static SENSOR_DEVICE_ATTR(fan4_pwm, S_IRUGO | S_IWUSR,
+                       show_pwm, store_pwm, 3);
+static SENSOR_DEVICE_ATTR(fan4_mode, S_IRUGO | S_IWUSR,
+                       show_mode, store_mode, 3);
+
+static struct attribute *nct7904_fanctl_attrs[] = {
+       &sensor_dev_attr_fan1_pwm.dev_attr.attr,
+       &sensor_dev_attr_fan1_mode.dev_attr.attr,
+       &sensor_dev_attr_fan2_pwm.dev_attr.attr,
+       &sensor_dev_attr_fan2_mode.dev_attr.attr,
+       &sensor_dev_attr_fan3_pwm.dev_attr.attr,
+       &sensor_dev_attr_fan3_mode.dev_attr.attr,
+       &sensor_dev_attr_fan4_pwm.dev_attr.attr,
+       &sensor_dev_attr_fan4_mode.dev_attr.attr,
+       NULL
+};
+
+static const struct attribute_group nct7904_fanctl_group = {
+       .attrs = nct7904_fanctl_attrs,
+};
+
+static const struct attribute_group *nct7904_groups[] = {
+       &nct7904_fanin_group,
+       &nct7904_vsen_group,
+       &nct7904_tcpu_group,
+       &nct7904_fanctl_group,
+       NULL
+};
+
+/* Return 0 if detection is successful, -ENODEV otherwise */
+static int nct7904_detect(struct i2c_client *client,
+                         struct i2c_board_info *info)
+{
+       struct i2c_adapter *adapter = client->adapter;
+
+       if (!i2c_check_functionality(adapter,
+                                    I2C_FUNC_SMBUS_READ_BYTE |
+                                    I2C_FUNC_SMBUS_WRITE_BYTE_DATA))
+               return -ENODEV;
+
+       /* Determine the chip type. */
+       if (i2c_smbus_read_byte_data(client, VENDOR_ID_REG) != NUVOTON_ID ||
+           i2c_smbus_read_byte_data(client, CHIP_ID_REG) != NCT7904_ID ||
+           (i2c_smbus_read_byte_data(client, DEVICE_ID_REG) & 0xf0) != 0x50 ||
+           (i2c_smbus_read_byte_data(client, BANK_SEL_REG) & 0xf8) != 0x00)
+               return -ENODEV;
+
+       strlcpy(info->type, "nct7904", I2C_NAME_SIZE);
+
+       return 0;
+}
+
+static int nct7904_probe(struct i2c_client *client,
+                        const struct i2c_device_id *id)
+{
+       struct nct7904_data *data;
+       struct device *hwmon_dev;
+       struct device *dev = &client->dev;
+       int ret, i;
+       u32 mask;
+
+       data = devm_kzalloc(dev, sizeof(struct nct7904_data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->client = client;
+       mutex_init(&data->bank_lock);
+       data->bank_sel = -1;
+
+       /* Setup sensor groups. */
+       /* FANIN attributes */
+       ret = nct7904_read_reg16(data, BANK_0, FANIN_CTRL0_REG);
+       if (ret < 0)
+               return ret;
+       data->fanin_mask = (ret >> 8) | ((ret & 0xff) << 8);
+
+       /*
+        * VSEN attributes
+        *
+        * Note: voltage sensors overlap with external temperature
+        * sensors. So, if we ever decide to support the latter
+        * we will have to adjust 'vsen_mask' accordingly.
+        */
+       mask = 0;
+       ret = nct7904_read_reg16(data, BANK_0, VT_ADC_CTRL0_REG);
+       if (ret >= 0)
+               mask = (ret >> 8) | ((ret & 0xff) << 8);
+       ret = nct7904_read_reg(data, BANK_0, VT_ADC_CTRL2_REG);
+       if (ret >= 0)
+               mask |= (ret << 16);
+       data->vsen_mask = mask;
+
+       /* CPU_TEMP attributes */
+       ret = nct7904_read_reg16(data, BANK_0, DTS_T_CTRL0_REG);
+       if (ret < 0)
+               return ret;
+       data->tcpu_mask = ((ret >> 8) & 0xf) | ((ret & 0xf) << 4);
+
+       for (i = 0; i < FANCTL_MAX; i++) {
+               ret = nct7904_read_reg(data, BANK_3, FANCTL1_FMR_REG + i);
+               if (ret < 0)
+                       return ret;
+               data->fan_mode[i] = ret;
+       }
+
+       hwmon_dev =
+               devm_hwmon_device_register_with_groups(dev, client->name, data,
+                                                      nct7904_groups);
+       return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static const struct i2c_device_id nct7904_id[] = {
+       {"nct7904", 0},
+       {}
+};
+
+static struct i2c_driver nct7904_driver = {
+       .class = I2C_CLASS_HWMON,
+       .driver = {
+               .name = "nct7904",
+       },
+       .probe = nct7904_probe,
+       .id_table = nct7904_id,
+       .detect = nct7904_detect,
+       .address_list = normal_i2c,
+};
+
+module_i2c_driver(nct7904_driver);
+
+MODULE_AUTHOR("Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>");
+MODULE_DESCRIPTION("Hwmon driver for NUVOTON NCT7904");
+MODULE_LICENSE("GPL");
index 1991d9032c3843de2ffcd20b82f5790d22ce2684..2d9a712699ff5d541e831629834b23882b4fa606 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/sysfs.h>
+#include <linux/thermal.h>
 
 #define MAX_PWM 255
 
 struct pwm_fan_ctx {
        struct mutex lock;
        struct pwm_device *pwm;
-       unsigned char pwm_value;
+       unsigned int pwm_value;
+       unsigned int pwm_fan_state;
+       unsigned int pwm_fan_max_state;
+       unsigned int *pwm_fan_cooling_levels;
+       struct thermal_cooling_device *cdev;
 };
 
-static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
-                      const char *buf, size_t count)
+static int __set_pwm(struct pwm_fan_ctx *ctx, unsigned long pwm)
 {
-       struct pwm_fan_ctx *ctx = dev_get_drvdata(dev);
-       unsigned long pwm, duty;
-       ssize_t ret;
-
-       if (kstrtoul(buf, 10, &pwm) || pwm > MAX_PWM)
-               return -EINVAL;
+       unsigned long duty;
+       int ret = 0;
 
        mutex_lock(&ctx->lock);
-
        if (ctx->pwm_value == pwm)
-               goto exit_set_pwm_no_change;
-
-       if (pwm == 0) {
-               pwm_disable(ctx->pwm);
-               goto exit_set_pwm;
-       }
+               goto exit_set_pwm_err;
 
        duty = DIV_ROUND_UP(pwm * (ctx->pwm->period - 1), MAX_PWM);
        ret = pwm_config(ctx->pwm, duty, ctx->pwm->period);
        if (ret)
                goto exit_set_pwm_err;
 
+       if (pwm == 0)
+               pwm_disable(ctx->pwm);
+
        if (ctx->pwm_value == 0) {
                ret = pwm_enable(ctx->pwm);
                if (ret)
                        goto exit_set_pwm_err;
        }
 
-exit_set_pwm:
        ctx->pwm_value = pwm;
-exit_set_pwm_no_change:
-       ret = count;
 exit_set_pwm_err:
        mutex_unlock(&ctx->lock);
        return ret;
 }
 
+static void pwm_fan_update_state(struct pwm_fan_ctx *ctx, unsigned long pwm)
+{
+       int i;
+
+       for (i = 0; i < ctx->pwm_fan_max_state; ++i)
+               if (pwm < ctx->pwm_fan_cooling_levels[i + 1])
+                       break;
+
+       ctx->pwm_fan_state = i;
+}
+
+static ssize_t set_pwm(struct device *dev, struct device_attribute *attr,
+                      const char *buf, size_t count)
+{
+       struct pwm_fan_ctx *ctx = dev_get_drvdata(dev);
+       unsigned long pwm;
+       int ret;
+
+       if (kstrtoul(buf, 10, &pwm) || pwm > MAX_PWM)
+               return -EINVAL;
+
+       ret = __set_pwm(ctx, pwm);
+       if (ret)
+               return ret;
+
+       pwm_fan_update_state(ctx, pwm);
+       return count;
+}
+
 static ssize_t show_pwm(struct device *dev,
                        struct device_attribute *attr, char *buf)
 {
@@ -91,10 +114,108 @@ static struct attribute *pwm_fan_attrs[] = {
 
 ATTRIBUTE_GROUPS(pwm_fan);
 
+/* thermal cooling device callbacks */
+static int pwm_fan_get_max_state(struct thermal_cooling_device *cdev,
+                                unsigned long *state)
+{
+       struct pwm_fan_ctx *ctx = cdev->devdata;
+
+       if (!ctx)
+               return -EINVAL;
+
+       *state = ctx->pwm_fan_max_state;
+
+       return 0;
+}
+
+static int pwm_fan_get_cur_state(struct thermal_cooling_device *cdev,
+                                unsigned long *state)
+{
+       struct pwm_fan_ctx *ctx = cdev->devdata;
+
+       if (!ctx)
+               return -EINVAL;
+
+       *state = ctx->pwm_fan_state;
+
+       return 0;
+}
+
+static int
+pwm_fan_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+{
+       struct pwm_fan_ctx *ctx = cdev->devdata;
+       int ret;
+
+       if (!ctx || (state > ctx->pwm_fan_max_state))
+               return -EINVAL;
+
+       if (state == ctx->pwm_fan_state)
+               return 0;
+
+       ret = __set_pwm(ctx, ctx->pwm_fan_cooling_levels[state]);
+       if (ret) {
+               dev_err(&cdev->device, "Cannot set pwm!\n");
+               return ret;
+       }
+
+       ctx->pwm_fan_state = state;
+
+       return ret;
+}
+
+static const struct thermal_cooling_device_ops pwm_fan_cooling_ops = {
+       .get_max_state = pwm_fan_get_max_state,
+       .get_cur_state = pwm_fan_get_cur_state,
+       .set_cur_state = pwm_fan_set_cur_state,
+};
+
+static int pwm_fan_of_get_cooling_data(struct device *dev,
+                                      struct pwm_fan_ctx *ctx)
+{
+       struct device_node *np = dev->of_node;
+       int num, i, ret;
+
+       if (!of_find_property(np, "cooling-levels", NULL))
+               return 0;
+
+       ret = of_property_count_u32_elems(np, "cooling-levels");
+       if (ret <= 0) {
+               dev_err(dev, "Wrong data!\n");
+               return ret ? : -EINVAL;
+       }
+
+       num = ret;
+       ctx->pwm_fan_cooling_levels = devm_kzalloc(dev, num * sizeof(u32),
+                                                  GFP_KERNEL);
+       if (!ctx->pwm_fan_cooling_levels)
+               return -ENOMEM;
+
+       ret = of_property_read_u32_array(np, "cooling-levels",
+                                        ctx->pwm_fan_cooling_levels, num);
+       if (ret) {
+               dev_err(dev, "Property 'cooling-levels' cannot be read!\n");
+               return ret;
+       }
+
+       for (i = 0; i < num; i++) {
+               if (ctx->pwm_fan_cooling_levels[i] > MAX_PWM) {
+                       dev_err(dev, "PWM fan state[%d]:%d > %d\n", i,
+                               ctx->pwm_fan_cooling_levels[i], MAX_PWM);
+                       return -EINVAL;
+               }
+       }
+
+       ctx->pwm_fan_max_state = num - 1;
+
+       return 0;
+}
+
 static int pwm_fan_probe(struct platform_device *pdev)
 {
-       struct device *hwmon;
+       struct thermal_cooling_device *cdev;
        struct pwm_fan_ctx *ctx;
+       struct device *hwmon;
        int duty_cycle;
        int ret;
 
@@ -136,6 +257,26 @@ static int pwm_fan_probe(struct platform_device *pdev)
                pwm_disable(ctx->pwm);
                return PTR_ERR(hwmon);
        }
+
+       ret = pwm_fan_of_get_cooling_data(&pdev->dev, ctx);
+       if (ret)
+               return ret;
+
+       ctx->pwm_fan_state = ctx->pwm_fan_max_state;
+       if (IS_ENABLED(CONFIG_THERMAL)) {
+               cdev = thermal_of_cooling_device_register(pdev->dev.of_node,
+                                                         "pwm-fan", ctx,
+                                                         &pwm_fan_cooling_ops);
+               if (IS_ERR(cdev)) {
+                       dev_err(&pdev->dev,
+                               "Failed to register pwm-fan as cooling device");
+                       pwm_disable(ctx->pwm);
+                       return PTR_ERR(cdev);
+               }
+               ctx->cdev = cdev;
+               thermal_cdev_update(cdev);
+       }
+
        return 0;
 }
 
@@ -143,6 +284,7 @@ static int pwm_fan_remove(struct platform_device *pdev)
 {
        struct pwm_fan_ctx *ctx = platform_get_drvdata(pdev);
 
+       thermal_cooling_device_unregister(ctx->cdev);
        if (ctx->pwm_value)
                pwm_disable(ctx->pwm);
        return 0;
@@ -177,7 +319,7 @@ static int pwm_fan_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(pwm_fan_pm, pwm_fan_suspend, pwm_fan_resume);
 
-static struct of_device_id of_pwm_fan_match[] = {
+static const struct of_device_id of_pwm_fan_match[] = {
        { .compatible = "pwm-fan", },
        {},
 };
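
The new cooling-device hookup is driven entirely by the optional "cooling-levels" property parsed in pwm_fan_of_get_cooling_data() above. A hypothetical node (values illustrative, not taken from any in-tree dts):

	fan0: pwm-fan {
		compatible = "pwm-fan";
		pwms = <&pwm 0 10000 0>;
		#cooling-cells = <2>;
		cooling-levels = <0 102 170 230>;
	};

With these four levels pwm_fan_max_state becomes 3, and a sysfs-requested PWM of 120 maps back to state 1 in pwm_fan_update_state(), the highest state whose level does not exceed it.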
index cf1848b8fb321a0e4f867352730956543088feb0..8ba419d343f86770b537e74fffb933b25a08b082 100644 (file)
@@ -193,7 +193,7 @@ static struct vexpress_hwmon_type vexpress_hwmon_energy = {
        },
 };
 
-static struct of_device_id vexpress_hwmon_of_match[] = {
+static const struct of_device_id vexpress_hwmon_of_match[] = {
 #if !defined(CONFIG_REGULATOR_VEXPRESS)
        {
                .compatible = "arm,vexpress-volt",
index b0e58522780d48c49b9a1ebff182d5fa7a43d93c..5c979d0667a2210d2d73873e8766d07ea0facb22 100644 (file)
@@ -55,7 +55,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <trace/events/power.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
@@ -638,12 +638,12 @@ static int intel_idle(struct cpuidle_device *dev,
                leave_mm(cpu);
 
        if (!(lapic_timer_reliable_states & (1 << (cstate))))
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+               tick_broadcast_enter();
 
        mwait_idle_with_hints(eax, ecx);
 
        if (!(lapic_timer_reliable_states & (1 << (cstate))))
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+               tick_broadcast_exit();
 
        return index;
 }
@@ -665,13 +665,12 @@ static void intel_idle_freeze(struct cpuidle_device *dev,
 
 static void __setup_broadcast_timer(void *arg)
 {
-       unsigned long reason = (unsigned long)arg;
-       int cpu = smp_processor_id();
-
-       reason = reason ?
-               CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
+       unsigned long on = (unsigned long)arg;
 
-       clockevents_notify(reason, &cpu);
+       if (on)
+               tick_broadcast_enable();
+       else
+               tick_broadcast_disable();
 }
 
 static int cpu_hotplug_notify(struct notifier_block *n,
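
The conversion above drops the CLOCK_EVT_NOTIFY_* reason codes in favour of the dedicated tick-broadcast API; the callback still runs per CPU, so enabling broadcast everywhere presumably keeps looking like this (sketch, assuming the driver's existing cross-call sites):

	/* Run __setup_broadcast_timer() on every online CPU; the argument
	 * now merely selects tick_broadcast_enable() vs _disable(). */
	on_each_cpu(__setup_broadcast_timer, (void *)true, 1);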
index 717daad71fb101b2b97efd03b1bb41d8b35ecfe7..e6178787ce3dd4a9b0e33b80b72ac39f73e25e33 100644 (file)
@@ -249,6 +249,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
        const int rw = bio_data_dir(bio);
        struct mddev *mddev = q->queuedata;
        unsigned int sectors;
+       int cpu;
 
        if (mddev == NULL || mddev->pers == NULL
            || !mddev->ready) {
@@ -284,7 +285,10 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
        sectors = bio_sectors(bio);
        mddev->pers->make_request(mddev, bio);
 
-       generic_start_io_acct(rw, sectors, &mddev->gendisk->part0);
+       cpu = part_stat_lock();
+       part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+       part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+       part_stat_unlock();
 
        if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
                wake_up(&mddev->sb_wait);
index 3ed9f42ddca65e10351a1a453f16383e63c52634..3b5d7f704aa346ad27b4806c9e36ee9ba984edc7 100644 (file)
@@ -313,7 +313,7 @@ static struct strip_zone *find_zone(struct r0conf *conf,
 
 /*
  * remaps the bio to the target device. we separate two flows.
- * power 2 flow and a general flow for the sake of perfromance
+ * power 2 flow and a general flow for the sake of performance
 */
 static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
                                sector_t sector, sector_t *sector_offset)
@@ -524,6 +524,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
+               sector = bio->bi_iter.bi_sector;
                zone = find_zone(mddev->private, &sector);
                tmp_dev = map_sector(mddev, zone, sector, &sector);
                split->bi_bdev = tmp_dev->bdev;
index 5d2d8f45b4b62a1005e261383b7c858d3c5e539e..67faa8d6950efb1e4f8b24c0aad0e99027473b43 100644 (file)
@@ -1240,7 +1240,7 @@ static int rtl2832_probe(struct i2c_client *client,
        dev->regmap_config.max_register = 5 * 0x100,
        dev->regmap_config.ranges = regmap_range_cfg,
        dev->regmap_config.num_ranges = ARRAY_SIZE(regmap_range_cfg),
-       dev->regmap_config.cache_type = REGCACHE_RBTREE,
+       dev->regmap_config.cache_type = REGCACHE_NONE,
        dev->regmap = regmap_init(&client->dev, &regmap_bus, client,
                                  &dev->regmap_config);
        if (IS_ERR(dev->regmap)) {
index e4901a503c7366dc10a8ded1c61e84683f62124e..63c0ee5d0bf5eca7511df486fe300b3a468301b5 100644 (file)
@@ -1339,14 +1339,13 @@ static int vidioc_querycap(struct file *file, void  *priv,
        strlcpy(cap->driver, dev->name, sizeof(cap->driver));
        strlcpy(cap->card, cx23885_boards[tsport->dev->board].name,
                sizeof(cap->card));
-       sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci));
-       cap->capabilities =
-               V4L2_CAP_VIDEO_CAPTURE |
-               V4L2_CAP_READWRITE     |
-               V4L2_CAP_STREAMING     |
-               0;
+       sprintf(cap->bus_info, "PCIe:%s", pci_name(dev->pci));
+       cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE |
+                          V4L2_CAP_STREAMING;
        if (dev->tuner_type != TUNER_ABSENT)
-               cap->capabilities |= V4L2_CAP_TUNER;
+               cap->device_caps |= V4L2_CAP_TUNER;
+       cap->capabilities = cap->device_caps | V4L2_CAP_VBI_CAPTURE |
+               V4L2_CAP_AUDIO | V4L2_CAP_DEVICE_CAPS;
 
        return 0;
 }
index 12f7452edce3708a989eb0e7f71a94def8a31665..a92ff4249d100d33e84acaf50b372eb5e2a190b0 100644 (file)
@@ -1845,6 +1845,9 @@ static void exynos4_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx)
        struct s5p_jpeg_addr jpeg_addr;
        u32 pix_size, padding_bytes = 0;
 
+       jpeg_addr.cb = 0;
+       jpeg_addr.cr = 0;
+
        pix_size = ctx->cap_q.w * ctx->cap_q.h;
 
        if (ctx->mode == S5P_JPEG_ENCODE) {
index e8c2cad9396272ed9fa6f43c0fbab2699ba0073e..0974b9a7a584fb85f6aa268257264f995b5baef3 100644 (file)
@@ -20,7 +20,7 @@
 
 void exynos3250_jpeg_reset(void __iomem *regs)
 {
-       u32 reg = 0;
+       u32 reg = 1;
        int count = 1000;
 
        writel(1, regs + EXYNOS3250_SW_RESET);
index 8e44a59d8ec20f17bcfd601fd4a8c7abdaf8a530..98374e8bad3e99ffdf5a70d26b923e6e0c265bf3 100644 (file)
@@ -833,6 +833,7 @@ static int s5p_mfc_open(struct file *file)
        q->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
        q->io_modes = VB2_MMAP;
        q->drv_priv = &ctx->fh;
+       q->lock = &dev->mfc_mutex;
        if (vdev == dev->vfd_dec) {
                q->io_modes = VB2_MMAP;
                q->ops = get_dec_queue_ops();
index 15f7663dd9f5c193ba28968b2da5326f2bf19ea4..24262bbb1a3568fe87875bedfcd4d52f42867e4e 100644 (file)
@@ -29,7 +29,7 @@
 
 /* Offset base used to differentiate between CAPTURE and OUTPUT
  * while mmapping */
-#define DST_QUEUE_OFF_BASE      (TASK_SIZE / 2)
+#define DST_QUEUE_OFF_BASE     (1 << 30)
 
 #define MFC_BANK1_ALLOC_CTX    0
 #define MFC_BANK2_ALLOC_CTX    1
index de2b8c69daa535dd6ea906dcd476638e3185c72f..22dfb3effda8912ded31f8afabe089857b55ac78 100644 (file)
@@ -302,7 +302,7 @@ struct s5p_mfc_hw_ops {
        void (*write_info)(struct s5p_mfc_ctx *ctx, unsigned int data,
                        unsigned int ofs);
        unsigned int (*read_info)(struct s5p_mfc_ctx *ctx,
-                       unsigned int ofs);
+                       unsigned long ofs);
        int (*get_dspl_y_adr)(struct s5p_mfc_dev *dev);
        int (*get_dec_y_adr)(struct s5p_mfc_dev *dev);
        int (*get_dspl_status)(struct s5p_mfc_dev *dev);
index 0c4fcf2dfd09c68cd41e12481b44944a585a52fc..b09bcd14049133cb63c1d35150b5a465b75e694d 100644 (file)
@@ -263,15 +263,15 @@ static void s5p_mfc_release_dev_context_buffer_v5(struct s5p_mfc_dev *dev)
 static void s5p_mfc_write_info_v5(struct s5p_mfc_ctx *ctx, unsigned int data,
                        unsigned int ofs)
 {
-       writel(data, (volatile void __iomem *)(ctx->shm.virt + ofs));
+       writel(data, (void *)(ctx->shm.virt + ofs));
        wmb();
 }
 
 static unsigned int s5p_mfc_read_info_v5(struct s5p_mfc_ctx *ctx,
-                               unsigned int ofs)
+                               unsigned long ofs)
 {
        rmb();
-       return readl((volatile void __iomem *)(ctx->shm.virt + ofs));
+       return readl((void *)(ctx->shm.virt + ofs));
 }
 
 static void s5p_mfc_dec_calc_dpb_size_v5(struct s5p_mfc_ctx *ctx)
index d826c58b5d538bfcc24fc3dc0f6539d3bbd5320e..cefad184fe969296db59858103f4c6e713c6acd3 100644 (file)
@@ -1852,17 +1852,17 @@ static void s5p_mfc_write_info_v6(struct s5p_mfc_ctx *ctx, unsigned int data,
                unsigned int ofs)
 {
        s5p_mfc_clock_on();
-       writel(data, (volatile void __iomem *)((unsigned long)ofs));
+       writel(data, (void *)((unsigned long)ofs));
        s5p_mfc_clock_off();
 }
 
 static unsigned int
-s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned int ofs)
+s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned long ofs)
 {
        int ret;
 
        s5p_mfc_clock_on();
-       ret = readl((volatile void __iomem *)((unsigned long)ofs));
+       ret = readl((void *)ofs);
        s5p_mfc_clock_off();
 
        return ret;
index 5a1835dd65e858e5622f5b01fbe5fe4f4b63e5bf..697aaed42486fb71fe3195be01e2c0b61ee2bb14 100644 (file)
@@ -20,6 +20,7 @@ if VIDEO_SAMSUNG_S5P_TV
 config VIDEO_SAMSUNG_S5P_HDMI
        tristate "Samsung HDMI Driver"
        depends on VIDEO_V4L2
+       depends on I2C
        depends on VIDEO_SAMSUNG_S5P_TV
        select VIDEO_SAMSUNG_S5P_HDMIPHY
        help
index a901b62485576489d4ed4867a131d7a6b7d69386..2554f3719b9e365584fb9b105c1ffd7d65bcd1b9 100644 (file)
@@ -1158,6 +1158,7 @@ static int sh_veu_probe(struct platform_device *pdev)
        }
 
        *vdev = sh_veu_videodev;
+       vdev->v4l2_dev = &veu->v4l2_dev;
        spin_lock_init(&veu->lock);
        mutex_init(&veu->fop_lock);
        vdev->lock = &veu->fop_lock;
index 8526bf5c8429aecbb5afa8030fca537b0720b2ab..c835beb2a1a8f30e14f48a34566b842dbdce96cf 100644 (file)
@@ -843,6 +843,8 @@ static int isi_camera_set_bus_param(struct soc_camera_device *icd)
        if (isi->pdata.full_mode)
                cfg1 |= ISI_CFG1_FULL_MODE;
 
+       cfg1 |= ISI_CFG1_THMASK_BEATS_16;
+
        isi_writel(isi, ISI_CTRL, ISI_CTRL_DIS);
        isi_writel(isi, ISI_CFG1, cfg1);
 
index cee7b56f840499440142bc598fbd588bd8469f67..66634b469c9899f043f79a69c4c232599ec8ee0f 100644 (file)
@@ -1665,7 +1665,7 @@ eclkreg:
 eaddpdev:
        platform_device_put(sasc->pdev);
 eallocpdev:
-       devm_kfree(ici->v4l2_dev.dev, sasc);
+       devm_kfree(ici->v4l2_dev.dev, info);
        dev_err(ici->v4l2_dev.dev, "group probe failed: %d\n", ret);
 
        return ret;
index 77dcfdf547ac8f62e4cc2ea5a8fa086478e545b5..87fc0fe29ebd30e91ee58488cf6398df111ece63 100644 (file)
@@ -780,8 +780,6 @@ static int rtl2832u_frontend_callback(void *adapter_priv, int component,
                case TUNER_RTL2832_TUA9001:
                        return rtl2832u_tua9001_tuner_callback(d, cmd, arg);
                }
-       default:
-               return -EINVAL;
        }
 
        return 0;
index 60af3b167f3b73e02071074a8e01ce39499c8d98..3fd94fe7e1eb1652b7dfc5df4d5983f3733eb65b 100644 (file)
@@ -1,6 +1,7 @@
 menuconfig USB_GSPCA
        tristate "GSPCA based webcams"
        depends on VIDEO_V4L2
+       depends on INPUT || INPUT=n
        default m
        ---help---
          Say Y here if you want to enable selecting webcams based
index bc08a829bc132068c0b51661f9459293ef30c142..cc16e76a24933c41a9cc81ddb5b2beb0193fcbe2 100644 (file)
@@ -3230,18 +3230,13 @@ int vb2_thread_stop(struct vb2_queue *q)
 
        if (threadio == NULL)
                return 0;
-       call_void_qop(q, wait_finish, q);
        threadio->stop = true;
-       vb2_internal_streamoff(q, q->type);
-       call_void_qop(q, wait_prepare, q);
+       /* Wake up all pending sleeps in the thread */
+       vb2_queue_error(q);
        err = kthread_stop(threadio->thread);
-       q->fileio = NULL;
-       fileio->req.count = 0;
-       vb2_reqbufs(q, &fileio->req);
-       kfree(fileio);
+       __vb2_cleanup_fileio(q);
        threadio->thread = NULL;
        kfree(threadio);
-       q->fileio = NULL;
        q->threadio = NULL;
        return err;
 }
index b481d20c83727aa46e30119380f02be200a41479..69e0483adfee02e569f89ece65a31f73c0a96f35 100644 (file)
@@ -632,8 +632,7 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
        }
 
        /* extract page list from userspace mapping */
-       ret = vb2_dc_get_user_pages(start, pages, n_pages, vma,
-                                   dma_dir == DMA_FROM_DEVICE);
+       ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, dma_dir);
        if (ret) {
                unsigned long pfn;
                if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) {
index 38552a31304aff8cf2747d0f6278897bd6b87d72..65fed7146e9bac2a407df1c05710d6a6eb75e6c4 100644 (file)
@@ -202,16 +202,17 @@ static void enclosure_remove_links(struct enclosure_component *cdev)
 {
        char name[ENCLOSURE_NAME_SIZE];
 
+       enclosure_link_name(cdev, name);
+
        /*
         * In odd circumstances, like multipath devices, something else may
         * already have removed the links, so check for this condition first.
         */
-       if (!cdev->dev->kobj.sd)
-               return;
+       if (cdev->dev->kobj.sd)
+               sysfs_remove_link(&cdev->dev->kobj, name);
 
-       enclosure_link_name(cdev, name);
-       sysfs_remove_link(&cdev->dev->kobj, name);
-       sysfs_remove_link(&cdev->cdev.kobj, "device");
+       if (cdev->cdev.kobj.sd)
+               sysfs_remove_link(&cdev->cdev.kobj, "device");
 }
 
 static int enclosure_add_links(struct enclosure_component *cdev)
index 82dc5748f873b72ea355b517c606d59dc6235c31..7f327121e6d7c43416a3ca6820cd39111796d683 100644 (file)
@@ -1210,7 +1210,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
 
                if (((die_args->trapnr == X86_TRAP_MF) ||
                     (die_args->trapnr == X86_TRAP_XF)) &&
-                   !user_mode_vm(die_args->regs))
+                   !user_mode(die_args->regs))
                        xpc_die_deactivate();
 
                break;
index 23f10f72e5f391d1c32b63b8a2e33dd2a7eb777e..c296bc098fe23684f4be66195b2342d7cd0159ef 100644 (file)
@@ -897,6 +897,7 @@ int __mmc_claim_host(struct mmc_host *host, atomic_t *abort)
        DECLARE_WAITQUEUE(wait, current);
        unsigned long flags;
        int stop;
+       bool pm = false;
 
        might_sleep();
 
@@ -916,15 +917,18 @@ int __mmc_claim_host(struct mmc_host *host, atomic_t *abort)
                host->claimed = 1;
                host->claimer = current;
                host->claim_cnt += 1;
+               if (host->claim_cnt == 1)
+                       pm = true;
        } else
                wake_up(&host->wq);
        spin_unlock_irqrestore(&host->lock, flags);
        remove_wait_queue(&host->wq, &wait);
-       if (host->ops->enable && !stop && host->claim_cnt == 1)
-               host->ops->enable(host);
+
+       if (pm)
+               pm_runtime_get_sync(mmc_dev(host));
+
        return stop;
 }
-
 EXPORT_SYMBOL(__mmc_claim_host);
 
 /**
@@ -940,9 +944,6 @@ void mmc_release_host(struct mmc_host *host)
 
        WARN_ON(!host->claimed);
 
-       if (host->ops->disable && host->claim_cnt == 1)
-               host->ops->disable(host);
-
        spin_lock_irqsave(&host->lock, flags);
        if (--host->claim_cnt) {
                /* Release for nested claim */
@@ -952,6 +953,8 @@ void mmc_release_host(struct mmc_host *host)
                host->claimer = NULL;
                spin_unlock_irqrestore(&host->lock, flags);
                wake_up(&host->wq);
+               pm_runtime_mark_last_busy(mmc_dev(host));
+               pm_runtime_put_autosuspend(mmc_dev(host));
        }
 }
 EXPORT_SYMBOL(mmc_release_host);
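
Taken together, the two hunks above move the runtime-PM reference from the host's enable()/disable() callbacks to the outermost claim/release pair. A hedged sketch of the resulting invariant, as seen from a card driver (mmc_claim_host() wraps __mmc_claim_host()):

	mmc_claim_host(host);	/* claim_cnt 0 -> 1: pm_runtime_get_sync()  */
	mmc_claim_host(host);	/* claim_cnt 1 -> 2: no PM traffic (nested) */
	mmc_release_host(host);	/* claim_cnt 2 -> 1: no PM traffic          */
	mmc_release_host(host);	/* claim_cnt 1 -> 0: mark last busy, then
				 * pm_runtime_put_autosuspend()             */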
index 1d41e8541f388d7a2f74f04e469b72841609f03b..c84131e2862565136d62c32ccd3f6cb87b472111 100644 (file)
@@ -11,6 +11,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/pm_runtime.h>
@@ -336,6 +337,8 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
 {
        int err = 0, idx;
        unsigned int part_size;
+       struct device_node *np;
+       bool broken_hpi = false;
 
        /* Version is coded in the CSD_STRUCTURE byte in the EXT_CSD register */
        card->ext_csd.raw_ext_csd_structure = ext_csd[EXT_CSD_STRUCTURE];
@@ -349,6 +352,11 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
                }
        }
 
+       np = mmc_of_find_child_device(card->host, 0);
+       if (np && of_device_is_compatible(np, "mmc-card"))
+               broken_hpi = of_property_read_bool(np, "broken-hpi");
+       of_node_put(np);
+
        /*
         * The EXT_CSD format is meant to be forward compatible. As long
         * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV
@@ -494,7 +502,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
                }
 
                /* check whether the eMMC card supports HPI */
-               if (ext_csd[EXT_CSD_HPI_FEATURES] & 0x1) {
+               if (!broken_hpi && (ext_csd[EXT_CSD_HPI_FEATURES] & 0x1)) {
                        card->ext_csd.hpi = 1;
                        if (ext_csd[EXT_CSD_HPI_FEATURES] & 0x2)
                                card->ext_csd.hpi_cmd = MMC_STOP_TRANSMISSION;
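
The broken-hpi quirk is looked up on an optional child node of the host controller, matching the new mmc-card devicetree binding added by this merge. A hypothetical host fragment:

	&mmc2 {
		/* ... usual host properties ... */
		mmccard: mmccard@0 {
			reg = <0>;
			compatible = "mmc-card";
			broken-hpi;
		};
	};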
index 862356123d78c7def0c4f7530c86b223ff1adf32..ab21297811610089bc79f11c413be479a0114fe0 100644 (file)
@@ -19,7 +19,7 @@
 
 struct mmc_pwrseq_match {
        const char *compatible;
-       int (*alloc)(struct mmc_host *host, struct device *dev);
+       struct mmc_pwrseq *(*alloc)(struct mmc_host *host, struct device *dev);
 };
 
 static struct mmc_pwrseq_match pwrseq_match[] = {
@@ -52,6 +52,7 @@ int mmc_pwrseq_alloc(struct mmc_host *host)
        struct platform_device *pdev;
        struct device_node *np;
        struct mmc_pwrseq_match *match;
+       struct mmc_pwrseq *pwrseq;
        int ret = 0;
 
        np = of_parse_phandle(host->parent->of_node, "mmc-pwrseq", 0);
@@ -70,9 +71,14 @@ int mmc_pwrseq_alloc(struct mmc_host *host)
                goto err;
        }
 
-       ret = match->alloc(host, &pdev->dev);
-       if (!ret)
-               dev_info(host->parent, "allocated mmc-pwrseq\n");
+       pwrseq = match->alloc(host, &pdev->dev);
+       if (IS_ERR(pwrseq)) {
+               ret = PTR_ERR(pwrseq);
+               goto err;
+       }
+
+       host->pwrseq = pwrseq;
+       dev_info(host->parent, "allocated mmc-pwrseq\n");
 
 err:
        of_node_put(np);
@@ -109,4 +115,6 @@ void mmc_pwrseq_free(struct mmc_host *host)
 
        if (pwrseq && pwrseq->ops && pwrseq->ops->free)
                pwrseq->ops->free(host);
+
+       host->pwrseq = NULL;
 }
index aba3409e8d6e81debf567cffae1bb0ca0b65872c..096da48c6a7ecbb0ba294e6fd1bf9513b02bca36 100644 (file)
@@ -27,8 +27,10 @@ void mmc_pwrseq_post_power_on(struct mmc_host *host);
 void mmc_pwrseq_power_off(struct mmc_host *host);
 void mmc_pwrseq_free(struct mmc_host *host);
 
-int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev);
-int mmc_pwrseq_emmc_alloc(struct mmc_host *host, struct device *dev);
+struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
+                                          struct device *dev);
+struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
+                                        struct device *dev);
 
 #else
 
index a2d545904fbf6977a4f186a1a0c99503c945157b..9d6d2fb217967d069ff2f18db0b403dff05483e0 100644 (file)
@@ -49,7 +49,6 @@ static void mmc_pwrseq_emmc_free(struct mmc_host *host)
        unregister_restart_handler(&pwrseq->reset_nb);
        gpiod_put(pwrseq->reset_gpio);
        kfree(pwrseq);
-       host->pwrseq = NULL;
 }
 
 static struct mmc_pwrseq_ops mmc_pwrseq_emmc_ops = {
@@ -67,14 +66,15 @@ static int mmc_pwrseq_emmc_reset_nb(struct notifier_block *this,
        return NOTIFY_DONE;
 }
 
-int mmc_pwrseq_emmc_alloc(struct mmc_host *host, struct device *dev)
+struct mmc_pwrseq *mmc_pwrseq_emmc_alloc(struct mmc_host *host,
+                                        struct device *dev)
 {
        struct mmc_pwrseq_emmc *pwrseq;
        int ret = 0;
 
        pwrseq = kzalloc(sizeof(struct mmc_pwrseq_emmc), GFP_KERNEL);
        if (!pwrseq)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        pwrseq->reset_gpio = gpiod_get_index(dev, "reset", 0, GPIOD_OUT_LOW);
        if (IS_ERR(pwrseq->reset_gpio)) {
@@ -92,10 +92,9 @@ int mmc_pwrseq_emmc_alloc(struct mmc_host *host, struct device *dev)
        register_restart_handler(&pwrseq->reset_nb);
 
        pwrseq->pwrseq.ops = &mmc_pwrseq_emmc_ops;
-       host->pwrseq = &pwrseq->pwrseq;
 
-       return 0;
+       return &pwrseq->pwrseq;
 free:
        kfree(pwrseq);
-       return ret;
+       return ERR_PTR(ret);
 }
index c53f14a7ce546533c300313a54078e1bad327bf9..0b14b83a53d6c9614a2ac897893f5cc1690a5b6e 100644 (file)
@@ -85,7 +85,6 @@ static void mmc_pwrseq_simple_free(struct mmc_host *host)
                clk_put(pwrseq->ext_clk);
 
        kfree(pwrseq);
-       host->pwrseq = NULL;
 }
 
 static struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
@@ -95,7 +94,8 @@ static struct mmc_pwrseq_ops mmc_pwrseq_simple_ops = {
        .free = mmc_pwrseq_simple_free,
 };
 
-int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev)
+struct mmc_pwrseq *mmc_pwrseq_simple_alloc(struct mmc_host *host,
+                                          struct device *dev)
 {
        struct mmc_pwrseq_simple *pwrseq;
        int i, nr_gpios, ret = 0;
@@ -107,7 +107,7 @@ int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev)
        pwrseq = kzalloc(sizeof(struct mmc_pwrseq_simple) + nr_gpios *
                         sizeof(struct gpio_desc *), GFP_KERNEL);
        if (!pwrseq)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        pwrseq->ext_clk = clk_get(dev, "ext_clock");
        if (IS_ERR(pwrseq->ext_clk) &&
@@ -133,13 +133,12 @@ int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev)
 
        pwrseq->nr_gpios = nr_gpios;
        pwrseq->pwrseq.ops = &mmc_pwrseq_simple_ops;
-       host->pwrseq = &pwrseq->pwrseq;
 
-       return 0;
+       return &pwrseq->pwrseq;
 clk_put:
        if (!IS_ERR(pwrseq->ext_clk))
                clk_put(pwrseq->ext_clk);
 free:
        kfree(pwrseq);
-       return ret;
+       return ERR_PTR(ret);
 }
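
Both pwrseq allocators now report failure through the returned pointer instead of an int plus a premature host->pwrseq assignment. A generic sketch of the ERR_PTR idiom the new signatures rely on (pwrseq_alloc_sketch() is a made-up name; struct mmc_pwrseq comes from drivers/mmc/core/pwrseq.h):

	#include <linux/err.h>
	#include <linux/slab.h>

	static struct mmc_pwrseq *pwrseq_alloc_sketch(gfp_t gfp)
	{
		struct mmc_pwrseq *p = kzalloc(sizeof(*p), gfp);

		if (!p)
			return ERR_PTR(-ENOMEM); /* errno rides in the pointer */
		return p;
	}

	/* Caller side, mirroring mmc_pwrseq_alloc():
	 *	pwrseq = pwrseq_alloc_sketch(GFP_KERNEL);
	 *	if (IS_ERR(pwrseq))
	 *		return PTR_ERR(pwrseq);   recover the errno
	 */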
index ce6cc47206b0f3d6d91b4b69f35a66720cf14c47..5bc6c7dbbd6088153b6ee2bda65730caccea0a8a 100644 (file)
@@ -293,19 +293,22 @@ static int sdio_enable_4bit_bus(struct mmc_card *card)
        int err;
 
        if (card->type == MMC_TYPE_SDIO)
-               return sdio_enable_wide(card);
-
-       if ((card->host->caps & MMC_CAP_4_BIT_DATA) &&
-               (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
+               err = sdio_enable_wide(card);
+       else if ((card->host->caps & MMC_CAP_4_BIT_DATA) &&
+                (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) {
                err = mmc_app_set_bus_width(card, MMC_BUS_WIDTH_4);
                if (err)
                        return err;
+               err = sdio_enable_wide(card);
+               if (err <= 0)
+                       mmc_app_set_bus_width(card, MMC_BUS_WIDTH_1);
        } else
                return 0;
 
-       err = sdio_enable_wide(card);
-       if (err <= 0)
-               mmc_app_set_bus_width(card, MMC_BUS_WIDTH_1);
+       if (err > 0) {
+               mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+               err = 0;
+       }
 
        return err;
 }
@@ -547,13 +550,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card)
        /*
         * Switch to wider bus (if supported).
         */
-       if (card->host->caps & MMC_CAP_4_BIT_DATA) {
+       if (card->host->caps & MMC_CAP_4_BIT_DATA)
                err = sdio_enable_4bit_bus(card);
-               if (err > 0) {
-                       mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-                       err = 0;
-               }
-       }
 
        /* Set the driver strength for the card */
        sdio_select_driver_type(card);
@@ -803,9 +801,7 @@ try_again:
                 * Switch to wider bus (if supported).
                 */
                err = sdio_enable_4bit_bus(card);
-               if (err > 0)
-                       mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-               else if (err)
+               if (err)
                        goto remove;
        }
 finish:
@@ -983,10 +979,6 @@ static int mmc_sdio_resume(struct mmc_host *host)
        } else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
                /* We may have switched to 1-bit mode during suspend */
                err = sdio_enable_4bit_bus(host->card);
-               if (err > 0) {
-                       mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
-                       err = 0;
-               }
        }
 
        if (!err && host->sdio_irqs) {
index 61ac63a3776a789e2b98903118ab3198a00b9ffd..7f4db908f89b1a03e2f572b91e2331fa2fc44563 100644 (file)
@@ -132,7 +132,7 @@ config MMC_SDHCI_OF_ARASAN
 config MMC_SDHCI_OF_ESDHC
        tristate "SDHCI OF support for the Freescale eSDHC controller"
        depends on MMC_SDHCI_PLTFM
-       depends on PPC_OF
+       depends on PPC
        select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
        help
          This selects the Freescale eSDHC controller support.
@@ -144,7 +144,7 @@ config MMC_SDHCI_OF_ESDHC
 config MMC_SDHCI_OF_HLWD
        tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers"
        depends on MMC_SDHCI_PLTFM
-       depends on PPC_OF
+       depends on PPC
        select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
        help
          This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -230,7 +230,7 @@ config MMC_SDHCI_PXAV3
        tristate "Marvell MMP2 SD Host Controller support (PXAV3)"
        depends on CLKDEV_LOOKUP
        depends on MMC_SDHCI_PLTFM
-       depends on ARCH_MMP || COMPILE_TEST
+       depends on ARCH_BERLIN || ARCH_MMP || ARCH_MVEBU || COMPILE_TEST
        default CPU_MMP2
        help
          This selects the Marvell(R) PXAV3 SD Host Controller.
@@ -255,6 +255,7 @@ config MMC_SDHCI_PXAV2
 config MMC_SDHCI_SPEAR
        tristate "SDHCI support on ST SPEAr platform"
        depends on MMC_SDHCI && PLAT_SPEAR
+       depends on OF
        help
          This selects the Secure Digital Host Controller Interface (SDHCI)
          often referred to as the HSMMC block in some of the ST SPEAR range
@@ -307,6 +308,20 @@ config MMC_SDHCI_F_SDH30
 
          If unsure, say N.
 
+config MMC_SDHCI_IPROC
+       tristate "SDHCI platform support for the iProc SD/MMC Controller"
+       depends on ARCH_BCM_IPROC || COMPILE_TEST
+       depends on MMC_SDHCI_PLTFM
+       default ARCH_BCM_IPROC
+       select MMC_SDHCI_IO_ACCESSORS
+       help
+         This selects the iProc SD/MMC controller.
+
+         If you have an iProc platform with SD or MMC devices,
+         say Y or M here.
+
+         If unsure, say N.
+
 config MMC_MOXART
        tristate "MOXART SD/MMC Host Controller support"
        depends on ARCH_MOXART && MMC
index 6a7cfe0de332689fdb8a8631e1327057b54b0e4f..711e913450f5be77d3aa0690931d827697f706c1 100644 (file)
@@ -71,6 +71,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_ESDHC)      += sdhci-of-esdhc.o
 obj-$(CONFIG_MMC_SDHCI_OF_HLWD)                += sdhci-of-hlwd.o
 obj-$(CONFIG_MMC_SDHCI_BCM_KONA)       += sdhci-bcm-kona.o
 obj-$(CONFIG_MMC_SDHCI_BCM2835)                += sdhci-bcm2835.o
+obj-$(CONFIG_MMC_SDHCI_IPROC)          += sdhci-iproc.o
 obj-$(CONFIG_MMC_SDHCI_MSM)            += sdhci-msm.o
 obj-$(CONFIG_MMC_SDHCI_ST)             += sdhci-st.o
 
index c97001e15227aba5b337c5cb07da0a799969d6dd..0aa44e679df4965350abc9e7cce4f0d8e42da2ec 100644 (file)
 #define ATMCI_REGS_SIZE                0x100
 
 /* Register access macros */
-#define atmci_readl(port,reg)                          \
+#ifdef CONFIG_AVR32
+#define atmci_readl(port, reg)                 \
        __raw_readl((port)->regs + reg)
-#define atmci_writel(port,reg,value)                   \
+#define atmci_writel(port, reg, value)                 \
        __raw_writel((value), (port)->regs + reg)
+#else
+#define atmci_readl(port, reg)                 \
+       readl_relaxed((port)->regs + reg)
+#define atmci_writel(port, reg, value)                 \
+       writel_relaxed((value), (port)->regs + reg)
+#endif
 
 /* On AVR chips the Peripheral DMA Controller is not connected to MCI. */
 #ifdef CONFIG_AVR32
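
On non-AVR32 builds the register macros above now use the relaxed MMIO accessors. Unlike __raw_readl()/__raw_writel(), these honour kernel endianness (byte-swapping on big-endian kernels) while still skipping the heavyweight barriers of readl()/writel(). Illustrative comparison only, with regs assumed to be the usual void __iomem * base:

	u32 a = __raw_readl(regs + 0x04);   /* no byte swap, no barrier */
	u32 b = readl_relaxed(regs + 0x04); /* byte swap, no barrier    */
	u32 c = readl(regs + 0x04);         /* byte swap + read barrier */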
index fe32948c6114dbf9dd72820f958d3337872d4ec0..e761eb1b1441339e33a84822ff863b20165e97cf 100644 (file)
@@ -40,7 +40,12 @@ struct dw_mci_exynos_priv_data {
        u8                              ciu_div;
        u32                             sdr_timing;
        u32                             ddr_timing;
+       u32                             hs400_timing;
+       u32                             tuned_sample;
        u32                             cur_speed;
+       u32                             dqs_delay;
+       u32                             saved_dqs_en;
+       u32                             saved_strobe_ctrl;
 };
 
 static struct dw_mci_exynos_compatible {
@@ -71,6 +76,21 @@ static struct dw_mci_exynos_compatible {
        },
 };
 
+static inline u8 dw_mci_exynos_get_ciu_div(struct dw_mci *host)
+{
+       struct dw_mci_exynos_priv_data *priv = host->priv;
+
+       if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS4412)
+               return EXYNOS4412_FIXED_CIU_CLK_DIV;
+       else if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS4210)
+               return EXYNOS4210_FIXED_CIU_CLK_DIV;
+       else if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
+                       priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
+               return SDMMC_CLKSEL_GET_DIV(mci_readl(host, CLKSEL64)) + 1;
+       else
+               return SDMMC_CLKSEL_GET_DIV(mci_readl(host, CLKSEL)) + 1;
+}
+
 static int dw_mci_exynos_priv_init(struct dw_mci *host)
 {
        struct dw_mci_exynos_priv_data *priv = host->priv;
@@ -85,6 +105,16 @@ static int dw_mci_exynos_priv_init(struct dw_mci *host)
                           SDMMC_MPSCTRL_NON_SECURE_WRITE_BIT);
        }
 
+       if (priv->ctrl_type >= DW_MCI_TYPE_EXYNOS5420) {
+               priv->saved_strobe_ctrl = mci_readl(host, HS400_DLINE_CTRL);
+               priv->saved_dqs_en = mci_readl(host, HS400_DQS_EN);
+               priv->saved_dqs_en |= AXI_NON_BLOCKING_WR;
+               mci_writel(host, HS400_DQS_EN, priv->saved_dqs_en);
+               if (!priv->dqs_delay)
+                       priv->dqs_delay =
+                               DQS_CTRL_GET_RD_DELAY(priv->saved_strobe_ctrl);
+       }
+
        return 0;
 }
 
@@ -97,6 +127,26 @@ static int dw_mci_exynos_setup_clock(struct dw_mci *host)
        return 0;
 }
 
+static void dw_mci_exynos_set_clksel_timing(struct dw_mci *host, u32 timing)
+{
+       struct dw_mci_exynos_priv_data *priv = host->priv;
+       u32 clksel;
+
+       if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
+               priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
+               clksel = mci_readl(host, CLKSEL64);
+       else
+               clksel = mci_readl(host, CLKSEL);
+
+       clksel = (clksel & ~SDMMC_CLKSEL_TIMING_MASK) | timing;
+
+       if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
+               priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
+               mci_writel(host, CLKSEL64, clksel);
+       else
+               mci_writel(host, CLKSEL, clksel);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int dw_mci_exynos_suspend(struct device *dev)
 {
@@ -172,30 +222,38 @@ static void dw_mci_exynos_prepare_command(struct dw_mci *host, u32 *cmdr)
        }
 }
 
-static void dw_mci_exynos_set_ios(struct dw_mci *host, struct mmc_ios *ios)
+static void dw_mci_exynos_config_hs400(struct dw_mci *host, u32 timing)
 {
        struct dw_mci_exynos_priv_data *priv = host->priv;
-       unsigned int wanted = ios->clock;
-       unsigned long actual;
-       u8 div = priv->ciu_div + 1;
+       u32 dqs, strobe;
 
-       if (ios->timing == MMC_TIMING_MMC_DDR52) {
-               if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
-                       priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
-                       mci_writel(host, CLKSEL64, priv->ddr_timing);
-               else
-                       mci_writel(host, CLKSEL, priv->ddr_timing);
-               /* Should be double rate for DDR mode */
-               if (ios->bus_width == MMC_BUS_WIDTH_8)
-                       wanted <<= 1;
+       /*
+        * Controllers older than Exynos5420 have no HS400-specific
+        * registers to configure.
+        */
+       if (priv->ctrl_type < DW_MCI_TYPE_EXYNOS5420)
+               return;
+
+       dqs = priv->saved_dqs_en;
+       strobe = priv->saved_strobe_ctrl;
+
+       if (timing == MMC_TIMING_MMC_HS400) {
+               dqs |= DATA_STROBE_EN;
+               strobe = DQS_CTRL_RD_DELAY(strobe, priv->dqs_delay);
        } else {
-               if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
-                       priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
-                       mci_writel(host, CLKSEL64, priv->sdr_timing);
-               else
-                       mci_writel(host, CLKSEL, priv->sdr_timing);
+               dqs &= ~DATA_STROBE_EN;
        }
 
+       mci_writel(host, HS400_DQS_EN, dqs);
+       mci_writel(host, HS400_DLINE_CTRL, strobe);
+}
+
+static void dw_mci_exynos_adjust_clock(struct dw_mci *host, unsigned int wanted)
+{
+       struct dw_mci_exynos_priv_data *priv = host->priv;
+       unsigned long actual;
+       u8 div;
+       int ret;
        /*
         * Don't care if wanted clock is zero or
         * ciu clock is unavailable
@@ -207,17 +265,52 @@ static void dw_mci_exynos_set_ios(struct dw_mci *host, struct mmc_ios *ios)
        if (wanted < EXYNOS_CCLKIN_MIN)
                wanted = EXYNOS_CCLKIN_MIN;
 
-       if (wanted != priv->cur_speed) {
-               int ret = clk_set_rate(host->ciu_clk, wanted * div);
-               if (ret)
-                       dev_warn(host->dev,
-                               "failed to set clk-rate %u error: %d\n",
-                                wanted * div, ret);
-               actual = clk_get_rate(host->ciu_clk);
-               host->bus_hz = actual / div;
-               priv->cur_speed = wanted;
-               host->current_speed = 0;
+       if (wanted == priv->cur_speed)
+               return;
+
+       div = dw_mci_exynos_get_ciu_div(host);
+       ret = clk_set_rate(host->ciu_clk, wanted * div);
+       if (ret)
+               dev_warn(host->dev,
+                       "failed to set clk-rate %u error: %d\n",
+                       wanted * div, ret);
+       actual = clk_get_rate(host->ciu_clk);
+       host->bus_hz = actual / div;
+       priv->cur_speed = wanted;
+       host->current_speed = 0;
+}
+
+static void dw_mci_exynos_set_ios(struct dw_mci *host, struct mmc_ios *ios)
+{
+       struct dw_mci_exynos_priv_data *priv = host->priv;
+       unsigned int wanted = ios->clock;
+       u32 timing = ios->timing, clksel;
+
+       switch (timing) {
+       case MMC_TIMING_MMC_HS400:
+               /* Update tuned sample timing */
+               clksel = SDMMC_CLKSEL_UP_SAMPLE(
+                               priv->hs400_timing, priv->tuned_sample);
+               wanted <<= 1;
+               break;
+       case MMC_TIMING_MMC_DDR52:
+               clksel = priv->ddr_timing;
+               /* Should be double rate for DDR mode */
+               if (ios->bus_width == MMC_BUS_WIDTH_8)
+                       wanted <<= 1;
+               break;
+       default:
+               clksel = priv->sdr_timing;
        }
+
+       /* Set clock timing for the requested speed mode */
+       dw_mci_exynos_set_clksel_timing(host, clksel);
+
+       /* Configure setting for HS400 */
+       dw_mci_exynos_config_hs400(host, timing);
+
+       /* Configure clock rate */
+       dw_mci_exynos_adjust_clock(host, wanted);
 }
 
 static int dw_mci_exynos_parse_dt(struct dw_mci *host)
@@ -260,6 +353,16 @@ static int dw_mci_exynos_parse_dt(struct dw_mci *host)
                return ret;
 
        priv->ddr_timing = SDMMC_CLKSEL_TIMING(timing[0], timing[1], div);
+
+       ret = of_property_read_u32_array(np,
+                       "samsung,dw-mshc-hs400-timing", timing, 2);
+       if (!ret && of_property_read_u32(np,
+                               "samsung,read-strobe-delay", &priv->dqs_delay))
+               dev_dbg(host->dev,
+                       "read-strobe-delay is not found, assuming usage of default value\n");
+
+       priv->hs400_timing = SDMMC_CLKSEL_TIMING(timing[0], timing[1],
+                                               HS400_FIXED_CIU_CLK_DIV);
        host->priv = priv;
        return 0;
 }
@@ -285,7 +388,7 @@ static inline void dw_mci_exynos_set_clksmpl(struct dw_mci *host, u8 sample)
                clksel = mci_readl(host, CLKSEL64);
        else
                clksel = mci_readl(host, CLKSEL);
-       clksel = (clksel & ~0x7) | SDMMC_CLKSEL_CCLK_SAMPLE(sample);
+       clksel = SDMMC_CLKSEL_UP_SAMPLE(clksel, sample);
        if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
                priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
                mci_writel(host, CLKSEL64, clksel);
@@ -304,13 +407,16 @@ static inline u8 dw_mci_exynos_move_next_clksmpl(struct dw_mci *host)
                clksel = mci_readl(host, CLKSEL64);
        else
                clksel = mci_readl(host, CLKSEL);
+
        sample = (clksel + 1) & 0x7;
-       clksel = (clksel & ~0x7) | sample;
+       clksel = SDMMC_CLKSEL_UP_SAMPLE(clksel, sample);
+
        if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS7 ||
                priv->ctrl_type == DW_MCI_TYPE_EXYNOS7_SMU)
                mci_writel(host, CLKSEL64, clksel);
        else
                mci_writel(host, CLKSEL, clksel);
+
        return sample;
 }
 
@@ -343,6 +449,7 @@ out:
 static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot)
 {
        struct dw_mci *host = slot->host;
+       struct dw_mci_exynos_priv_data *priv = host->priv;
        struct mmc_host *mmc = slot->mmc;
        u8 start_smpl, smpl, candidates = 0;
        s8 found = -1;
@@ -360,14 +467,27 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot)
        } while (start_smpl != smpl);
 
        found = dw_mci_exynos_get_best_clksmpl(candidates);
-       if (found >= 0)
+       if (found >= 0) {
                dw_mci_exynos_set_clksmpl(host, found);
-       else
+               priv->tuned_sample = found;
+       } else {
                ret = -EIO;
+       }
 
        return ret;
 }
 
+static int dw_mci_exynos_prepare_hs400_tuning(struct dw_mci *host,
+                                       struct mmc_ios *ios)
+{
+       struct dw_mci_exynos_priv_data *priv = host->priv;
+
+       dw_mci_exynos_set_clksel_timing(host, priv->hs400_timing);
+       dw_mci_exynos_adjust_clock(host, (ios->clock) << 1);
+
+       return 0;
+}
+
 /* Common capabilities of Exynos4/Exynos5 SoC */
 static unsigned long exynos_dwmmc_caps[4] = {
        MMC_CAP_1_8V_DDR | MMC_CAP_8_BIT_DATA | MMC_CAP_CMD23,
@@ -384,6 +504,7 @@ static const struct dw_mci_drv_data exynos_drv_data = {
        .set_ios                = dw_mci_exynos_set_ios,
        .parse_dt               = dw_mci_exynos_parse_dt,
        .execute_tuning         = dw_mci_exynos_execute_tuning,
+       .prepare_hs400_tuning   = dw_mci_exynos_prepare_hs400_tuning,
 };
 
 static const struct of_device_id dw_mci_exynos_match[] = {
index 7872ce586b558e07461c748f600f6ef6777bb12b..595c934e6166cf1b9aef2c18a8a28ec3c3062163 100644 (file)
 #ifndef _DW_MMC_EXYNOS_H_
 #define _DW_MMC_EXYNOS_H_
 
-/* Extended Register's Offset */
 #define SDMMC_CLKSEL                   0x09C
 #define SDMMC_CLKSEL64                 0x0A8
 
+/* Extended Register's Offset */
+#define SDMMC_HS400_DQS_EN             0x180
+#define SDMMC_HS400_ASYNC_FIFO_CTRL    0x184
+#define SDMMC_HS400_DLINE_CTRL         0x188
+
 /* CLKSEL register defines */
 #define SDMMC_CLKSEL_CCLK_SAMPLE(x)    (((x) & 7) << 0)
 #define SDMMC_CLKSEL_CCLK_DRIVE(x)     (((x) & 7) << 16)
 #define SDMMC_CLKSEL_CCLK_DIVIDER(x)   (((x) & 7) << 24)
 #define SDMMC_CLKSEL_GET_DRV_WD3(x)    (((x) >> 16) & 0x7)
+#define SDMMC_CLKSEL_GET_DIV(x)                (((x) >> 24) & 0x7)
+#define SDMMC_CLKSEL_UP_SAMPLE(x, y)   (((x) & ~SDMMC_CLKSEL_CCLK_SAMPLE(7)) |\
+                                        SDMMC_CLKSEL_CCLK_SAMPLE(y))
 #define SDMMC_CLKSEL_TIMING(x, y, z)   (SDMMC_CLKSEL_CCLK_SAMPLE(x) |  \
                                         SDMMC_CLKSEL_CCLK_DRIVE(y) |   \
                                         SDMMC_CLKSEL_CCLK_DIVIDER(z))
+#define SDMMC_CLKSEL_TIMING_MASK       SDMMC_CLKSEL_TIMING(0x7, 0x7, 0x7)
 #define SDMMC_CLKSEL_WAKEUP_INT                BIT(11)
 
+/* RCLK_EN register defines */
+#define DATA_STROBE_EN                 BIT(0)
+#define AXI_NON_BLOCKING_WR    BIT(7)
+
+/* DLINE_CTRL register defines */
+#define DQS_CTRL_RD_DELAY(x, y)                (((x) & ~0x3FF) | ((y) & 0x3FF))
+#define DQS_CTRL_GET_RD_DELAY(x)       ((x) & 0x3FF)
+
 /* Protector Register */
 #define SDMMC_EMMCP_BASE       0x1000
 #define SDMMC_MPSECURITY       (SDMMC_EMMCP_BASE + 0x0010)
@@ -49,6 +65,7 @@
 /* Fixed clock divider */
 #define EXYNOS4210_FIXED_CIU_CLK_DIV   2
 #define EXYNOS4412_FIXED_CIU_CLK_DIV   4
+#define HS400_FIXED_CIU_CLK_DIV                1
 
 /* Minimal required clock frequency for cclkin, unit: HZ */
 #define EXYNOS_CCLKIN_MIN      50000000
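
SDMMC_CLKSEL_UP_SAMPLE() is a read-modify-write helper for the 3-bit sample-phase field in bits [2:0] of CLKSEL; a quick worked example:

	/* Only the low sample field changes:
	 * SDMMC_CLKSEL_UP_SAMPLE(0xa0010007, 2)
	 *   = (0xa0010007 & ~0x7) | (2 & 0x7)
	 *   = 0xa0010000 | 0x2
	 *   = 0xa0010002
	 */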
index e2a726a503ee147072bbd5afaf8692dab53ca87d..dbf166f94f1b6a1457238e29b435f83171509e26 100644 (file)
@@ -76,12 +76,20 @@ static int dw_mci_rockchip_init(struct dw_mci *host)
        return 0;
 }
 
+/* Common capabilities of RK3288 SoC */
+static unsigned long dw_mci_rk3288_dwmmc_caps[4] = {
+       MMC_CAP_RUNTIME_RESUME, /* emmc */
+       MMC_CAP_RUNTIME_RESUME, /* sdmmc */
+       MMC_CAP_RUNTIME_RESUME, /* sdio0 */
+       MMC_CAP_RUNTIME_RESUME, /* sdio1 */
+};
 static const struct dw_mci_drv_data rk2928_drv_data = {
        .prepare_command        = dw_mci_rockchip_prepare_command,
        .init                   = dw_mci_rockchip_init,
 };
 
 static const struct dw_mci_drv_data rk3288_drv_data = {
+       .caps                   = dw_mci_rk3288_dwmmc_caps,
        .prepare_command        = dw_mci_rockchip_prepare_command,
        .set_ios                = dw_mci_rk3288_set_ios,
        .setup_clock    = dw_mci_rk3288_setup_clock,
index 4d2e3c2e183057ddd13e29c1d1b8398a01dd7c5c..38b29265cc7c7625484db40b4d38f2148a2c1aa6 100644 (file)
@@ -69,7 +69,8 @@ struct idmac_desc_64addr {
 
        u32             des2;   /*Buffer sizes */
 #define IDMAC_64ADDR_SET_BUFFER1_SIZE(d, s) \
-       ((d)->des2 = ((d)->des2 & 0x03ffe000) | ((s) & 0x1fff))
+       ((d)->des2 = ((d)->des2 & cpu_to_le32(0x03ffe000)) | \
+        ((cpu_to_le32(s)) & cpu_to_le32(0x1fff)))
 
        u32             des3;   /* Reserved */
 
@@ -81,7 +82,7 @@ struct idmac_desc_64addr {
 };
 
 struct idmac_desc {
-       u32             des0;   /* Control Descriptor */
+       __le32          des0;   /* Control Descriptor */
 #define IDMAC_DES0_DIC BIT(1)
 #define IDMAC_DES0_LD  BIT(2)
 #define IDMAC_DES0_FD  BIT(3)
@@ -90,18 +91,19 @@ struct idmac_desc {
 #define IDMAC_DES0_CES BIT(30)
 #define IDMAC_DES0_OWN BIT(31)
 
-       u32             des1;   /* Buffer sizes */
+       __le32          des1;   /* Buffer sizes */
 #define IDMAC_SET_BUFFER1_SIZE(d, s) \
        ((d)->des1 = ((d)->des1 & 0x03ffe000) | ((s) & 0x1fff))
 
-       u32             des2;   /* buffer 1 physical address */
+       __le32          des2;   /* buffer 1 physical address */
 
-       u32             des3;   /* buffer 2 physical address */
+       __le32          des3;   /* buffer 2 physical address */
 };
 #endif /* CONFIG_MMC_DW_IDMAC */
 
 static bool dw_mci_reset(struct dw_mci *host);
 static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset);
+static int dw_mci_card_busy(struct mmc_host *mmc);
 
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
@@ -335,6 +337,31 @@ static u32 dw_mci_prep_stop_abort(struct dw_mci *host, struct mmc_command *cmd)
        return cmdr;
 }
 
+static void dw_mci_wait_while_busy(struct dw_mci *host, u32 cmd_flags)
+{
+       unsigned long timeout = jiffies + msecs_to_jiffies(500);
+
+       /*
+        * Databook says that before issuing a new data transfer command
+        * we need to check to see if the card is busy.  Data transfer commands
+        * all have SDMMC_CMD_PRV_DAT_WAIT set, so we'll key off that.
+        *
+        * ...also allow sending for SDMMC_CMD_VOLT_SWITCH where busy is
+        * expected.
+        */
+       if ((cmd_flags & SDMMC_CMD_PRV_DAT_WAIT) &&
+           !(cmd_flags & SDMMC_CMD_VOLT_SWITCH)) {
+               while (mci_readl(host, STATUS) & SDMMC_STATUS_BUSY) {
+                       if (time_after(jiffies, timeout)) {
+                               /* Command will fail; we'll pass error then */
+                               dev_err(host->dev, "Busy; trying anyway\n");
+                               break;
+                       }
+                       udelay(10);
+               }
+       }
+}
+
 static void dw_mci_start_command(struct dw_mci *host,
                                 struct mmc_command *cmd, u32 cmd_flags)
 {
@@ -345,6 +372,7 @@ static void dw_mci_start_command(struct dw_mci *host,
 
        mci_writel(host, CMDARG, cmd->arg);
        wmb();
+       dw_mci_wait_while_busy(host, cmd_flags);
 
        mci_writel(host, CMD, cmd_flags | SDMMC_CMD_START);
 }
@@ -477,23 +505,23 @@ static void dw_mci_translate_sglist(struct dw_mci *host, struct mmc_data *data,
                         * Set the OWN bit and disable interrupts for this
                         * descriptor
                         */
-                       desc->des0 = IDMAC_DES0_OWN | IDMAC_DES0_DIC |
-                                               IDMAC_DES0_CH;
+                       desc->des0 = cpu_to_le32(IDMAC_DES0_OWN |
+                                       IDMAC_DES0_DIC | IDMAC_DES0_CH);
                        /* Buffer length */
                        IDMAC_SET_BUFFER1_SIZE(desc, length);
 
                        /* Physical address to DMA to/from */
-                       desc->des2 = mem_addr;
+                       desc->des2 = cpu_to_le32(mem_addr);
                }
 
                /* Set first descriptor */
                desc = host->sg_cpu;
-               desc->des0 |= IDMAC_DES0_FD;
+               desc->des0 |= cpu_to_le32(IDMAC_DES0_FD);
 
                /* Set last descriptor */
                desc = host->sg_cpu + (i - 1) * sizeof(struct idmac_desc);
-               desc->des0 &= ~(IDMAC_DES0_CH | IDMAC_DES0_DIC);
-               desc->des0 |= IDMAC_DES0_LD;
+               desc->des0 &= cpu_to_le32(~(IDMAC_DES0_CH | IDMAC_DES0_DIC));
+               desc->des0 |= cpu_to_le32(IDMAC_DES0_LD);
        }
 
        wmb();
@@ -562,12 +590,12 @@ static int dw_mci_idmac_init(struct dw_mci *host)
 
                /* Forward link the descriptor list */
                for (i = 0, p = host->sg_cpu; i < host->ring_size - 1; i++, p++)
-                       p->des3 = host->sg_dma + (sizeof(struct idmac_desc) *
-                                                               (i + 1));
+                       p->des3 = cpu_to_le32(host->sg_dma +
+                                       (sizeof(struct idmac_desc) * (i + 1)));
 
                /* Set the last descriptor as the end-of-ring descriptor */
-               p->des3 = host->sg_dma;
-               p->des0 = IDMAC_DES0_ER;
+               p->des3 = cpu_to_le32(host->sg_dma);
+               p->des0 = cpu_to_le32(IDMAC_DES0_ER);
        }
 
        dw_mci_idmac_reset(host);
@@ -737,6 +765,7 @@ static void dw_mci_ctrl_rd_thld(struct dw_mci *host, struct mmc_data *data)
                return;
 
        if (host->timing != MMC_TIMING_MMC_HS200 &&
+           host->timing != MMC_TIMING_MMC_HS400 &&
            host->timing != MMC_TIMING_UHS_SDR104)
                goto disable;
 
@@ -876,6 +905,7 @@ static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg)
 
        mci_writel(host, CMDARG, arg);
        wmb();
+       dw_mci_wait_while_busy(host, cmd);
        mci_writel(host, CMD, SDMMC_CMD_START | cmd);
 
        while (time_before(jiffies, timeout)) {
@@ -992,6 +1022,26 @@ static void __dw_mci_start_request(struct dw_mci *host,
 
        dw_mci_start_command(host, cmd, cmdflags);
 
+       if (cmd->opcode == SD_SWITCH_VOLTAGE) {
+               unsigned long irqflags;
+
+               /*
+                * Databook says to fail after 2ms w/ no response, but evidence
+                * shows that sometimes the cmd11 interrupt takes over 130ms.
+                * We'll set the timeout to 500ms, plus an extra jiffy in
+                * case jiffies is about to roll over.
+                *
+                * We do this whole thing under spinlock and only if the
+                * command hasn't already completed (indicating that the irq
+                * already ran, so we don't want the timeout).
+                */
+               spin_lock_irqsave(&host->irq_lock, irqflags);
+               if (!test_bit(EVENT_CMD_COMPLETE, &host->pending_events))
+                       mod_timer(&host->cmd11_timer,
+                               jiffies + msecs_to_jiffies(500) + 1);
+               spin_unlock_irqrestore(&host->irq_lock, irqflags);
+       }
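+       /*
+        * Timer lifecycle: armed here; deleted in dw_mci_interrupt() once
+        * SDMMC_INT_VOLT_SWITCH arrives; on expiry, dw_mci_cmd11_timer()
+        * fakes a response timeout (SDMMC_INT_RTO) so the normal tasklet
+        * error path tears the request down.
+        */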
+
        if (mrq->stop)
                host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
        else
@@ -1084,7 +1134,8 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        regs = mci_readl(slot->host, UHS_REG);
 
        /* DDR mode set */
-       if (ios->timing == MMC_TIMING_MMC_DDR52)
+       if (ios->timing == MMC_TIMING_MMC_DDR52 ||
+           ios->timing == MMC_TIMING_MMC_HS400)
                regs |= ((0x1 << slot->id) << 16);
        else
                regs &= ~((0x1 << slot->id) << 16);
@@ -1101,12 +1152,6 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        if (drv_data && drv_data->set_ios)
                drv_data->set_ios(slot->host, ios);
 
-       /* Slot specific timing and width adjustment */
-       dw_mci_setup_bus(slot, false);
-
-       if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0)
-               slot->host->state = STATE_IDLE;
-
        switch (ios->power_mode) {
        case MMC_POWER_UP:
                if (!IS_ERR(mmc->supply.vmmc)) {
@@ -1125,23 +1170,39 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                mci_writel(slot->host, PWREN, regs);
                break;
        case MMC_POWER_ON:
-               if (!IS_ERR(mmc->supply.vqmmc) && !slot->host->vqmmc_enabled) {
-                       ret = regulator_enable(mmc->supply.vqmmc);
-                       if (ret < 0)
-                               dev_err(slot->host->dev,
-                                       "failed to enable vqmmc regulator\n");
-                       else
+               if (!slot->host->vqmmc_enabled) {
+                       if (!IS_ERR(mmc->supply.vqmmc)) {
+                               ret = regulator_enable(mmc->supply.vqmmc);
+                               if (ret < 0)
+                                       dev_err(slot->host->dev,
+                                               "failed to enable vqmmc\n");
+                               else
+                                       slot->host->vqmmc_enabled = true;
+                       } else {
+                               /* Keep track so we don't reset again */
                                slot->host->vqmmc_enabled = true;
+                       }
+
+                       /* Reset our state machine after powering on */
+                       dw_mci_ctrl_reset(slot->host,
+                                         SDMMC_CTRL_ALL_RESET_FLAGS);
                }
+
+               /* Adjust clock / bus width after power is up */
+               dw_mci_setup_bus(slot, false);
+
                break;
        case MMC_POWER_OFF:
+               /* Turn clock off before power goes down */
+               dw_mci_setup_bus(slot, false);
+
                if (!IS_ERR(mmc->supply.vmmc))
                        mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
 
-               if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled) {
+               if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled)
                        regulator_disable(mmc->supply.vqmmc);
-                       slot->host->vqmmc_enabled = false;
-               }
+               slot->host->vqmmc_enabled = false;
 
                regs = mci_readl(slot->host, PWREN);
                regs &= ~(1 << slot->id);
@@ -1150,6 +1211,9 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        default:
                break;
        }
+
+       if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0)
+               slot->host->state = STATE_IDLE;
 }
 
 static int dw_mci_card_busy(struct mmc_host *mmc)
@@ -1323,6 +1387,18 @@ static int dw_mci_execute_tuning(struct mmc_host *mmc, u32 opcode)
        return err;
 }
 
+static int dw_mci_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+       struct dw_mci_slot *slot = mmc_priv(mmc);
+       struct dw_mci *host = slot->host;
+       const struct dw_mci_drv_data *drv_data = host->drv_data;
+
+       if (drv_data && drv_data->prepare_hs400_tuning)
+               return drv_data->prepare_hs400_tuning(host, ios);
+
+       return 0;
+}
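+/*
+ * SoC glue opts in by filling the new callback in its dw_mci_drv_data.
+ * A minimal sketch, using a hypothetical "foo" platform (names not
+ * taken from this patch):
+ *
+ *     static int dw_mci_foo_prepare_hs400_tuning(struct dw_mci *host,
+ *                                                struct mmc_ios *ios)
+ *     {
+ *             return 0;       // e.g. set the HS400 sampling phase
+ *     }
+ *
+ *     static const struct dw_mci_drv_data foo_drv_data = {
+ *             .prepare_hs400_tuning = dw_mci_foo_prepare_hs400_tuning,
+ *     };
+ */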
+
 static const struct mmc_host_ops dw_mci_ops = {
        .request                = dw_mci_request,
        .pre_req                = dw_mci_pre_req,
@@ -1335,6 +1411,7 @@ static const struct mmc_host_ops dw_mci_ops = {
        .card_busy              = dw_mci_card_busy,
        .start_signal_voltage_switch = dw_mci_switch_voltage,
        .init_card              = dw_mci_init_card,
+       .prepare_hs400_tuning   = dw_mci_prepare_hs400_tuning,
 };
 
 static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq)
@@ -1520,7 +1597,10 @@ static void dw_mci_tasklet_func(unsigned long priv)
                        if (test_and_clear_bit(EVENT_DATA_ERROR,
                                               &host->pending_events)) {
                                dw_mci_stop_dma(host);
-                               send_stop_abort(host, data);
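+                               /*
+                                * Only send the stop/abort if the request
+                                * carries an explicit stop command or the
+                                * error is something other than a data read
+                                * timeout / end-bit error (in which case,
+                                * presumably, there is no transfer left to
+                                * abort).
+                                */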
+                               if (data->stop ||
+                                   !(host->data_status & (SDMMC_INT_DRTO |
+                                                          SDMMC_INT_EBE)))
+                                       send_stop_abort(host, data);
                                state = STATE_DATA_ERROR;
                                break;
                        }
@@ -1547,7 +1627,10 @@ static void dw_mci_tasklet_func(unsigned long priv)
                        if (test_and_clear_bit(EVENT_DATA_ERROR,
                                               &host->pending_events)) {
                                dw_mci_stop_dma(host);
-                               send_stop_abort(host, data);
+                               if (data->stop ||
+                                   !(host->data_status & (SDMMC_INT_DRTO |
+                                                          SDMMC_INT_EBE)))
+                                       send_stop_abort(host, data);
                                state = STATE_DATA_ERROR;
                                break;
                        }
@@ -1685,8 +1768,7 @@ static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
                buf += len;
                cnt -= len;
                if (host->part_buf_count == 2) {
-                       mci_writew(host, DATA(host->data_offset),
-                                       host->part_buf16);
+                       mci_fifo_writew(host->fifo_reg, host->part_buf16);
                        host->part_buf_count = 0;
                }
        }
@@ -1703,15 +1785,14 @@ static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
                        cnt -= len;
                        /* push data from aligned buffer into fifo */
                        for (i = 0; i < items; ++i)
-                               mci_writew(host, DATA(host->data_offset),
-                                               aligned_buf[i]);
+                               mci_fifo_writew(host->fifo_reg, aligned_buf[i]);
                }
        } else
 #endif
        {
                u16 *pdata = buf;
                for (; cnt >= 2; cnt -= 2)
-                       mci_writew(host, DATA(host->data_offset), *pdata++);
+                       mci_fifo_writew(host->fifo_reg, *pdata++);
                buf = pdata;
        }
        /* put anything remaining in the part_buf */
@@ -1720,8 +1801,7 @@ static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
                 /* Push data if we have reached the expected data length */
                if ((data->bytes_xfered + init_cnt) ==
                    (data->blksz * data->blocks))
-                       mci_writew(host, DATA(host->data_offset),
-                                  host->part_buf16);
+                       mci_fifo_writew(host->fifo_reg, host->part_buf16);
        }
 }
 
@@ -1736,8 +1816,7 @@ static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
                        int items = len >> 1;
                        int i;
                        for (i = 0; i < items; ++i)
-                               aligned_buf[i] = mci_readw(host,
-                                               DATA(host->data_offset));
+                               aligned_buf[i] = mci_fifo_readw(host->fifo_reg);
                        /* memcpy from aligned buffer into output buffer */
                        memcpy(buf, aligned_buf, len);
                        buf += len;
@@ -1748,11 +1827,11 @@ static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
        {
                u16 *pdata = buf;
                for (; cnt >= 2; cnt -= 2)
-                       *pdata++ = mci_readw(host, DATA(host->data_offset));
+                       *pdata++ = mci_fifo_readw(host->fifo_reg);
                buf = pdata;
        }
        if (cnt) {
-               host->part_buf16 = mci_readw(host, DATA(host->data_offset));
+               host->part_buf16 = mci_fifo_readw(host->fifo_reg);
                dw_mci_pull_final_bytes(host, buf, cnt);
        }
 }
@@ -1768,8 +1847,7 @@ static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
                buf += len;
                cnt -= len;
                if (host->part_buf_count == 4) {
-                       mci_writel(host, DATA(host->data_offset),
-                                       host->part_buf32);
+                       mci_fifo_writel(host->fifo_reg, host->part_buf32);
                        host->part_buf_count = 0;
                }
        }
@@ -1786,15 +1864,14 @@ static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
                        cnt -= len;
                        /* push data from aligned buffer into fifo */
                        for (i = 0; i < items; ++i)
-                               mci_writel(host, DATA(host->data_offset),
-                                               aligned_buf[i]);
+                               mci_fifo_writel(host->fifo_reg, aligned_buf[i]);
                }
        } else
 #endif
        {
                u32 *pdata = buf;
                for (; cnt >= 4; cnt -= 4)
-                       mci_writel(host, DATA(host->data_offset), *pdata++);
+                       mci_fifo_writel(host->fifo_reg, *pdata++);
                buf = pdata;
        }
        /* put anything remaining in the part_buf */
@@ -1803,8 +1880,7 @@ static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
                 /* Push data if we have reached the expected data length */
                if ((data->bytes_xfered + init_cnt) ==
                    (data->blksz * data->blocks))
-                       mci_writel(host, DATA(host->data_offset),
-                                  host->part_buf32);
+                       mci_fifo_writel(host->fifo_reg, host->part_buf32);
        }
 }
 
@@ -1819,8 +1895,7 @@ static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
                        int items = len >> 2;
                        int i;
                        for (i = 0; i < items; ++i)
-                               aligned_buf[i] = mci_readl(host,
-                                               DATA(host->data_offset));
+                               aligned_buf[i] = mci_fifo_readl(host->fifo_reg);
                        /* memcpy from aligned buffer into output buffer */
                        memcpy(buf, aligned_buf, len);
                        buf += len;
@@ -1831,11 +1906,11 @@ static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
        {
                u32 *pdata = buf;
                for (; cnt >= 4; cnt -= 4)
-                       *pdata++ = mci_readl(host, DATA(host->data_offset));
+                       *pdata++ = mci_fifo_readl(host->fifo_reg);
                buf = pdata;
        }
        if (cnt) {
-               host->part_buf32 = mci_readl(host, DATA(host->data_offset));
+               host->part_buf32 = mci_fifo_readl(host->fifo_reg);
                dw_mci_pull_final_bytes(host, buf, cnt);
        }
 }
@@ -1852,8 +1927,7 @@ static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
                cnt -= len;
 
                if (host->part_buf_count == 8) {
-                       mci_writeq(host, DATA(host->data_offset),
-                                       host->part_buf);
+                       mci_fifo_writeq(host->fifo_reg, host->part_buf);
                        host->part_buf_count = 0;
                }
        }
@@ -1870,15 +1944,14 @@ static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
                        cnt -= len;
                        /* push data from aligned buffer into fifo */
                        for (i = 0; i < items; ++i)
-                               mci_writeq(host, DATA(host->data_offset),
-                                               aligned_buf[i]);
+                               mci_fifo_writeq(host->fifo_reg, aligned_buf[i]);
                }
        } else
 #endif
        {
                u64 *pdata = buf;
                for (; cnt >= 8; cnt -= 8)
-                       mci_writeq(host, DATA(host->data_offset), *pdata++);
+                       mci_fifo_writeq(host->fifo_reg, *pdata++);
                buf = pdata;
        }
        /* put anything remaining in the part_buf */
@@ -1887,8 +1960,7 @@ static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
                /* Push data if we have reached the expected data length */
                if ((data->bytes_xfered + init_cnt) ==
                    (data->blksz * data->blocks))
-                       mci_writeq(host, DATA(host->data_offset),
-                                  host->part_buf);
+                       mci_fifo_writeq(host->fifo_reg, host->part_buf);
        }
 }
 
@@ -1903,8 +1975,8 @@ static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
                        int items = len >> 3;
                        int i;
                        for (i = 0; i < items; ++i)
-                               aligned_buf[i] = mci_readq(host,
-                                               DATA(host->data_offset));
+                               aligned_buf[i] = mci_fifo_readq(host->fifo_reg);
+
                        /* memcpy from aligned buffer into output buffer */
                        memcpy(buf, aligned_buf, len);
                        buf += len;
@@ -1915,11 +1987,11 @@ static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
        {
                u64 *pdata = buf;
                for (; cnt >= 8; cnt -= 8)
-                       *pdata++ = mci_readq(host, DATA(host->data_offset));
+                       *pdata++ = mci_fifo_readq(host->fifo_reg);
                buf = pdata;
        }
        if (cnt) {
-               host->part_buf = mci_readq(host, DATA(host->data_offset));
+               host->part_buf = mci_fifo_readq(host->fifo_reg);
                dw_mci_pull_final_bytes(host, buf, cnt);
        }
 }
@@ -2097,9 +2169,20 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
                /* Check volt switch first, since it can look like an error */
                if ((host->state == STATE_SENDING_CMD11) &&
                    (pending & SDMMC_INT_VOLT_SWITCH)) {
+                       unsigned long irqflags;
+
                        mci_writel(host, RINTSTS, SDMMC_INT_VOLT_SWITCH);
                        pending &= ~SDMMC_INT_VOLT_SWITCH;
+
+                       /*
+                        * Hold the lock: dw_mci_cmd_interrupt() marks the
+                        * command complete, after which cmd11_timer can no
+                        * longer be armed, so deleting it after unlock is
+                        * safe.
+                        */
+                       spin_lock_irqsave(&host->irq_lock, irqflags);
                        dw_mci_cmd_interrupt(host, pending);
+                       spin_unlock_irqrestore(&host->irq_lock, irqflags);
+
+                       del_timer(&host->cmd11_timer);
                }
 
                if (pending & DW_MCI_CMD_ERROR_FLAGS) {
@@ -2156,6 +2239,10 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id)
                /* Handle SDIO Interrupts */
                for (i = 0; i < host->num_slots; i++) {
                        struct dw_mci_slot *slot = host->slot[i];
+
+                       if (!slot)
+                               continue;
+
                        if (pending & SDMMC_INT_SDIO(slot->sdio_id)) {
                                mci_writel(host, RINTSTS,
                                           SDMMC_INT_SDIO(slot->sdio_id));
@@ -2506,6 +2593,20 @@ ciu_out:
        return ret;
 }
 
+static void dw_mci_cmd11_timer(unsigned long arg)
+{
+       struct dw_mci *host = (struct dw_mci *)arg;
+
+       if (host->state != STATE_SENDING_CMD11) {
+               dev_warn(host->dev, "Unexpected CMD11 timeout\n");
+               return;
+       }
+
+       host->cmd_status = SDMMC_INT_RTO;
+       set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
+       tasklet_schedule(&host->tasklet);
+}
+
 #ifdef CONFIG_OF
 static struct dw_mci_of_quirks {
        char *quirk;
@@ -2574,6 +2675,34 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
 }
 #endif /* CONFIG_OF */
 
+static void dw_mci_enable_cd(struct dw_mci *host)
+{
+       struct dw_mci_board *brd = host->pdata;
+       unsigned long irqflags;
+       u32 temp;
+       int i;
+
+       /* No need for the CD interrupt if card detection is broken */
+       if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
+               return;
+
+       /*
+        * No need for the CD interrupt if every slot has a usable CD GPIO
+        * (mmc_gpio_get_cd() returns an error only when no GPIO is set up).
+        */
+       for (i = 0; i < host->num_slots; i++) {
+               struct dw_mci_slot *slot = host->slot[i];
+
+               if (IS_ERR_VALUE(mmc_gpio_get_cd(slot->mmc)))
+                       break;
+       }
+       if (i == host->num_slots)
+               return;
+
+       spin_lock_irqsave(&host->irq_lock, irqflags);
+       temp = mci_readl(host, INTMASK);
+       temp |= SDMMC_INT_CD;
+       mci_writel(host, INTMASK, temp);
+       spin_unlock_irqrestore(&host->irq_lock, irqflags);
+}
+
 int dw_mci_probe(struct dw_mci *host)
 {
        const struct dw_mci_drv_data *drv_data = host->drv_data;
@@ -2652,6 +2781,9 @@ int dw_mci_probe(struct dw_mci *host)
                }
        }
 
+       setup_timer(&host->cmd11_timer,
+                   dw_mci_cmd11_timer, (unsigned long)host);
+
        host->quirks = host->pdata->quirks;
 
        spin_lock_init(&host->lock);
@@ -2731,9 +2863,9 @@ int dw_mci_probe(struct dw_mci *host)
        dev_info(host->dev, "Version ID is %04x\n", host->verid);
 
        if (host->verid < DW_MMC_240A)
-               host->data_offset = DATA_OFFSET;
+               host->fifo_reg = host->regs + DATA_OFFSET;
        else
-               host->data_offset = DATA_240A_OFFSET;
+               host->fifo_reg = host->regs + DATA_240A_OFFSET;
 
        tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
        ret = devm_request_irq(host->dev, host->irq, dw_mci_interrupt,
@@ -2747,13 +2879,13 @@ int dw_mci_probe(struct dw_mci *host)
                host->num_slots = ((mci_readl(host, HCON) >> 1) & 0x1F) + 1;
 
        /*
-        * Enable interrupts for command done, data over, data empty, card det,
+        * Enable interrupts for command done, data over, data empty,
         * receive ready and error such as transmit, receive timeout, crc error
         */
        mci_writel(host, RINTSTS, 0xFFFFFFFF);
        mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
                   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
-                  DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+                  DW_MCI_ERROR_FLAGS);
        mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
 
        dev_info(host->dev, "DW MMC controller at irq %d, "
@@ -2778,6 +2910,9 @@ int dw_mci_probe(struct dw_mci *host)
                goto err_dmaunmap;
        }
 
+       /* Now that slots are all set up, we can enable card detect */
+       dw_mci_enable_cd(host);
+
        if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
                dev_info(host->dev, "Internal DMAC interrupt fix enabled.\n");
 
@@ -2864,7 +2999,7 @@ int dw_mci_resume(struct dw_mci *host)
        mci_writel(host, RINTSTS, 0xFFFFFFFF);
        mci_writel(host, INTMASK, SDMMC_INT_CMD_DONE | SDMMC_INT_DATA_OVER |
                   SDMMC_INT_TXDR | SDMMC_INT_RXDR |
-                  DW_MCI_ERROR_FLAGS | SDMMC_INT_CD);
+                  DW_MCI_ERROR_FLAGS);
        mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE);
 
        for (i = 0; i < host->num_slots; i++) {
@@ -2876,6 +3011,10 @@ int dw_mci_resume(struct dw_mci *host)
                        dw_mci_setup_bus(slot, true);
                }
        }
+
+       /* Now that slots are all set up, we can enable card detect */
+       dw_mci_enable_cd(host);
+
        return 0;
 }
 EXPORT_SYMBOL(dw_mci_resume);
index 18c4afe683b83c60d0941230b34735a64f58a6e4..f45ab91de33946ac86abc72ae8305eb1f18aa55e 100644
 #define SDMMC_CTRL_ALL_RESET_FLAGS \
        (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
 
+/*
+ * FIFO register access macros. These must not change the data
+ * endianness, as the data is written to memory to be dealt with by
+ * the upper layers.
+ */
+#define mci_fifo_readw(__reg)  __raw_readw(__reg)
+#define mci_fifo_readl(__reg)  __raw_readl(__reg)
+#define mci_fifo_readq(__reg)  __raw_readq(__reg)
+
+/* Callers pass (reg, value); the raw accessors take (value, addr) */
+#define mci_fifo_writew(__reg, __value)        __raw_writew(__value, __reg)
+#define mci_fifo_writel(__reg, __value)        __raw_writel(__value, __reg)
+#define mci_fifo_writeq(__reg, __value)        __raw_writeq(__value, __reg)
+
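+/*
+ * The control registers below are little-endian, so they use the
+ * relaxed accessors (which byte-swap on big-endian kernels), while the
+ * FIFO keeps the non-swapping __raw_*() forms above.
+ */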
 /* Register access macros */
 #define mci_readl(dev, reg)                    \
-       __raw_readl((dev)->regs + SDMMC_##reg)
+       readl_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writel(dev, reg, value)                    \
-       __raw_writel((value), (dev)->regs + SDMMC_##reg)
+       writel_relaxed((value), (dev)->regs + SDMMC_##reg)
 
 /* 16-bit FIFO access macros */
 #define mci_readw(dev, reg)                    \
-       __raw_readw((dev)->regs + SDMMC_##reg)
+       readw_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writew(dev, reg, value)                    \
-       __raw_writew((value), (dev)->regs + SDMMC_##reg)
+       writew_relaxed((value), (dev)->regs + SDMMC_##reg)
 
 /* 64-bit FIFO access macros */
 #ifdef readq
 #define mci_readq(dev, reg)                    \
-       __raw_readq((dev)->regs + SDMMC_##reg)
+       readq_relaxed((dev)->regs + SDMMC_##reg)
 #define mci_writeq(dev, reg, value)                    \
-       __raw_writeq((value), (dev)->regs + SDMMC_##reg)
+       writeq_relaxed((value), (dev)->regs + SDMMC_##reg)
 #else
 /*
  * Dummy readq implementation for architectures that don't define it.
        (*(volatile u64 __force *)((dev)->regs + SDMMC_##reg))
 #define mci_writeq(dev, reg, value)                    \
        (*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value))
+
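+/* Raw 64-bit accessors for mci_fifo_readq/writeq on architectures
+ * without native readq/writeq; since macros expand at the call site,
+ * defining these below their users is fine. */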
+#define __raw_writeq(__value, __reg) \
+       (*(volatile u64 __force *)(__reg) = (__value))
+#define __raw_readq(__reg) (*(volatile u64 __force *)(__reg))
 #endif
 
 extern int dw_mci_probe(struct dw_mci *host);
@@ -271,5 +285,7 @@ struct dw_mci_drv_data {
        void            (*set_ios)(struct dw_mci *host, struct mmc_ios *ios);
        int             (*parse_dt)(struct dw_mci *host);
        int             (*execute_tuning)(struct dw_mci_slot *slot);
+       int             (*prepare_hs400_tuning)(struct dw_mci *host,
+                                               struct mmc_ios *ios);
 };
 #endif /* _DW_MMC_H_ */
index e4a07546f8b631d4c905dbe483449bf381960279..ae19d83bb9de0d966bb095f73c2f8b900578659a 100644
@@ -1507,7 +1507,7 @@ static int mmc_spi_remove(struct spi_device *spi)
        return 0;
 }
 
-static struct of_device_id mmc_spi_of_match_table[] = {
+static const struct of_device_id mmc_spi_of_match_table[] = {
        { .compatible = "mmc-spi-slot", },
        {},
 };
index 7fe16194ebc802c65c65a1741f746ff4af42fb2c..fb266745f8240d603956b8b791370ac4764f5a23 100644
@@ -1613,7 +1613,10 @@ static int mmci_probe(struct amba_device *dev,
        dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
 
        /* Get regulators and the supported OCR mask */
-       mmc_regulator_get_supply(mmc);
+       ret = mmc_regulator_get_supply(mmc);
+       if (ret == -EPROBE_DEFER)
+               goto clk_disable;
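+       /*
+        * Only -EPROBE_DEFER aborts the probe (the regulator may appear
+        * later); any other failure falls through to the platform
+        * ocr_mask fallback below.
+        */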
+
        if (!mmc->ocr_avail)
                mmc->ocr_avail = plat->ocr_mask;
        else if (plat->ocr_mask)
index f84cfb01716d777d159a9bbabf14b4176098b45b..9df2b6801f767c9c0da6904b689299c93d031417 100644
@@ -222,10 +222,6 @@ struct omap_hsmmc_host {
        struct omap_hsmmc_next  next_data;
        struct  omap_hsmmc_platform_data        *pdata;
 
-       /* To handle board related suspend/resume functionality for MMC */
-       int (*suspend)(struct device *dev);
-       int (*resume)(struct device *dev);
-
        /* return MMC cover switch state, can be NULL if not supported.
         *
         * possible return values:
@@ -234,12 +230,7 @@ struct omap_hsmmc_host {
         */
        int (*get_cover_state)(struct device *dev);
 
-       /* Card detection IRQs */
-       int card_detect_irq;
-
        int (*card_detect)(struct device *dev);
-       int (*get_ro)(struct device *dev);
-
 };
 
 struct omap_mmc_of_data {
@@ -256,13 +247,6 @@ static int omap_hsmmc_card_detect(struct device *dev)
        return mmc_gpio_get_cd(host->mmc);
 }
 
-static int omap_hsmmc_get_wp(struct device *dev)
-{
-       struct omap_hsmmc_host *host = dev_get_drvdata(dev);
-
-       return mmc_gpio_get_ro(host->mmc);
-}
-
 static int omap_hsmmc_get_cover_state(struct device *dev)
 {
        struct omap_hsmmc_host *host = dev_get_drvdata(dev);
@@ -434,7 +418,7 @@ static inline int omap_hsmmc_have_reg(void)
 
 #endif
 
-static irqreturn_t omap_hsmmc_detect(int irq, void *dev_id);
+static irqreturn_t omap_hsmmc_cover_irq(int irq, void *dev_id);
 
 static int omap_hsmmc_gpio_init(struct mmc_host *mmc,
                                struct omap_hsmmc_host *host,
@@ -442,29 +426,25 @@ static int omap_hsmmc_gpio_init(struct mmc_host *mmc,
 {
        int ret;
 
-       if (gpio_is_valid(pdata->switch_pin)) {
-               if (pdata->cover)
-                       host->get_cover_state =
-                               omap_hsmmc_get_cover_state;
-               else
-                       host->card_detect = omap_hsmmc_card_detect;
-               host->card_detect_irq =
-                               gpio_to_irq(pdata->switch_pin);
-               mmc_gpio_set_cd_isr(mmc, omap_hsmmc_detect);
-               ret = mmc_gpio_request_cd(mmc, pdata->switch_pin, 0);
+       if (gpio_is_valid(pdata->gpio_cod)) {
+               ret = mmc_gpio_request_cd(mmc, pdata->gpio_cod, 0);
                if (ret)
                        return ret;
-       } else {
-               pdata->switch_pin = -EINVAL;
+
+               host->get_cover_state = omap_hsmmc_get_cover_state;
+               mmc_gpio_set_cd_isr(mmc, omap_hsmmc_cover_irq);
+       } else if (gpio_is_valid(pdata->gpio_cd)) {
+               ret = mmc_gpio_request_cd(mmc, pdata->gpio_cd, 0);
+               if (ret)
+                       return ret;
+
+               host->card_detect = omap_hsmmc_card_detect;
        }
 
        if (gpio_is_valid(pdata->gpio_wp)) {
-               host->get_ro = omap_hsmmc_get_wp;
                ret = mmc_gpio_request_ro(mmc, pdata->gpio_wp);
                if (ret)
                        return ret;
-       } else {
-               pdata->gpio_wp = -EINVAL;
        }
 
        return 0;
@@ -882,6 +862,8 @@ static void omap_hsmmc_request_done(struct omap_hsmmc_host *host, struct mmc_req
                return;
        host->mrq = NULL;
        mmc_request_done(host->mmc, mrq);
+       pm_runtime_mark_last_busy(host->dev);
+       pm_runtime_put_autosuspend(host->dev);
 }
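+/*
+ * Runtime-PM lifecycle introduced here: omap_hsmmc_request() takes a
+ * reference with pm_runtime_get_sync(), and every completion path
+ * (request done, the DMA callback, and the early-error returns) drops
+ * it with pm_runtime_mark_last_busy() plus pm_runtime_put_autosuspend(),
+ * keeping the reference count balanced.
+ */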
 
 /*
@@ -1252,26 +1234,16 @@ static void omap_hsmmc_protect_card(struct omap_hsmmc_host *host)
 }
 
 /*
- * irq handler to notify the core about card insertion/removal
+ * irq handler called when the (cell-phone) cover is opened or closed
  */
-static irqreturn_t omap_hsmmc_detect(int irq, void *dev_id)
+static irqreturn_t omap_hsmmc_cover_irq(int irq, void *dev_id)
 {
        struct omap_hsmmc_host *host = dev_id;
-       int carddetect;
 
        sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
 
-       if (host->card_detect)
-               carddetect = host->card_detect(host->dev);
-       else {
-               omap_hsmmc_protect_card(host);
-               carddetect = -ENOSYS;
-       }
-
-       if (carddetect)
-               mmc_detect_change(host->mmc, (HZ * 200) / 1000);
-       else
-               mmc_detect_change(host->mmc, (HZ * 50) / 1000);
+       omap_hsmmc_protect_card(host);
+       mmc_detect_change(host->mmc, (HZ * 200) / 1000);
        return IRQ_HANDLED;
 }
 
@@ -1305,6 +1277,8 @@ static void omap_hsmmc_dma_callback(void *param)
 
                host->mrq = NULL;
                mmc_request_done(host->mmc, mrq);
+               pm_runtime_mark_last_busy(host->dev);
+               pm_runtime_put_autosuspend(host->dev);
        }
 }
 
@@ -1537,6 +1511,7 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
 
        BUG_ON(host->req_in_progress);
        BUG_ON(host->dma_ch != -1);
+       pm_runtime_get_sync(host->dev);
        if (host->protect_card) {
                if (host->reqs_blocked < 3) {
                        /*
@@ -1553,6 +1528,8 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
                        req->data->error = -EBADF;
                req->cmd->retries = 0;
                mmc_request_done(mmc, req);
+               pm_runtime_mark_last_busy(host->dev);
+               pm_runtime_put_autosuspend(host->dev);
                return;
        } else if (host->reqs_blocked)
                host->reqs_blocked = 0;
@@ -1566,6 +1543,8 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
                        req->data->error = err;
                host->mrq = NULL;
                mmc_request_done(mmc, req);
+               pm_runtime_mark_last_busy(host->dev);
+               pm_runtime_put_autosuspend(host->dev);
                return;
        }
        if (req->sbc && !(host->flags & AUTO_CMD23)) {
@@ -1641,15 +1620,6 @@ static int omap_hsmmc_get_cd(struct mmc_host *mmc)
        return host->card_detect(host->dev);
 }
 
-static int omap_hsmmc_get_ro(struct mmc_host *mmc)
-{
-       struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-       if (!host->get_ro)
-               return -ENOSYS;
-       return host->get_ro(host->dev);
-}
-
 static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
 {
        struct omap_hsmmc_host *host = mmc_priv(mmc);
@@ -1778,25 +1748,6 @@ static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
        set_sd_bus_power(host);
 }
 
-static int omap_hsmmc_enable_fclk(struct mmc_host *mmc)
-{
-       struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-       pm_runtime_get_sync(host->dev);
-
-       return 0;
-}
-
-static int omap_hsmmc_disable_fclk(struct mmc_host *mmc)
-{
-       struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-       pm_runtime_mark_last_busy(host->dev);
-       pm_runtime_put_autosuspend(host->dev);
-
-       return 0;
-}
-
 static int omap_hsmmc_multi_io_quirk(struct mmc_card *card,
                                     unsigned int direction, int blk_size)
 {
@@ -1808,14 +1759,12 @@ static int omap_hsmmc_multi_io_quirk(struct mmc_card *card,
 }
 
 static struct mmc_host_ops omap_hsmmc_ops = {
-       .enable = omap_hsmmc_enable_fclk,
-       .disable = omap_hsmmc_disable_fclk,
        .post_req = omap_hsmmc_post_req,
        .pre_req = omap_hsmmc_pre_req,
        .request = omap_hsmmc_request,
        .set_ios = omap_hsmmc_set_ios,
        .get_cd = omap_hsmmc_get_cd,
-       .get_ro = omap_hsmmc_get_ro,
+       .get_ro = mmc_gpio_get_ro,
        .init_card = omap_hsmmc_init_card,
        .enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
 };
@@ -1937,7 +1886,8 @@ static struct omap_hsmmc_platform_data *of_get_hsmmc_pdata(struct device *dev)
        if (of_find_property(np, "ti,dual-volt", NULL))
                pdata->controller_flags |= OMAP_HSMMC_SUPPORTS_DUAL_VOLT;
 
-       pdata->switch_pin = -EINVAL;
+       pdata->gpio_cd = -EINVAL;
+       pdata->gpio_cod = -EINVAL;
        pdata->gpio_wp = -EINVAL;
 
        if (of_find_property(np, "ti,non-removable", NULL)) {
@@ -2179,9 +2129,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
                if (ret < 0)
                        goto err_slot_name;
        }
-       if (host->card_detect_irq && host->get_cover_state) {
+       if (host->get_cover_state) {
                ret = device_create_file(&mmc->class_dev,
-                                       &dev_attr_cover_switch);
+                                        &dev_attr_cover_switch);
                if (ret < 0)
                        goto err_slot_name;
        }
@@ -2236,7 +2186,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
        return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int omap_hsmmc_suspend(struct device *dev)
 {
        struct omap_hsmmc_host *host = dev_get_drvdata(dev);
@@ -2292,10 +2242,6 @@ static int omap_hsmmc_resume(struct device *dev)
        pm_runtime_put_autosuspend(host->dev);
        return 0;
 }
-
-#else
-#define omap_hsmmc_suspend     NULL
-#define omap_hsmmc_resume      NULL
 #endif
 
 static int omap_hsmmc_runtime_suspend(struct device *dev)
@@ -2376,8 +2322,7 @@ static int omap_hsmmc_runtime_resume(struct device *dev)
 }
 
 static struct dev_pm_ops omap_hsmmc_dev_pm_ops = {
-       .suspend        = omap_hsmmc_suspend,
-       .resume         = omap_hsmmc_resume,
+       SET_SYSTEM_SLEEP_PM_OPS(omap_hsmmc_suspend, omap_hsmmc_resume)
        .runtime_suspend = omap_hsmmc_runtime_suspend,
        .runtime_resume = omap_hsmmc_runtime_resume,
 };
index a45ed39d062c1d7a73d557cd3c62e326fbf61652..22d929fa3371adbf87c2efa744b0272701d72741 100644
@@ -40,7 +40,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/pm.h>
 #include <linux/mmc/slot-gpio.h>
-#include <linux/mmc/sdhci.h>
 
 #include "sdhci.h"
 
index 34bb8f92586e25980ea5467c366e1700884328a5..2bd90fb35c75e5b5c03f243aabd1edc99dce04d5 100644
@@ -54,7 +54,6 @@
 
 struct sdhci_bcm_kona_dev {
        struct mutex    write_lock; /* protect back to back writes */
-       struct clk      *external_clk;
 };
 
 
@@ -175,24 +174,6 @@ static void sdhci_bcm_kona_card_event(struct sdhci_host *host)
        }
 }
 
-/*
- * Get the base clock. Use central clock source for now. Not sure if different
- * clock speed to each dev is allowed
- */
-static unsigned int sdhci_bcm_kona_get_max_clk(struct sdhci_host *host)
-{
-       struct sdhci_bcm_kona_dev *kona_dev;
-       struct sdhci_pltfm_host *pltfm_priv = sdhci_priv(host);
-       kona_dev = sdhci_pltfm_priv(pltfm_priv);
-
-       return host->mmc->f_max;
-}
-
-static unsigned int sdhci_bcm_kona_get_timeout_clock(struct sdhci_host *host)
-{
-       return sdhci_bcm_kona_get_max_clk(host);
-}
-
 static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host,
                                u8 power_mode)
 {
@@ -207,8 +188,8 @@ static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host,
 
 static struct sdhci_ops sdhci_bcm_kona_ops = {
        .set_clock = sdhci_set_clock,
-       .get_max_clock = sdhci_bcm_kona_get_max_clk,
-       .get_timeout_clock = sdhci_bcm_kona_get_timeout_clock,
+       .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+       .get_timeout_clock = sdhci_pltfm_clk_get_max_clock,
        .platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks,
        .set_bus_width = sdhci_set_bus_width,
        .reset = sdhci_reset,
@@ -264,21 +245,21 @@ static int sdhci_bcm_kona_probe(struct platform_device *pdev)
                goto err_pltfm_free;
        }
 
-       /* Get and enable the external clock */
-       kona_dev->external_clk = devm_clk_get(dev, NULL);
-       if (IS_ERR(kona_dev->external_clk)) {
-               dev_err(dev, "Failed to get external clock\n");
-               ret = PTR_ERR(kona_dev->external_clk);
+       /* Get and enable the core clock */
+       pltfm_priv->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(pltfm_priv->clk)) {
+               dev_err(dev, "Failed to get core clock\n");
+               ret = PTR_ERR(pltfm_priv->clk);
                goto err_pltfm_free;
        }
 
-       if (clk_set_rate(kona_dev->external_clk, host->mmc->f_max) != 0) {
-               dev_err(dev, "Failed to set rate external clock\n");
+       if (clk_set_rate(pltfm_priv->clk, host->mmc->f_max) != 0) {
+               dev_err(dev, "Failed to set rate core clock\n");
                goto err_pltfm_free;
        }
 
-       if (clk_prepare_enable(kona_dev->external_clk) != 0) {
-               dev_err(dev, "Failed to enable external clock\n");
+       if (clk_prepare_enable(pltfm_priv->clk) != 0) {
+               dev_err(dev, "Failed to enable core clock\n");
                goto err_pltfm_free;
        }
 
@@ -333,7 +314,7 @@ err_reset:
        sdhci_bcm_kona_sd_reset(host);
 
 err_clk_disable:
-       clk_disable_unprepare(kona_dev->external_clk);
+       clk_disable_unprepare(pltfm_priv->clk);
 
 err_pltfm_free:
        sdhci_pltfm_free(pdev);
@@ -342,22 +323,6 @@ err_pltfm_free:
        return ret;
 }
 
-static int sdhci_bcm_kona_remove(struct platform_device *pdev)
-{
-       struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct sdhci_pltfm_host *pltfm_priv = sdhci_priv(host);
-       struct sdhci_bcm_kona_dev *kona_dev = sdhci_pltfm_priv(pltfm_priv);
-       int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-       sdhci_remove_host(host, dead);
-
-       clk_disable_unprepare(kona_dev->external_clk);
-
-       sdhci_pltfm_free(pdev);
-
-       return 0;
-}
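+/*
+ * The driver-specific remove() goes away because, with the clock now
+ * stored in sdhci_pltfm_host, the conversion relies on the generic
+ * sdhci_pltfm_unregister() to remove the host and disable
+ * pltfm_host->clk.
+ */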
-
 static struct platform_driver sdhci_bcm_kona_driver = {
        .driver         = {
                .name   = "sdhci-kona",
@@ -365,7 +330,7 @@ static struct platform_driver sdhci_bcm_kona_driver = {
                .of_match_table = sdhci_bcm_kona_of_match,
        },
        .probe          = sdhci_bcm_kona_probe,
-       .remove         = sdhci_bcm_kona_remove,
+       .remove         = sdhci_pltfm_unregister,
 };
 module_platform_driver(sdhci_bcm_kona_driver);
 
index 439d259fdf1d571336fe86a3b3de483143430517..0ef0343c603ad492937ef338af8b058ecc9f51e9 100644
@@ -180,11 +180,6 @@ err:
        return ret;
 }
 
-static int bcm2835_sdhci_remove(struct platform_device *pdev)
-{
-       return sdhci_pltfm_unregister(pdev);
-}
-
 static const struct of_device_id bcm2835_sdhci_of_match[] = {
        { .compatible = "brcm,bcm2835-sdhci" },
        { }
@@ -198,7 +193,7 @@ static struct platform_driver bcm2835_sdhci_driver = {
                .pm = SDHCI_PLTFM_PMOPS,
        },
        .probe = bcm2835_sdhci_probe,
-       .remove = bcm2835_sdhci_remove,
+       .remove = sdhci_pltfm_unregister,
 };
 module_platform_driver(bcm2835_sdhci_driver);
 
index a7935a8d0922218f0874deea4e7f303a35dbb685..59f2923f80547d37f2dd5d8f43d3aeacb49854ce 100644
@@ -98,18 +98,13 @@ static int sdhci_cns3xxx_probe(struct platform_device *pdev)
        return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata, 0);
 }
 
-static int sdhci_cns3xxx_remove(struct platform_device *pdev)
-{
-       return sdhci_pltfm_unregister(pdev);
-}
-
 static struct platform_driver sdhci_cns3xxx_driver = {
        .driver         = {
                .name   = "sdhci-cns3xxx",
                .pm     = SDHCI_PLTFM_PMOPS,
        },
        .probe          = sdhci_cns3xxx_probe,
-       .remove         = sdhci_cns3xxx_remove,
+       .remove         = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_cns3xxx_driver);
index ca969d271a270bfb0bcb6e56fc3626f7398218a5..407c21f152b2dacad43706db77b8a2c5a97c08e8 100644
 
 #include "sdhci-pltfm.h"
 
-struct sdhci_dove_priv {
-       struct clk *clk;
-};
-
 static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
 {
        u16 ret;
@@ -84,27 +80,17 @@ static int sdhci_dove_probe(struct platform_device *pdev)
 {
        struct sdhci_host *host;
        struct sdhci_pltfm_host *pltfm_host;
-       struct sdhci_dove_priv *priv;
        int ret;
 
-       priv = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_dove_priv),
-                           GFP_KERNEL);
-       if (!priv) {
-               dev_err(&pdev->dev, "unable to allocate private data");
-               return -ENOMEM;
-       }
-
-       priv->clk = devm_clk_get(&pdev->dev, NULL);
-
        host = sdhci_pltfm_init(pdev, &sdhci_dove_pdata, 0);
        if (IS_ERR(host))
                return PTR_ERR(host);
 
        pltfm_host = sdhci_priv(host);
-       pltfm_host->priv = priv;
+       pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
 
-       if (!IS_ERR(priv->clk))
-               clk_prepare_enable(priv->clk);
+       if (!IS_ERR(pltfm_host->clk))
+               clk_prepare_enable(pltfm_host->clk);
 
        ret = mmc_of_parse(host->mmc);
        if (ret)
@@ -117,26 +103,11 @@ static int sdhci_dove_probe(struct platform_device *pdev)
        return 0;
 
 err_sdhci_add:
-       if (!IS_ERR(priv->clk))
-               clk_disable_unprepare(priv->clk);
+       clk_disable_unprepare(pltfm_host->clk);
        sdhci_pltfm_free(pdev);
        return ret;
 }
 
-static int sdhci_dove_remove(struct platform_device *pdev)
-{
-       struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_dove_priv *priv = pltfm_host->priv;
-
-       sdhci_pltfm_unregister(pdev);
-
-       if (!IS_ERR(priv->clk))
-               clk_disable_unprepare(priv->clk);
-
-       return 0;
-}
-
 static const struct of_device_id sdhci_dove_of_match_table[] = {
        { .compatible = "marvell,dove-sdhci", },
        {}
@@ -150,7 +121,7 @@ static struct platform_driver sdhci_dove_driver = {
                .of_match_table = sdhci_dove_of_match_table,
        },
        .probe          = sdhci_dove_probe,
-       .remove         = sdhci_dove_remove,
+       .remove         = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_dove_driver);
index 10ef8244a239632819fd2f399bf1ee48b2bb8496..82f512d87cb8916e76314f465339c820ef92b1f1 100644
@@ -416,7 +416,7 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
                        new_val |= ESDHC_VENDOR_SPEC_FRC_SDCLK_ON;
                else
                        new_val &= ~ESDHC_VENDOR_SPEC_FRC_SDCLK_ON;
-                       writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
+               writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
                return;
        case SDHCI_HOST_CONTROL2:
                new_val = readl(host->ioaddr + ESDHC_VENDOR_SPEC);
@@ -864,6 +864,7 @@ static const struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
 #ifdef CONFIG_OF
 static int
 sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
+                        struct sdhci_host *host,
                         struct esdhc_platform_data *boarddata)
 {
        struct device_node *np = pdev->dev.of_node;
@@ -900,11 +901,14 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
        if (of_property_read_u32(np, "fsl,delay-line", &boarddata->delay_line))
                boarddata->delay_line = 0;
 
+       mmc_of_parse_voltage(np, &host->ocr_mask);
+
        return 0;
 }
 #else
 static inline int
 sdhci_esdhc_imx_probe_dt(struct platform_device *pdev,
+                        struct sdhci_host *host,
                         struct esdhc_platform_data *boarddata)
 {
        return -ENODEV;
@@ -999,7 +1003,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
                        host->ioaddr + ESDHC_TUNING_CTRL);
 
        boarddata = &imx_data->boarddata;
-       if (sdhci_esdhc_imx_probe_dt(pdev, boarddata) < 0) {
+       if (sdhci_esdhc_imx_probe_dt(pdev, host, boarddata) < 0) {
                if (!host->mmc->parent->platform_data) {
                        dev_err(mmc_dev(host->mmc), "no board data!\n");
                        err = -EINVAL;
@@ -1009,40 +1013,9 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
                                        host->mmc->parent->platform_data);
        }
 
-       /* write_protect */
-       if (boarddata->wp_type == ESDHC_WP_GPIO) {
-               err = mmc_gpio_request_ro(host->mmc, boarddata->wp_gpio);
-               if (err) {
-                       dev_err(mmc_dev(host->mmc),
-                               "failed to request write-protect gpio!\n");
-                       goto disable_clk;
-               }
-               host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
-       }
-
        /* card_detect */
-       switch (boarddata->cd_type) {
-       case ESDHC_CD_GPIO:
-               err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0);
-               if (err) {
-                       dev_err(mmc_dev(host->mmc),
-                               "failed to request card-detect gpio!\n");
-                       goto disable_clk;
-               }
-               /* fall through */
-
-       case ESDHC_CD_CONTROLLER:
-               /* we have a working card_detect back */
+       if (boarddata->cd_type == ESDHC_CD_CONTROLLER)
                host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-               break;
-
-       case ESDHC_CD_PERMANENT:
-               host->mmc->caps |= MMC_CAP_NONREMOVABLE;
-               break;
-
-       case ESDHC_CD_NONE:
-               break;
-       }
 
        switch (boarddata->max_bus_width) {
        case 8:
@@ -1075,6 +1048,11 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
                host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
        }
 
+       /* call the generic mmc_of_parse() to pick up additional capabilities */
+       err = mmc_of_parse(host->mmc);
+       if (err)
+               goto disable_clk;
+
        err = sdhci_add_host(host);
        if (err)
                goto disable_clk;
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
new file mode 100644
index 0000000..3b423b0
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2014 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * iProc SDHCI platform driver
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/mmc/host.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include "sdhci-pltfm.h"
+
+struct sdhci_iproc_data {
+       const struct sdhci_pltfm_data *pdata;
+       u32 caps;
+       u32 caps1;
+};
+
+struct sdhci_iproc_host {
+       const struct sdhci_iproc_data *data;
+       u32 shadow_cmd;
+       u32 shadow_blk;
+};
+
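+/*
+ * Bit position of a byte or 16-bit register within its containing
+ * 32-bit word: ((reg) & 0x3) * 8, i.e. 0, 8, 16 or 24.
+ */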
+#define REG_OFFSET_IN_BITS(reg) (((reg) << 3) & 0x18)
+
+static inline u32 sdhci_iproc_readl(struct sdhci_host *host, int reg)
+{
+       u32 val = readl(host->ioaddr + reg);
+
+       pr_debug("%s: readl [0x%02x] 0x%08x\n",
+                mmc_hostname(host->mmc), reg, val);
+       return val;
+}
+
+static u16 sdhci_iproc_readw(struct sdhci_host *host, int reg)
+{
+       u32 val = sdhci_iproc_readl(host, (reg & ~3));
+       u16 word = val >> REG_OFFSET_IN_BITS(reg) & 0xffff;
+       return word;
+}
+
+static u8 sdhci_iproc_readb(struct sdhci_host *host, int reg)
+{
+       u32 val = sdhci_iproc_readl(host, (reg & ~3));
+       u8 byte = val >> REG_OFFSET_IN_BITS(reg) & 0xff;
+       return byte;
+}
+
+static inline void sdhci_iproc_writel(struct sdhci_host *host, u32 val, int reg)
+{
+       pr_debug("%s: writel [0x%02x] 0x%08x\n",
+                mmc_hostname(host->mmc), reg, val);
+
+       writel(val, host->ioaddr + reg);
+
+       if (host->clock <= 400000) {
+               /* Round up to micro-second four SD clock delay */
+               if (host->clock)
+                       udelay((4 * 1000000 + host->clock - 1) / host->clock);
+               else
+                       udelay(10);
+       }
+}
+
+/*
+ * The Arasan has a bugette whereby it may lose the content of successive
+ * writes to the same register that are within two SD-card clock cycles of
+ * each other (a clock domain crossing problem). The data
+ * register does not have this problem, which is just as well - otherwise we'd
+ * have to nobble the DMA engine too.
+ *
+ * This wouldn't be a problem with the code except that we can only write the
+ * controller with 32-bit writes.  So writing two different 16-bit registers
+ * back to back creates the problem.
+ *
+ * In reality, this only happens when SDHCI_BLOCK_SIZE and SDHCI_BLOCK_COUNT
+ * are written followed by SDHCI_TRANSFER_MODE and SDHCI_COMMAND.
+ * The BLOCK_SIZE and BLOCK_COUNT are meaningless until a command is issued,
+ * so the workaround can be optimized further.  We can keep shadow values of
+ * BLOCK_SIZE, BLOCK_COUNT, and TRANSFER_MODE until a COMMAND is issued.
+ * Then, write the BLOCK_SIZE+BLOCK_COUNT in a single 32-bit write followed
+ * by the TRANSFER+COMMAND in another 32-bit write.
+ */
+static void sdhci_iproc_writew(struct sdhci_host *host, u16 val, int reg)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct sdhci_iproc_host *iproc_host = sdhci_pltfm_priv(pltfm_host);
+       u32 word_shift = REG_OFFSET_IN_BITS(reg);
+       u32 mask = 0xffff << word_shift;
+       u32 oldval, newval;
+
+       if (reg == SDHCI_COMMAND) {
+               /* Write the block now as we are issuing a command */
+               if (iproc_host->shadow_blk != 0) {
+                       sdhci_iproc_writel(host, iproc_host->shadow_blk,
+                               SDHCI_BLOCK_SIZE);
+                       iproc_host->shadow_blk = 0;
+               }
+               oldval = iproc_host->shadow_cmd;
+       } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+               /* Block size and count are stored in shadow reg */
+               oldval = iproc_host->shadow_blk;
+       } else {
+               /* Read reg, all other registers are not shadowed */
+               oldval = sdhci_iproc_readl(host, (reg & ~3));
+       }
+       newval = (oldval & ~mask) | (val << word_shift);
+
+       if (reg == SDHCI_TRANSFER_MODE) {
+               /* Save the transfer mode until the command is issued */
+               iproc_host->shadow_cmd = newval;
+       } else if (reg == SDHCI_BLOCK_SIZE || reg == SDHCI_BLOCK_COUNT) {
+               /* Save the block info until the command is issued */
+               iproc_host->shadow_blk = newval;
+       } else {
+               /* Command or other regular 32-bit write */
+               sdhci_iproc_writel(host, newval, reg & ~3);
+       }
+}
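+
+/*
+ * Worked example of the shadowing above for a typical data command, as
+ * driven by the SDHCI core (sketch):
+ *
+ *     SDHCI_BLOCK_SIZE  (16-bit write) -> cached in shadow_blk
+ *     SDHCI_BLOCK_COUNT (16-bit write) -> merged into shadow_blk
+ *     SDHCI_TRANSFER_MODE              -> cached in shadow_cmd
+ *     SDHCI_COMMAND                    -> flush shadow_blk as one 32-bit
+ *                                         write, then write TRANSFER_MODE
+ *                                         and COMMAND as a second 32-bit
+ *                                         write
+ *
+ * Each 32-bit register therefore sees a single write per command,
+ * sidestepping the back-to-back write erratum described above.
+ */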
+
+static void sdhci_iproc_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+       u32 oldval = sdhci_iproc_readl(host, (reg & ~3));
+       u32 byte_shift = REG_OFFSET_IN_BITS(reg);
+       u32 mask = 0xff << byte_shift;
+       u32 newval = (oldval & ~mask) | (val << byte_shift);
+
+       sdhci_iproc_writel(host, newval, reg & ~3);
+}
+
+static const struct sdhci_ops sdhci_iproc_ops = {
+       .read_l = sdhci_iproc_readl,
+       .read_w = sdhci_iproc_readw,
+       .read_b = sdhci_iproc_readb,
+       .write_l = sdhci_iproc_writel,
+       .write_w = sdhci_iproc_writew,
+       .write_b = sdhci_iproc_writeb,
+       .set_clock = sdhci_set_clock,
+       .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+       .set_bus_width = sdhci_set_bus_width,
+       .reset = sdhci_reset,
+       .set_uhs_signaling = sdhci_set_uhs_signaling,
+};
+
+static const struct sdhci_pltfm_data sdhci_iproc_pltfm_data = {
+       .quirks = SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK,
+       .quirks2 = SDHCI_QUIRK2_ACMD23_BROKEN,
+       .ops = &sdhci_iproc_ops,
+};
+
+static const struct sdhci_iproc_data iproc_data = {
+       .pdata = &sdhci_iproc_pltfm_data,
+       .caps = 0x05E90000,
+       .caps1 = 0x00000064,
+};
+
+static const struct of_device_id sdhci_iproc_of_match[] = {
+       { .compatible = "brcm,sdhci-iproc-cygnus", .data = &iproc_data },
+       { }
+};
+MODULE_DEVICE_TABLE(of, sdhci_iproc_of_match);
+
+static int sdhci_iproc_probe(struct platform_device *pdev)
+{
+       const struct of_device_id *match;
+       const struct sdhci_iproc_data *iproc_data;
+       struct sdhci_host *host;
+       struct sdhci_iproc_host *iproc_host;
+       struct sdhci_pltfm_host *pltfm_host;
+       int ret;
+
+       match = of_match_device(sdhci_iproc_of_match, &pdev->dev);
+       if (!match)
+               return -EINVAL;
+       iproc_data = match->data;
+
+       host = sdhci_pltfm_init(pdev, iproc_data->pdata, sizeof(*iproc_host));
+       if (IS_ERR(host))
+               return PTR_ERR(host);
+
+       pltfm_host = sdhci_priv(host);
+       iproc_host = sdhci_pltfm_priv(pltfm_host);
+
+       iproc_host->data = iproc_data;
+
+       ret = mmc_of_parse(host->mmc);
+       if (ret)
+               goto err;
+
+       sdhci_get_of_property(pdev);
+
+       /* Enable EMMC 1/8V DDR capable */
+       host->mmc->caps |= MMC_CAP_1_8V_DDR;
+
+       pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pltfm_host->clk)) {
+               ret = PTR_ERR(pltfm_host->clk);
+               goto err;
+       }
+
+       if (iproc_host->data->pdata->quirks & SDHCI_QUIRK_MISSING_CAPS) {
+               host->caps = iproc_host->data->caps;
+               host->caps1 = iproc_host->data->caps1;
+       }
+
+       return sdhci_add_host(host);
+
+err:
+       sdhci_pltfm_free(pdev);
+       return ret;
+}
+
+static int sdhci_iproc_remove(struct platform_device *pdev)
+{
+       return sdhci_pltfm_unregister(pdev);
+}
+
+static struct platform_driver sdhci_iproc_driver = {
+       .driver = {
+               .name = "sdhci-iproc",
+               .of_match_table = sdhci_iproc_of_match,
+               .pm = SDHCI_PLTFM_PMOPS,
+       },
+       .probe = sdhci_iproc_probe,
+       .remove = sdhci_iproc_remove,
+};
+module_platform_driver(sdhci_iproc_driver);
+
+MODULE_AUTHOR("Broadcom");
+MODULE_DESCRIPTION("IPROC SDHCI driver");
+MODULE_LICENSE("GPL v2");
index 3d32ce896b091f51cdfd74b383ade7ad6e8477a7..4a09f7608c66affcede7565ea7c75c41bb10f6ba 100644
 
 #include "sdhci-pltfm.h"
 
+#define CORE_MCI_VERSION               0x50
+#define CORE_VERSION_MAJOR_SHIFT       28
+#define CORE_VERSION_MAJOR_MASK                (0xf << CORE_VERSION_MAJOR_SHIFT)
+#define CORE_VERSION_MINOR_MASK                0xff
+
 #define CORE_HC_MODE           0x78
 #define HC_MODE_EN             0x1
 #define CORE_POWER             0x0
@@ -41,6 +46,8 @@
 #define CORE_VENDOR_SPEC       0x10c
 #define CORE_CLK_PWRSAVE       BIT(1)
 
+#define CORE_VENDOR_SPEC_CAPABILITIES0 0x11c
+
 #define CDR_SELEXT_SHIFT       20
 #define CDR_SELEXT_MASK                (0xf << CDR_SELEXT_SHIFT)
 #define CMUX_SHIFT_PHASE_SHIFT 24
@@ -426,7 +433,9 @@ static int sdhci_msm_probe(struct platform_device *pdev)
        struct sdhci_msm_host *msm_host;
        struct resource *core_memres;
        int ret;
-       u16 host_version;
+       u16 host_version, core_minor;
+       u32 core_version, caps;
+       u8 core_major;
 
        msm_host = devm_kzalloc(&pdev->dev, sizeof(*msm_host), GFP_KERNEL);
        if (!msm_host)
@@ -516,6 +525,24 @@ static int sdhci_msm_probe(struct platform_device *pdev)
                host_version, ((host_version & SDHCI_VENDOR_VER_MASK) >>
                               SDHCI_VENDOR_VER_SHIFT));
 
+       core_version = readl_relaxed(msm_host->core_mem + CORE_MCI_VERSION);
+       core_major = (core_version & CORE_VERSION_MAJOR_MASK) >>
+                     CORE_VERSION_MAJOR_SHIFT;
+       core_minor = core_version & CORE_VERSION_MINOR_MASK;
+       dev_dbg(&pdev->dev, "MCI Version: 0x%08x, major: 0x%04x, minor: 0x%02x\n",
+               core_version, core_major, core_minor);
+
+       /*
+        * Support for some capabilities is not advertised by newer
+        * controller versions and must be explicitly enabled.
+        */
+       if (core_major >= 1 && core_minor != 0x11 && core_minor != 0x12) {
+               caps = readl_relaxed(host->ioaddr + SDHCI_CAPABILITIES);
+               caps |= SDHCI_CAN_VDD_300 | SDHCI_CAN_DO_8BIT;
+               writel_relaxed(caps, host->ioaddr +
+                              CORE_VENDOR_SPEC_CAPABILITIES0);
+       }
+
        ret = sdhci_add_host(host);
        if (ret)
                goto clk_disable;
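/*
 * Illustration (not part of the patch): decoding the packed
 * CORE_MCI_VERSION word, standalone. Major sits in bits [31:28], minor
 * in bits [7:0]; the sample value is made up.
 */
#include <stdint.h>
#include <stdio.h>

#define CORE_VERSION_MAJOR_SHIFT	28
#define CORE_VERSION_MAJOR_MASK		(0xfu << CORE_VERSION_MAJOR_SHIFT)
#define CORE_VERSION_MINOR_MASK		0xffu

int main(void)
{
	uint32_t core_version = 0x10000042;	/* example register value */
	unsigned int major = (core_version & CORE_VERSION_MAJOR_MASK) >>
			     CORE_VERSION_MAJOR_SHIFT;
	unsigned int minor = core_version & CORE_VERSION_MINOR_MASK;

	/* probe() above applies the caps fixup unless minor is 0x11/0x12 */
	printf("major 0x%x minor 0x%02x fixup=%d\n", major, minor,
	       major >= 1 && minor != 0x11 && minor != 0x12);
	return 0;
}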
index bcb51e9dfdcd0789866ca7739a60444106f88770..6287d426c96bf933237aa948881b5bce53215fcb 100644 (file)
@@ -173,6 +173,12 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
        pltfm_host->priv = sdhci_arasan;
        pltfm_host->clk = clk_xin;
 
+       ret = mmc_of_parse(host->mmc);
+       if (ret) {
+               dev_err(&pdev->dev, "parsing dt failed (%d)\n", ret);
+               goto clk_disable_all;
+       }
+
        ret = sdhci_add_host(host);
        if (ret)
                goto err_pltfm_free;
@@ -195,7 +201,6 @@ static int sdhci_arasan_remove(struct platform_device *pdev)
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
        struct sdhci_arasan_data *sdhci_arasan = pltfm_host->priv;
 
-       clk_disable_unprepare(pltfm_host->clk);
        clk_disable_unprepare(sdhci_arasan->clk_ahb);
 
        return sdhci_pltfm_unregister(pdev);
index 17fe02ed667263a710f1aff755a8f94049c641a2..22e9111b11ffcbfcfcfeba35ba9ab260256a9de7 100644 (file)
@@ -386,11 +386,6 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int sdhci_esdhc_remove(struct platform_device *pdev)
-{
-       return sdhci_pltfm_unregister(pdev);
-}
-
 static const struct of_device_id sdhci_esdhc_of_match[] = {
        { .compatible = "fsl,mpc8379-esdhc" },
        { .compatible = "fsl,mpc8536-esdhc" },
@@ -406,7 +401,7 @@ static struct platform_driver sdhci_esdhc_driver = {
                .pm = ESDHC_PMOPS,
        },
        .probe = sdhci_esdhc_probe,
-       .remove = sdhci_esdhc_remove,
+       .remove = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_esdhc_driver);
index be479279a1d55479bad67654e855fa71781678ea..4079a96ad37e46e23522e11682f232191c50613d 100644 (file)
@@ -75,11 +75,6 @@ static int sdhci_hlwd_probe(struct platform_device *pdev)
        return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata, 0);
 }
 
-static int sdhci_hlwd_remove(struct platform_device *pdev)
-{
-       return sdhci_pltfm_unregister(pdev);
-}
-
 static const struct of_device_id sdhci_hlwd_of_match[] = {
        { .compatible = "nintendo,hollywood-sdhci" },
        { }
@@ -93,7 +88,7 @@ static struct platform_driver sdhci_hlwd_driver = {
                .pm = SDHCI_PLTFM_PMOPS,
        },
        .probe = sdhci_hlwd_probe,
-       .remove = sdhci_hlwd_remove,
+       .remove = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_hlwd_driver);
index 29eaff78238e9ee3dd50b9e4054522ffdf035a5f..7a3fc16d0a6c601fdb65cac29451232f5f6d30aa 100644 (file)
@@ -650,6 +650,7 @@ static int rtsx_probe_slot(struct sdhci_pci_slot *slot)
 
 static const struct sdhci_pci_fixes sdhci_rtsx = {
        .quirks2        = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+                       SDHCI_QUIRK2_BROKEN_64_BIT_DMA |
                        SDHCI_QUIRK2_BROKEN_DDR50,
        .probe_slot     = rtsx_probe_slot,
 };
index c5b01d6bb85d41fdf236473b4f2b25cae9ee68f5..a207f5aaf62f53d7aa4db3699b4d695f9ce5d625 100644 (file)
@@ -75,43 +75,41 @@ void sdhci_get_of_property(struct platform_device *pdev)
        u32 bus_width;
        int size;
 
-       if (of_device_is_available(np)) {
-               if (of_get_property(np, "sdhci,auto-cmd12", NULL))
-                       host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
+       if (of_get_property(np, "sdhci,auto-cmd12", NULL))
+               host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
 
-               if (of_get_property(np, "sdhci,1-bit-only", NULL) ||
-                   (of_property_read_u32(np, "bus-width", &bus_width) == 0 &&
-                   bus_width == 1))
-                       host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+       if (of_get_property(np, "sdhci,1-bit-only", NULL) ||
+           (of_property_read_u32(np, "bus-width", &bus_width) == 0 &&
+           bus_width == 1))
+               host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
 
-               if (sdhci_of_wp_inverted(np))
-                       host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+       if (sdhci_of_wp_inverted(np))
+               host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
 
-               if (of_get_property(np, "broken-cd", NULL))
-                       host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+       if (of_get_property(np, "broken-cd", NULL))
+               host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
 
-               if (of_get_property(np, "no-1-8-v", NULL))
-                       host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
+       if (of_get_property(np, "no-1-8-v", NULL))
+               host->quirks2 |= SDHCI_QUIRK2_NO_1_8_V;
 
-               if (of_device_is_compatible(np, "fsl,p2020-rev1-esdhc"))
-                       host->quirks |= SDHCI_QUIRK_BROKEN_DMA;
+       if (of_device_is_compatible(np, "fsl,p2020-rev1-esdhc"))
+               host->quirks |= SDHCI_QUIRK_BROKEN_DMA;
 
-               if (of_device_is_compatible(np, "fsl,p2020-esdhc") ||
-                   of_device_is_compatible(np, "fsl,p1010-esdhc") ||
-                   of_device_is_compatible(np, "fsl,t4240-esdhc") ||
-                   of_device_is_compatible(np, "fsl,mpc8536-esdhc"))
-                       host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+       if (of_device_is_compatible(np, "fsl,p2020-esdhc") ||
+           of_device_is_compatible(np, "fsl,p1010-esdhc") ||
+           of_device_is_compatible(np, "fsl,t4240-esdhc") ||
+           of_device_is_compatible(np, "fsl,mpc8536-esdhc"))
+               host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
 
-               clk = of_get_property(np, "clock-frequency", &size);
-               if (clk && size == sizeof(*clk) && *clk)
-                       pltfm_host->clock = be32_to_cpup(clk);
+       clk = of_get_property(np, "clock-frequency", &size);
+       if (clk && size == sizeof(*clk) && *clk)
+               pltfm_host->clock = be32_to_cpup(clk);
 
-               if (of_find_property(np, "keep-power-in-suspend", NULL))
-                       host->mmc->pm_caps |= MMC_PM_KEEP_POWER;
+       if (of_find_property(np, "keep-power-in-suspend", NULL))
+               host->mmc->pm_caps |= MMC_PM_KEEP_POWER;
 
-               if (of_find_property(np, "enable-sdio-wakeup", NULL))
-                       host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
-       }
+       if (of_find_property(np, "enable-sdio-wakeup", NULL))
+               host->mmc->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
 }
 #else
 void sdhci_get_of_property(struct platform_device *pdev) {}
@@ -225,9 +223,11 @@ EXPORT_SYMBOL_GPL(sdhci_pltfm_register);
 int sdhci_pltfm_unregister(struct platform_device *pdev)
 {
        struct sdhci_host *host = platform_get_drvdata(pdev);
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
        int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
 
        sdhci_remove_host(host, dead);
+       clk_disable_unprepare(pltfm_host->clk);
        sdhci_pltfm_free(pdev);
 
        return 0;
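/*
 * Illustration (not part of the patch): with the clock now released
 * inside sdhci_pltfm_unregister(), a driver whose remove path does
 * nothing else can point .remove straight at it, as several conversions
 * in this series do. A hedged sketch of the resulting boilerplate; the
 * driver name and symbols are hypothetical.
 */
static struct platform_driver sdhci_example_driver = {
	.driver = {
		.name = "sdhci-example",
		.of_match_table = sdhci_example_of_match,
		.pm = SDHCI_PLTFM_PMOPS,
	},
	.probe = sdhci_example_probe,
	.remove = sdhci_pltfm_unregister,	/* no per-driver wrapper */
};
module_platform_driver(sdhci_example_driver);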
index f6f82ec3618d66bb6bf8a54fa7e118c71d7acb10..32848eb7ad807d70acee4a8125e21f509f24f65e 100644 (file)
 #define SIRF_TUNING_COUNT 128
 
 struct sdhci_sirf_priv {
-       struct clk *clk;
        int gpio_cd;
 };
 
-static unsigned int sdhci_sirf_get_max_clk(struct sdhci_host *host)
-{
-       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
-       return clk_get_rate(priv->clk);
-}
-
 static void sdhci_sirf_set_bus_width(struct sdhci_host *host, int width)
 {
        u8 ctrl;
@@ -56,7 +48,7 @@ static int sdhci_sirf_execute_tuning(struct sdhci_host *host, u32 opcode)
        int tuning_seq_cnt = 3;
        u8 phase, tuned_phases[SIRF_TUNING_COUNT];
        u8 tuned_phase_cnt = 0;
-       int rc, longest_range = 0;
+       int rc = 0, longest_range = 0;
        int start = -1, end = 0, tuning_value = -1, range = 0;
        u16 clock_setting;
        struct mmc_host *mmc = host->mmc;
@@ -68,7 +60,7 @@ retry:
        phase = 0;
        do {
                sdhci_writel(host,
-                       clock_setting | phase | (phase << 7) | (phase << 16),
+                       clock_setting | phase,
                        SDHCI_CLK_DELAY_SETTING);
 
                if (!mmc_send_tuning(mmc)) {
@@ -102,7 +94,7 @@ retry:
                 */
                phase = tuning_value;
                sdhci_writel(host,
-                       clock_setting | phase | (phase << 7) | (phase << 16),
+                       clock_setting | phase,
                        SDHCI_CLK_DELAY_SETTING);
 
                dev_dbg(mmc_dev(mmc), "%s: Setting the tuning phase to %d\n",
@@ -122,7 +114,7 @@ retry:
 static struct sdhci_ops sdhci_sirf_ops = {
        .platform_execute_tuning = sdhci_sirf_execute_tuning,
        .set_clock = sdhci_set_clock,
-       .get_max_clock  = sdhci_sirf_get_max_clk,
+       .get_max_clock  = sdhci_pltfm_clk_get_max_clock,
        .set_bus_width = sdhci_sirf_set_bus_width,
        .reset = sdhci_reset,
        .set_uhs_signaling = sdhci_set_uhs_signaling,
@@ -162,13 +154,13 @@ static int sdhci_sirf_probe(struct platform_device *pdev)
                return PTR_ERR(host);
 
        pltfm_host = sdhci_priv(host);
+       pltfm_host->clk = clk;
        priv = sdhci_pltfm_priv(pltfm_host);
-       priv->clk = clk;
        priv->gpio_cd = gpio_cd;
 
        sdhci_get_of_property(pdev);
 
-       ret = clk_prepare_enable(priv->clk);
+       ret = clk_prepare_enable(pltfm_host->clk);
        if (ret)
                goto err_clk_prepare;
 
@@ -195,37 +187,24 @@ static int sdhci_sirf_probe(struct platform_device *pdev)
 err_request_cd:
        sdhci_remove_host(host, 0);
 err_sdhci_add:
-       clk_disable_unprepare(priv->clk);
+       clk_disable_unprepare(pltfm_host->clk);
 err_clk_prepare:
        sdhci_pltfm_free(pdev);
        return ret;
 }
 
-static int sdhci_sirf_remove(struct platform_device *pdev)
-{
-       struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
-
-       sdhci_pltfm_unregister(pdev);
-
-       clk_disable_unprepare(priv->clk);
-       return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int sdhci_sirf_suspend(struct device *dev)
 {
        struct sdhci_host *host = dev_get_drvdata(dev);
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
        int ret;
 
        ret = sdhci_suspend_host(host);
        if (ret)
                return ret;
 
-       clk_disable(priv->clk);
+       clk_disable(pltfm_host->clk);
 
        return 0;
 }
@@ -234,10 +213,9 @@ static int sdhci_sirf_resume(struct device *dev)
 {
        struct sdhci_host *host = dev_get_drvdata(dev);
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_sirf_priv *priv = sdhci_pltfm_priv(pltfm_host);
        int ret;
 
-       ret = clk_enable(priv->clk);
+       ret = clk_enable(pltfm_host->clk);
        if (ret) {
                dev_dbg(dev, "Resume: Error enabling clock\n");
                return ret;
@@ -264,7 +242,7 @@ static struct platform_driver sdhci_sirf_driver = {
 #endif
        },
        .probe          = sdhci_sirf_probe,
-       .remove         = sdhci_sirf_remove,
+       .remove         = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_sirf_driver);
index 22e58268545f0b028ee3cd82d4fa7e50a49a4558..df088343d60f32413a3694813ad97f9abb57b7ae 100644 (file)
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sdhci-spear.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/io.h>
 #include "sdhci.h"
 
 struct spear_sdhci {
        struct clk *clk;
-       struct sdhci_plat_data *data;
+       int card_int_gpio;
 };
 
 /* sdhci ops */
@@ -44,38 +43,20 @@ static const struct sdhci_ops sdhci_pltfm_ops = {
        .set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
-#ifdef CONFIG_OF
-static struct sdhci_plat_data *sdhci_probe_config_dt(struct platform_device *pdev)
+static void sdhci_probe_config_dt(struct device_node *np,
+                               struct spear_sdhci *host)
 {
-       struct device_node *np = pdev->dev.of_node;
-       struct sdhci_plat_data *pdata = NULL;
        int cd_gpio;
 
        cd_gpio = of_get_named_gpio(np, "cd-gpios", 0);
        if (!gpio_is_valid(cd_gpio))
                cd_gpio = -1;
 
-       /* If pdata is required */
-       if (cd_gpio != -1) {
-               pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-               if (!pdata)
-                       dev_err(&pdev->dev, "DT: kzalloc failed\n");
-               else
-                       pdata->card_int_gpio = cd_gpio;
-       }
-
-       return pdata;
-}
-#else
-static struct sdhci_plat_data *sdhci_probe_config_dt(struct platform_device *pdev)
-{
-       return ERR_PTR(-ENOSYS);
+       host->card_int_gpio = cd_gpio;
 }
-#endif
 
 static int sdhci_probe(struct platform_device *pdev)
 {
-       struct device_node *np = pdev->dev.of_node;
        struct sdhci_host *host;
        struct resource *iomem;
        struct spear_sdhci *sdhci;
@@ -124,28 +105,18 @@ static int sdhci_probe(struct platform_device *pdev)
                dev_dbg(&pdev->dev, "Error setting desired clk, clk=%lu\n",
                                clk_get_rate(sdhci->clk));
 
-       if (np) {
-               sdhci->data = sdhci_probe_config_dt(pdev);
-               if (IS_ERR(sdhci->data)) {
-                       dev_err(&pdev->dev, "DT: Failed to get pdata\n");
-                       goto disable_clk;
-               }
-       } else {
-               sdhci->data = dev_get_platdata(&pdev->dev);
-       }
-
+       sdhci_probe_config_dt(pdev->dev.of_node, sdhci);
        /*
         * It is optional to use GPIOs for sdhci card detection. If
-        * sdhci->data is NULL, then use original sdhci lines otherwise
+        * sdhci->card_int_gpio < 0, then use the original sdhci lines, otherwise
         * GPIO lines. We use the built-in GPIO support for this.
         */
-       if (sdhci->data && sdhci->data->card_int_gpio >= 0) {
-               ret = mmc_gpio_request_cd(host->mmc,
-                                         sdhci->data->card_int_gpio, 0);
+       if (sdhci->card_int_gpio >= 0) {
+               ret = mmc_gpio_request_cd(host->mmc, sdhci->card_int_gpio, 0);
                if (ret < 0) {
                        dev_dbg(&pdev->dev,
                                "failed to request card-detect gpio%d\n",
-                               sdhci->data->card_int_gpio);
+                               sdhci->card_int_gpio);
                        goto disable_clk;
                }
        }
index 882b07e9667e20d3675620152cf108fc55f2a3ae..682f2bb0f4bf3dc200ccb3bf0451a8759f1cd363 100644 (file)
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/mmc/host.h>
-
+#include <linux/reset.h>
 #include "sdhci-pltfm.h"
 
+struct st_mmc_platform_data {
+       struct  reset_control *rstc;
+       void __iomem *top_ioaddr;
+};
+
+/* MMCSS glue logic to set up the HC on some ST SoCs (e.g. STiH407 family) */
+
+#define ST_MMC_CCONFIG_REG_1           0x400
+#define ST_MMC_CCONFIG_TIMEOUT_CLK_UNIT        BIT(24)
+#define ST_MMC_CCONFIG_TIMEOUT_CLK_FREQ        BIT(12)
+#define ST_MMC_CCONFIG_TUNING_COUNT_DEFAULT    BIT(8)
+#define ST_MMC_CCONFIG_ASYNC_WAKEUP    BIT(0)
+#define ST_MMC_CCONFIG_1_DEFAULT       \
+                               ((ST_MMC_CCONFIG_TIMEOUT_CLK_UNIT) | \
+                                (ST_MMC_CCONFIG_TIMEOUT_CLK_FREQ) | \
+                                (ST_MMC_CCONFIG_TUNING_COUNT_DEFAULT))
+
+#define ST_MMC_CCONFIG_REG_2           0x404
+#define ST_MMC_CCONFIG_HIGH_SPEED      BIT(28)
+#define ST_MMC_CCONFIG_ADMA2           BIT(24)
+#define ST_MMC_CCONFIG_8BIT            BIT(20)
+#define ST_MMC_CCONFIG_MAX_BLK_LEN     16
+#define  MAX_BLK_LEN_1024              1
+#define  MAX_BLK_LEN_2048              2
+#define BASE_CLK_FREQ_200              0xc8
+#define BASE_CLK_FREQ_100              0x64
+#define BASE_CLK_FREQ_50               0x32
+#define ST_MMC_CCONFIG_2_DEFAULT \
+       (ST_MMC_CCONFIG_HIGH_SPEED | ST_MMC_CCONFIG_ADMA2 | \
+        ST_MMC_CCONFIG_8BIT | \
+        (MAX_BLK_LEN_1024 << ST_MMC_CCONFIG_MAX_BLK_LEN))
+
+#define ST_MMC_CCONFIG_REG_3                   0x408
+#define ST_MMC_CCONFIG_EMMC_SLOT_TYPE          BIT(28)
+#define ST_MMC_CCONFIG_64BIT                   BIT(24)
+#define ST_MMC_CCONFIG_ASYNCH_INTR_SUPPORT     BIT(20)
+#define ST_MMC_CCONFIG_1P8_VOLT                        BIT(16)
+#define ST_MMC_CCONFIG_3P0_VOLT                        BIT(12)
+#define ST_MMC_CCONFIG_3P3_VOLT                        BIT(8)
+#define ST_MMC_CCONFIG_SUSP_RES_SUPPORT                BIT(4)
+#define ST_MMC_CCONFIG_SDMA                    BIT(0)
+#define ST_MMC_CCONFIG_3_DEFAULT       \
+                        (ST_MMC_CCONFIG_ASYNCH_INTR_SUPPORT    | \
+                         ST_MMC_CCONFIG_3P3_VOLT               | \
+                         ST_MMC_CCONFIG_SUSP_RES_SUPPORT       | \
+                         ST_MMC_CCONFIG_SDMA)
+
+#define ST_MMC_CCONFIG_REG_4   0x40c
+#define ST_MMC_CCONFIG_D_DRIVER        BIT(20)
+#define ST_MMC_CCONFIG_C_DRIVER        BIT(16)
+#define ST_MMC_CCONFIG_A_DRIVER        BIT(12)
+#define ST_MMC_CCONFIG_DDR50   BIT(8)
+#define ST_MMC_CCONFIG_SDR104  BIT(4)
+#define ST_MMC_CCONFIG_SDR50   BIT(0)
+#define ST_MMC_CCONFIG_4_DEFAULT       0
+
+#define ST_MMC_CCONFIG_REG_5           0x410
+#define ST_MMC_CCONFIG_TUNING_FOR_SDR50        BIT(8)
+#define RETUNING_TIMER_CNT_MAX         0xf
+#define ST_MMC_CCONFIG_5_DEFAULT       0
+
+/* I/O configuration for Arasan IP */
+#define ST_MMC_GP_OUTPUT       0x450
+#define ST_MMC_GP_OUTPUT_CD    BIT(12)
+
+#define ST_MMC_STATUS_R                0x460
+
+#define ST_TOP_MMC_DLY_FIX_OFF(x)      ((x) - 0x8)
+
+/* TOP config registers to manage static and dynamic delay */
+#define ST_TOP_MMC_TX_CLK_DLY                  ST_TOP_MMC_DLY_FIX_OFF(0x8)
+#define ST_TOP_MMC_RX_CLK_DLY                  ST_TOP_MMC_DLY_FIX_OFF(0xc)
+/* MMC delay control register */
+#define ST_TOP_MMC_DLY_CTRL                    ST_TOP_MMC_DLY_FIX_OFF(0x18)
+#define ST_TOP_MMC_DLY_CTRL_DLL_BYPASS_CMD     BIT(0)
+#define ST_TOP_MMC_DLY_CTRL_DLL_BYPASS_PH_SEL  BIT(1)
+#define ST_TOP_MMC_DLY_CTRL_TX_DLL_ENABLE      BIT(8)
+#define ST_TOP_MMC_DLY_CTRL_RX_DLL_ENABLE      BIT(9)
+#define ST_TOP_MMC_DLY_CTRL_ATUNE_NOT_CFG_DLY  BIT(10)
+#define ST_TOP_MMC_START_DLL_LOCK              BIT(11)
+
+/* register to provide the phase-shift value for DLL */
+#define ST_TOP_MMC_TX_DLL_STEP_DLY             ST_TOP_MMC_DLY_FIX_OFF(0x1c)
+#define ST_TOP_MMC_RX_DLL_STEP_DLY             ST_TOP_MMC_DLY_FIX_OFF(0x20)
+#define ST_TOP_MMC_RX_CMD_STEP_DLY             ST_TOP_MMC_DLY_FIX_OFF(0x24)
+
+/* phase shift delay on the tx clk 2.188ns */
+#define ST_TOP_MMC_TX_DLL_STEP_DLY_VALID       0x6
+
+#define ST_TOP_MMC_DLY_MAX                     0xf
+
+#define ST_TOP_MMC_DYN_DLY_CONF        \
+               (ST_TOP_MMC_DLY_CTRL_TX_DLL_ENABLE | \
+                ST_TOP_MMC_DLY_CTRL_ATUNE_NOT_CFG_DLY | \
+                ST_TOP_MMC_START_DLL_LOCK)
+
+/*
+ * For clock speeds greater than 90MHz, we need to check that the
+ * DLL procedure has finished before switching to ultra-speed modes.
+ */
+#define        CLK_TO_CHECK_DLL_LOCK   90000000
+
+static inline void st_mmcss_set_static_delay(void __iomem *ioaddr)
+{
+       if (!ioaddr)
+               return;
+
+       writel_relaxed(0x0, ioaddr + ST_TOP_MMC_DLY_CTRL);
+       writel_relaxed(ST_TOP_MMC_DLY_MAX,
+                       ioaddr + ST_TOP_MMC_TX_CLK_DLY);
+}
+
+/**
+ * st_mmcss_cconfig: configure the Arasan HC inside the flashSS.
+ * @np: dt device node.
+ * @host: sdhci host
+ * Description: this function configures the Arasan host controller.
+ * On some ST SoCs, e.g. the STiH407 family, the MMC devices sit inside a
+ * dedicated flashSS sub-system which needs to be configured to be compliant
+ * with eMMC 4.5 or eMMC 4.3. This has to be done before registering the
+ * sdhci host.
+ */
+static void st_mmcss_cconfig(struct device_node *np, struct sdhci_host *host)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct mmc_host *mhost = host->mmc;
+       u32 cconf2, cconf3, cconf4, cconf5;
+
+       if (!of_device_is_compatible(np, "st,sdhci-stih407"))
+               return;
+
+       cconf2 = ST_MMC_CCONFIG_2_DEFAULT;
+       cconf3 = ST_MMC_CCONFIG_3_DEFAULT;
+       cconf4 = ST_MMC_CCONFIG_4_DEFAULT;
+       cconf5 = ST_MMC_CCONFIG_5_DEFAULT;
+
+       writel_relaxed(ST_MMC_CCONFIG_1_DEFAULT,
+                       host->ioaddr + ST_MMC_CCONFIG_REG_1);
+
+       /*
+        * Set the clock frequency; default to 50MHz if max-frequency is
+        * not provided.
+        */
+
+       switch (mhost->f_max) {
+       case 200000000:
+               clk_set_rate(pltfm_host->clk, mhost->f_max);
+               cconf2 |= BASE_CLK_FREQ_200;
+               break;
+       case 100000000:
+               clk_set_rate(pltfm_host->clk, mhost->f_max);
+               cconf2 |= BASE_CLK_FREQ_100;
+               break;
+       default:
+               clk_set_rate(pltfm_host->clk, 50000000);
+               cconf2 |= BASE_CLK_FREQ_50;
+       }
+
+       writel_relaxed(cconf2, host->ioaddr + ST_MMC_CCONFIG_REG_2);
+
+       if (mhost->caps & MMC_CAP_NONREMOVABLE)
+               cconf3 |= ST_MMC_CCONFIG_EMMC_SLOT_TYPE;
+       else
+               /* CARD_DET_CTRL */
+               writel_relaxed(ST_MMC_GP_OUTPUT_CD,
+                               host->ioaddr + ST_MMC_GP_OUTPUT);
+
+       if (mhost->caps & MMC_CAP_UHS_SDR50) {
+               /* use 1.8V */
+               cconf3 |= ST_MMC_CCONFIG_1P8_VOLT;
+               cconf4 |= ST_MMC_CCONFIG_SDR50;
+               /* Use tuning */
+               cconf5 |= ST_MMC_CCONFIG_TUNING_FOR_SDR50;
+               /* Max timeout for retuning */
+               cconf5 |= RETUNING_TIMER_CNT_MAX;
+       }
+
+       if (mhost->caps & MMC_CAP_UHS_SDR104) {
+               /*
+                * SDR104 implies the HC can support HS200 mode, so
+                * it's mandatory to use 1.8V
+                */
+               cconf3 |= ST_MMC_CCONFIG_1P8_VOLT;
+               cconf4 |= ST_MMC_CCONFIG_SDR104;
+               /* Max timeout for retuning */
+               cconf5 |= RETUNING_TIMER_CNT_MAX;
+       }
+
+       if (mhost->caps & MMC_CAP_UHS_DDR50)
+               cconf4 |= ST_MMC_CCONFIG_DDR50;
+
+       writel_relaxed(cconf3, host->ioaddr + ST_MMC_CCONFIG_REG_3);
+       writel_relaxed(cconf4, host->ioaddr + ST_MMC_CCONFIG_REG_4);
+       writel_relaxed(cconf5, host->ioaddr + ST_MMC_CCONFIG_REG_5);
+}
+
+static inline void st_mmcss_set_dll(void __iomem *ioaddr)
+{
+       if (!ioaddr)
+               return;
+
+       writel_relaxed(ST_TOP_MMC_DYN_DLY_CONF, ioaddr + ST_TOP_MMC_DLY_CTRL);
+       writel_relaxed(ST_TOP_MMC_TX_DLL_STEP_DLY_VALID,
+                       ioaddr + ST_TOP_MMC_TX_DLL_STEP_DLY);
+}
+
+static int st_mmcss_lock_dll(void __iomem *ioaddr)
+{
+       unsigned long curr, value;
+       unsigned long finish = jiffies + HZ;
+
+       /* Checks if the DLL procedure is finished */
+       do {
+               curr = jiffies;
+               value = readl(ioaddr + ST_MMC_STATUS_R);
+               if (value & 0x1)
+                       return 0;
+
+               cpu_relax();
+       } while (!time_after_eq(curr, finish));
+
+       return -EBUSY;
+}
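/*
 * Illustration (not part of the patch): st_mmcss_lock_dll() is an
 * instance of the usual jiffies-bounded busy wait. A hedged generic
 * sketch of the pattern; the register offset and ready bit are
 * placeholders, not taken from this driver.
 */
static int poll_ready_bit(void __iomem *ioaddr, u32 offset, u32 ready_bit)
{
	unsigned long timeout = jiffies + HZ;	/* give up after ~1 second */

	do {
		if (readl(ioaddr + offset) & ready_bit)
			return 0;		/* hardware signalled ready */
		cpu_relax();			/* ease off the pipeline */
	} while (!time_after_eq(jiffies, timeout));

	return -EBUSY;				/* never became ready in time */
}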
+
+static int sdhci_st_set_dll_for_clock(struct sdhci_host *host)
+{
+       int ret = 0;
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct st_mmc_platform_data *pdata = pltfm_host->priv;
+
+       if (host->clock > CLK_TO_CHECK_DLL_LOCK) {
+               st_mmcss_set_dll(pdata->top_ioaddr);
+               ret = st_mmcss_lock_dll(host->ioaddr);
+       }
+
+       return ret;
+}
+
+static void sdhci_st_set_uhs_signaling(struct sdhci_host *host,
+                                       unsigned int uhs)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct st_mmc_platform_data *pdata = pltfm_host->priv;
+       u16 ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+       int ret = 0;
+
+       /* Select Bus Speed Mode for host */
+       ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+       switch (uhs) {
+       /*
+        * Set V18_EN -- UHS modes do not work without it. Note that
+        * setting it does not change the signaling voltage.
+        */
+
+       case MMC_TIMING_UHS_SDR12:
+               st_mmcss_set_static_delay(pdata->top_ioaddr);
+               ctrl_2 |= SDHCI_CTRL_UHS_SDR12 | SDHCI_CTRL_VDD_180;
+               break;
+       case MMC_TIMING_UHS_SDR25:
+               st_mmcss_set_static_delay(pdata->top_ioaddr);
+               ctrl_2 |= SDHCI_CTRL_UHS_SDR25 | SDHCI_CTRL_VDD_180;
+               break;
+       case MMC_TIMING_UHS_SDR50:
+               st_mmcss_set_static_delay(pdata->top_ioaddr);
+               ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180;
+               ret = sdhci_st_set_dll_for_clock(host);
+               break;
+       case MMC_TIMING_UHS_SDR104:
+       case MMC_TIMING_MMC_HS200:
+               st_mmcss_set_static_delay(pdata->top_ioaddr);
+               ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180;
+               ret =  sdhci_st_set_dll_for_clock(host);
+               break;
+       case MMC_TIMING_UHS_DDR50:
+       case MMC_TIMING_MMC_DDR52:
+               st_mmcss_set_static_delay(pdata->top_ioaddr);
+               ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180;
+               break;
+       }
+
+       if (ret)
+               dev_warn(mmc_dev(host->mmc),
+                        "Error setting dll for clock (uhs %d)\n", uhs);
+
+       dev_dbg(mmc_dev(host->mmc), "uhs %d, ctrl_2 %04X\n", uhs, ctrl_2);
+
+       sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+
 static u32 sdhci_st_readl(struct sdhci_host *host, int reg)
 {
        u32 ret;
@@ -48,22 +334,33 @@ static const struct sdhci_ops sdhci_st_ops = {
        .set_bus_width = sdhci_set_bus_width,
        .read_l = sdhci_st_readl,
        .reset = sdhci_reset,
+       .set_uhs_signaling = sdhci_st_set_uhs_signaling,
 };
 
 static const struct sdhci_pltfm_data sdhci_st_pdata = {
        .ops = &sdhci_st_ops,
        .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
-           SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
+               SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
+               SDHCI_QUIRK_NO_HISPD_BIT,
+       .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
+               SDHCI_QUIRK2_STOP_WITH_TC,
 };
 
 
 static int sdhci_st_probe(struct platform_device *pdev)
 {
+       struct device_node *np = pdev->dev.of_node;
        struct sdhci_host *host;
+       struct st_mmc_platform_data *pdata;
        struct sdhci_pltfm_host *pltfm_host;
        struct clk *clk;
        int ret = 0;
        u16 host_version;
+       struct resource *res;
+
+       pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+       if (!pdata)
+               return -ENOMEM;
 
        clk =  devm_clk_get(&pdev->dev, "mmc");
        if (IS_ERR(clk)) {
@@ -71,10 +368,17 @@ static int sdhci_st_probe(struct platform_device *pdev)
                return PTR_ERR(clk);
        }
 
+       pdata->rstc = devm_reset_control_get(&pdev->dev, NULL);
+       if (IS_ERR(pdata->rstc))
+               pdata->rstc = NULL;
+       else
+               reset_control_deassert(pdata->rstc);
+
        host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
        if (IS_ERR(host)) {
                dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
-               return PTR_ERR(host);
+               ret = PTR_ERR(host);
+               goto err_pltfm_init;
        }
 
        ret = mmc_of_parse(host->mmc);
@@ -85,9 +389,22 @@ static int sdhci_st_probe(struct platform_device *pdev)
 
        clk_prepare_enable(clk);
 
+       /* Configure the FlashSS Top registers for setting eMMC TX/RX delay */
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+                                          "top-mmc-delay");
+       pdata->top_ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(pdata->top_ioaddr)) {
+               dev_warn(&pdev->dev, "FlashSS Top Dly registers not available\n");
+               pdata->top_ioaddr = NULL;
+       }
+
        pltfm_host = sdhci_priv(host);
+       pltfm_host->priv = pdata;
        pltfm_host->clk = clk;
 
+       /* Configure the Arasan HC inside the flashSS */
+       st_mmcss_cconfig(np, host);
+
        ret = sdhci_add_host(host);
        if (ret) {
                dev_err(&pdev->dev, "Failed sdhci_add_host\n");
@@ -109,6 +426,9 @@ err_out:
        clk_disable_unprepare(clk);
 err_of:
        sdhci_pltfm_free(pdev);
+err_pltfm_init:
+       if (pdata->rstc)
+               reset_control_assert(pdata->rstc);
 
        return ret;
 }
@@ -117,10 +437,15 @@ static int sdhci_st_remove(struct platform_device *pdev)
 {
        struct sdhci_host *host = platform_get_drvdata(pdev);
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct st_mmc_platform_data *pdata = pltfm_host->priv;
+       int ret;
 
-       clk_disable_unprepare(pltfm_host->clk);
+       ret = sdhci_pltfm_unregister(pdev);
+
+       if (pdata->rstc)
+               reset_control_assert(pdata->rstc);
 
-       return sdhci_pltfm_unregister(pdev);
+       return ret;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -128,11 +453,15 @@ static int sdhci_st_suspend(struct device *dev)
 {
        struct sdhci_host *host = dev_get_drvdata(dev);
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct st_mmc_platform_data *pdata = pltfm_host->priv;
        int ret = sdhci_suspend_host(host);
 
        if (ret)
                goto out;
 
+       if (pdata->rstc)
+               reset_control_assert(pdata->rstc);
+
        clk_disable_unprepare(pltfm_host->clk);
 out:
        return ret;
@@ -142,9 +471,16 @@ static int sdhci_st_resume(struct device *dev)
 {
        struct sdhci_host *host = dev_get_drvdata(dev);
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+       struct st_mmc_platform_data *pdata = pltfm_host->priv;
+       struct device_node *np = dev->of_node;
 
        clk_prepare_enable(pltfm_host->clk);
 
+       if (pdata->rstc)
+               reset_control_deassert(pdata->rstc);
+
+       st_mmcss_cconfig(np, host);
+
        return sdhci_resume_host(host);
 }
 #endif
index f3778d58d1cd42031663b92144deb3fd42723755..ad28b49f0203f5d734ef7325aba081c4c4b5ce4e 100644 (file)
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/slot-gpio.h>
+#include <linux/gpio/consumer.h>
 
 #include "sdhci-pltfm.h"
 
@@ -41,7 +40,6 @@
 #define NVQUIRK_DISABLE_SDR50          BIT(3)
 #define NVQUIRK_DISABLE_SDR104         BIT(4)
 #define NVQUIRK_DISABLE_DDR50          BIT(5)
-#define NVQUIRK_SHADOW_XFER_MODE_REG   BIT(6)
 
 struct sdhci_tegra_soc_data {
        const struct sdhci_pltfm_data *pdata;
@@ -50,7 +48,7 @@ struct sdhci_tegra_soc_data {
 
 struct sdhci_tegra {
        const struct sdhci_tegra_soc_data *soc_data;
-       int power_gpio;
+       struct gpio_desc *power_gpio;
 };
 
 static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
@@ -71,23 +69,19 @@ static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
 static void tegra_sdhci_writew(struct sdhci_host *host, u16 val, int reg)
 {
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_tegra *tegra_host = pltfm_host->priv;
-       const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
 
-       if (soc_data->nvquirks & NVQUIRK_SHADOW_XFER_MODE_REG) {
-               switch (reg) {
-               case SDHCI_TRANSFER_MODE:
-                       /*
-                        * Postpone this write, we must do it together with a
-                        * command write that is down below.
-                        */
-                       pltfm_host->xfer_mode_shadow = val;
-                       return;
-               case SDHCI_COMMAND:
-                       writel((val << 16) | pltfm_host->xfer_mode_shadow,
-                               host->ioaddr + SDHCI_TRANSFER_MODE);
-                       return;
-               }
+       switch (reg) {
+       case SDHCI_TRANSFER_MODE:
+               /*
+                * Postpone this write; it must go out together with the
+                * command write that follows below.
+                */
+               pltfm_host->xfer_mode_shadow = val;
+               return;
+       case SDHCI_COMMAND:
+               writel((val << 16) | pltfm_host->xfer_mode_shadow,
+                       host->ioaddr + SDHCI_TRANSFER_MODE);
+               return;
        }
 
        writew(val, host->ioaddr + reg);
@@ -173,7 +167,6 @@ static void tegra_sdhci_set_bus_width(struct sdhci_host *host, int bus_width)
 static const struct sdhci_ops tegra_sdhci_ops = {
        .get_ro     = tegra_sdhci_get_ro,
        .read_w     = tegra_sdhci_readw,
-       .write_w    = tegra_sdhci_writew,
        .write_l    = tegra_sdhci_writel,
        .set_clock  = sdhci_set_clock,
        .set_bus_width = tegra_sdhci_set_bus_width,
@@ -214,6 +207,18 @@ static struct sdhci_tegra_soc_data soc_data_tegra30 = {
                    NVQUIRK_DISABLE_SDR104,
 };
 
+static const struct sdhci_ops tegra114_sdhci_ops = {
+       .get_ro     = tegra_sdhci_get_ro,
+       .read_w     = tegra_sdhci_readw,
+       .write_w    = tegra_sdhci_writew,
+       .write_l    = tegra_sdhci_writel,
+       .set_clock  = sdhci_set_clock,
+       .set_bus_width = tegra_sdhci_set_bus_width,
+       .reset      = tegra_sdhci_reset,
+       .set_uhs_signaling = sdhci_set_uhs_signaling,
+       .get_max_clock = sdhci_pltfm_clk_get_max_clock,
+};
+
 static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
        .quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
                  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
@@ -221,15 +226,14 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
                  SDHCI_QUIRK_NO_HISPD_BIT |
                  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
                  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
-       .ops  = &tegra_sdhci_ops,
+       .ops  = &tegra114_sdhci_ops,
 };
 
 static struct sdhci_tegra_soc_data soc_data_tegra114 = {
        .pdata = &sdhci_tegra114_pdata,
        .nvquirks = NVQUIRK_DISABLE_SDR50 |
                    NVQUIRK_DISABLE_DDR50 |
-                   NVQUIRK_DISABLE_SDR104 |
-                   NVQUIRK_SHADOW_XFER_MODE_REG,
+                   NVQUIRK_DISABLE_SDR104,
 };
 
 static const struct of_device_id sdhci_tegra_dt_match[] = {
@@ -241,17 +245,6 @@ static const struct of_device_id sdhci_tegra_dt_match[] = {
 };
 MODULE_DEVICE_TABLE(of, sdhci_tegra_dt_match);
 
-static int sdhci_tegra_parse_dt(struct device *dev)
-{
-       struct device_node *np = dev->of_node;
-       struct sdhci_host *host = dev_get_drvdata(dev);
-       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_tegra *tegra_host = pltfm_host->priv;
-
-       tegra_host->power_gpio = of_get_named_gpio(np, "power-gpios", 0);
-       return mmc_of_parse(host->mmc);
-}
-
 static int sdhci_tegra_probe(struct platform_device *pdev)
 {
        const struct of_device_id *match;
@@ -281,21 +274,18 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
        tegra_host->soc_data = soc_data;
        pltfm_host->priv = tegra_host;
 
-       rc = sdhci_tegra_parse_dt(&pdev->dev);
+       rc = mmc_of_parse(host->mmc);
        if (rc)
                goto err_parse_dt;
 
-       if (gpio_is_valid(tegra_host->power_gpio)) {
-               rc = gpio_request(tegra_host->power_gpio, "sdhci_power");
-               if (rc) {
-                       dev_err(mmc_dev(host->mmc),
-                               "failed to allocate power gpio\n");
-                       goto err_power_req;
-               }
-               gpio_direction_output(tegra_host->power_gpio, 1);
+       tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power",
+                                                        GPIOD_OUT_HIGH);
+       if (IS_ERR(tegra_host->power_gpio)) {
+               rc = PTR_ERR(tegra_host->power_gpio);
+               goto err_power_req;
        }
 
-       clk = clk_get(mmc_dev(host->mmc), NULL);
+       clk = devm_clk_get(mmc_dev(host->mmc), NULL);
        if (IS_ERR(clk)) {
                dev_err(mmc_dev(host->mmc), "clk err\n");
                rc = PTR_ERR(clk);
@@ -312,10 +302,7 @@ static int sdhci_tegra_probe(struct platform_device *pdev)
 
 err_add_host:
        clk_disable_unprepare(pltfm_host->clk);
-       clk_put(pltfm_host->clk);
 err_clk_get:
-       if (gpio_is_valid(tegra_host->power_gpio))
-               gpio_free(tegra_host->power_gpio);
 err_power_req:
 err_parse_dt:
 err_alloc_tegra_host:
@@ -323,26 +310,6 @@ err_alloc_tegra_host:
        return rc;
 }
 
-static int sdhci_tegra_remove(struct platform_device *pdev)
-{
-       struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-       struct sdhci_tegra *tegra_host = pltfm_host->priv;
-       int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
-
-       sdhci_remove_host(host, dead);
-
-       if (gpio_is_valid(tegra_host->power_gpio))
-               gpio_free(tegra_host->power_gpio);
-
-       clk_disable_unprepare(pltfm_host->clk);
-       clk_put(pltfm_host->clk);
-
-       sdhci_pltfm_free(pdev);
-
-       return 0;
-}
-
 static struct platform_driver sdhci_tegra_driver = {
        .driver         = {
                .name   = "sdhci-tegra",
@@ -350,7 +317,7 @@ static struct platform_driver sdhci_tegra_driver = {
                .pm     = SDHCI_PLTFM_PMOPS,
        },
        .probe          = sdhci_tegra_probe,
-       .remove         = sdhci_tegra_remove,
+       .remove         = sdhci_pltfm_unregister,
 };
 
 module_platform_driver(sdhci_tegra_driver);
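/*
 * Illustration (not part of the patch): the tegra conversion above
 * replaces the of_get_named_gpio()/gpio_request()/gpio_direction_output()
 * sequence with one descriptor-based call. A hedged minimal sketch of
 * the idiom; "power" maps to the power-gpios DT property, and the
 * function name is hypothetical.
 */
static int request_power_gpio(struct platform_device *pdev)
{
	struct gpio_desc *power;

	/* devm_*: released automatically on detach, no gpio_free() needed */
	power = devm_gpiod_get_optional(&pdev->dev, "power", GPIOD_OUT_HIGH);
	if (IS_ERR(power))
		return PTR_ERR(power);	/* e.g. -EPROBE_DEFER */

	/* power may be NULL here; gpiod calls on a NULL desc are no-ops */
	return 0;
}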
index 0ad412a4876fae403a1a2cdd1a75e7761c809fac..c80287a027356e079e366401c2d5f64d9967461b 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/sdio.h>
 #include <linux/mmc/slot-gpio.h>
 
 #include "sdhci.h"
@@ -56,6 +57,7 @@ static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
 static int sdhci_pre_dma_transfer(struct sdhci_host *host,
                                        struct mmc_data *data,
                                        struct sdhci_host_next *next);
+static int sdhci_do_get_cd(struct sdhci_host *host);
 
 #ifdef CONFIG_PM
 static int sdhci_runtime_pm_get(struct sdhci_host *host);
@@ -931,7 +933,8 @@ static void sdhci_set_transfer_mode(struct sdhci_host *host,
                 * If we are sending CMD23, CMD12 never gets sent
                 * on successful completion (so no Auto-CMD12).
                 */
-               if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12))
+               if (!host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD12) &&
+                   (cmd->opcode != SD_IO_RW_EXTENDED))
                        mode |= SDHCI_TRNS_AUTO_CMD12;
                else if (host->mrq->sbc && (host->flags & SDHCI_AUTO_CMD23)) {
                        mode |= SDHCI_TRNS_AUTO_CMD23;
@@ -1356,7 +1359,8 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
        sdhci_runtime_pm_get(host);
 
-       present = mmc_gpio_get_cd(host->mmc);
+       /* Firstly check card presence */
+       present = sdhci_do_get_cd(host);
 
        spin_lock_irqsave(&host->lock, flags);
 
@@ -1379,22 +1383,6 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
 
        host->mrq = mrq;
 
-       /*
-        * Firstly check card presence from cd-gpio.  The return could
-        * be one of the following possibilities:
-        *     negative: cd-gpio is not available
-        *     zero: cd-gpio is used, and card is removed
-        *     one: cd-gpio is used, and card is present
-        */
-       if (present < 0) {
-               /* If polling, assume that the card is always present. */
-               if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
-                       present = 1;
-               else
-                       present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
-                                       SDHCI_CARD_PRESENT;
-       }
-
        if (!present || host->flags & SDHCI_DEVICE_DEAD) {
                host->mrq->cmd->error = -ENOMEDIUM;
                tasklet_schedule(&host->finish_tasklet);
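/*
 * Illustration (not part of the patch): the tri-state fallback removed
 * above now lives behind sdhci_do_get_cd(). A hedged sketch of the
 * decision it encapsulates, as a pure helper; the function name is
 * hypothetical and gpio_cd is the tri-state mmc_gpio_get_cd() result.
 */
static int card_present(int gpio_cd, u32 quirks, u32 present_state)
{
	if (gpio_cd >= 0)		/* a cd-gpio is wired up */
		return gpio_cd;		/* 0 = removed, 1 = present */

	if (quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
		return 1;		/* polling mode: assume present */

	return !!(present_state & SDHCI_CARD_PRESENT);	/* controller bit */
}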
@@ -3164,7 +3152,8 @@ int sdhci_add_host(struct sdhci_host *host)
        /* Auto-CMD23 stuff only works in ADMA or PIO. */
        if ((host->version >= SDHCI_SPEC_300) &&
            ((host->flags & SDHCI_USE_ADMA) ||
-            !(host->flags & SDHCI_USE_SDMA))) {
+            !(host->flags & SDHCI_USE_SDMA)) &&
+            !(host->quirks2 & SDHCI_QUIRK2_ACMD23_BROKEN)) {
                host->flags |= SDHCI_AUTO_CMD23;
                DBG("%s: Auto-CMD23 available\n", mmc_hostname(mmc));
        } else {
index 0315e1844330c4d0a70616c7525899eca57204a4..e639b7f435e564f6f8a56dc22a6747d23939c8f2 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/types.h>
 #include <linux/io.h>
 
-#include <linux/mmc/sdhci.h>
+#include <linux/mmc/host.h>
 
 /*
  * Controller registers
@@ -309,6 +309,207 @@ struct sdhci_adma2_64_desc {
  */
 #define SDHCI_MAX_SEGS         128
 
+struct sdhci_host_next {
+       unsigned int    sg_count;
+       s32             cookie;
+};
+
+struct sdhci_host {
+       /* Data set by hardware interface driver */
+       const char *hw_name;    /* Hardware bus name */
+
+       unsigned int quirks;    /* Deviations from spec. */
+
+/* Controller doesn't honor resets unless we touch the clock register */
+#define SDHCI_QUIRK_CLOCK_BEFORE_RESET                 (1<<0)
+/* Controller has bad caps bits, but really supports DMA */
+#define SDHCI_QUIRK_FORCE_DMA                          (1<<1)
+/* Controller doesn't like to be reset when there is no card inserted. */
+#define SDHCI_QUIRK_NO_CARD_NO_RESET                   (1<<2)
+/* Controller doesn't like clearing the power reg before a change */
+#define SDHCI_QUIRK_SINGLE_POWER_WRITE                 (1<<3)
+/* Controller has flaky internal state so reset it on each ios change */
+#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS              (1<<4)
+/* Controller has an unusable DMA engine */
+#define SDHCI_QUIRK_BROKEN_DMA                         (1<<5)
+/* Controller has an unusable ADMA engine */
+#define SDHCI_QUIRK_BROKEN_ADMA                                (1<<6)
+/* Controller can only DMA from 32-bit aligned addresses */
+#define SDHCI_QUIRK_32BIT_DMA_ADDR                     (1<<7)
+/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
+#define SDHCI_QUIRK_32BIT_DMA_SIZE                     (1<<8)
+/* Controller can only do ADMA on chunks that are a multiple of 32 bits */
+#define SDHCI_QUIRK_32BIT_ADMA_SIZE                    (1<<9)
+/* Controller needs to be reset after each request to stay stable */
+#define SDHCI_QUIRK_RESET_AFTER_REQUEST                        (1<<10)
+/* Controller needs voltage and power writes to happen separately */
+#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER            (1<<11)
+/* Controller provides an incorrect timeout value for transfers */
+#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL                 (1<<12)
+/* Controller has an issue with buffer bits for small transfers */
+#define SDHCI_QUIRK_BROKEN_SMALL_PIO                   (1<<13)
+/* Controller does not provide transfer-complete interrupt when not busy */
+#define SDHCI_QUIRK_NO_BUSY_IRQ                                (1<<14)
+/* Controller has unreliable card detection */
+#define SDHCI_QUIRK_BROKEN_CARD_DETECTION              (1<<15)
+/* Controller reports inverted write-protect state */
+#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT             (1<<16)
+/* Controller does not like fast PIO transfers */
+#define SDHCI_QUIRK_PIO_NEEDS_DELAY                    (1<<18)
+/* Controller has to be forced to use block size of 2048 bytes */
+#define SDHCI_QUIRK_FORCE_BLK_SZ_2048                  (1<<20)
+/* Controller cannot do multi-block transfers */
+#define SDHCI_QUIRK_NO_MULTIBLOCK                      (1<<21)
+/* Controller can only handle 1-bit data transfers */
+#define SDHCI_QUIRK_FORCE_1_BIT_DATA                   (1<<22)
+/* Controller needs 10ms delay between applying power and clock */
+#define SDHCI_QUIRK_DELAY_AFTER_POWER                  (1<<23)
+/* Controller uses SDCLK instead of TMCLK for data timeouts */
+#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK            (1<<24)
+/* Controller reports wrong base clock capability */
+#define SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN              (1<<25)
+/* Controller cannot support End Attribute in NOP ADMA descriptor */
+#define SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC              (1<<26)
+/* Controller is missing device caps. Use caps provided by host */
+#define SDHCI_QUIRK_MISSING_CAPS                       (1<<27)
+/* Controller uses Auto CMD12 command to stop the transfer */
+#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12             (1<<28)
+/* Controller doesn't have a HISPD bit field for HI-SPEED SD cards */
+#define SDHCI_QUIRK_NO_HISPD_BIT                       (1<<29)
+/* Controller treats ADMA descriptors with length 0000h incorrectly */
+#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC           (1<<30)
+/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */
+#define SDHCI_QUIRK_UNSTABLE_RO_DETECT                 (1<<31)
+
+       unsigned int quirks2;   /* More deviations from spec. */
+
+#define SDHCI_QUIRK2_HOST_OFF_CARD_ON                  (1<<0)
+#define SDHCI_QUIRK2_HOST_NO_CMD23                     (1<<1)
+/* The system physically doesn't support 1.8v, even if the host does */
+#define SDHCI_QUIRK2_NO_1_8_V                          (1<<2)
+#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN               (1<<3)
+#define SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON              (1<<4)
+/* Controller has a non-standard host control register */
+#define SDHCI_QUIRK2_BROKEN_HOST_CONTROL               (1<<5)
+/* Controller does not support HS200 */
+#define SDHCI_QUIRK2_BROKEN_HS200                      (1<<6)
+/* Controller does not support DDR50 */
+#define SDHCI_QUIRK2_BROKEN_DDR50                      (1<<7)
+/* Stop command (CMD12) can set Transfer Complete when not using MMC_RSP_BUSY */
+#define SDHCI_QUIRK2_STOP_WITH_TC                      (1<<8)
+/* Controller does not support 64-bit DMA */
+#define SDHCI_QUIRK2_BROKEN_64_BIT_DMA                 (1<<9)
+/* Need to clear the transfer mode register before sending a command */
+#define SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD (1<<10)
+/* Capability register bit-63 indicates HS400 support */
+#define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400              (1<<11)
+/* forced tuned clock */
+#define SDHCI_QUIRK2_TUNING_WORK_AROUND                        (1<<12)
+/* disable the block count for single block transactions */
+#define SDHCI_QUIRK2_SUPPORT_SINGLE                    (1<<13)
+/* Controller is broken when using Auto-CMD23 */
+#define SDHCI_QUIRK2_ACMD23_BROKEN                     (1<<14)
+
+       int irq;                /* Device IRQ */
+       void __iomem *ioaddr;   /* Mapped address */
+
+       const struct sdhci_ops *ops;    /* Low level hw interface */
+
+       /* Internal data */
+       struct mmc_host *mmc;   /* MMC structure */
+       u64 dma_mask;           /* custom DMA mask */
+
+#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+       struct led_classdev led;        /* LED control */
+       char led_name[32];
+#endif
+
+       spinlock_t lock;        /* Mutex */
+
+       int flags;              /* Host attributes */
+#define SDHCI_USE_SDMA         (1<<0)  /* Host is SDMA capable */
+#define SDHCI_USE_ADMA         (1<<1)  /* Host is ADMA capable */
+#define SDHCI_REQ_USE_DMA      (1<<2)  /* Use DMA for this req. */
+#define SDHCI_DEVICE_DEAD      (1<<3)  /* Device unresponsive */
+#define SDHCI_SDR50_NEEDS_TUNING (1<<4)        /* SDR50 needs tuning */
+#define SDHCI_NEEDS_RETUNING   (1<<5)  /* Host needs retuning */
+#define SDHCI_AUTO_CMD12       (1<<6)  /* Auto CMD12 support */
+#define SDHCI_AUTO_CMD23       (1<<7)  /* Auto CMD23 support */
+#define SDHCI_PV_ENABLED       (1<<8)  /* Preset value enabled */
+#define SDHCI_SDIO_IRQ_ENABLED (1<<9)  /* SDIO irq enabled */
+#define SDHCI_SDR104_NEEDS_TUNING (1<<10)      /* SDR104/HS200 needs tuning */
+#define SDHCI_USING_RETUNING_TIMER (1<<11)     /* Host is using a retuning timer for the card */
+#define SDHCI_USE_64_BIT_DMA   (1<<12) /* Use 64-bit DMA */
+#define SDHCI_HS400_TUNING     (1<<13) /* Tuning for HS400 */
+
+       unsigned int version;   /* SDHCI spec. version */
+
+       unsigned int max_clk;   /* Max possible freq (Hz) */
+       unsigned int timeout_clk;       /* Timeout freq (KHz) */
+       unsigned int clk_mul;   /* Clock Multiplier value */
+
+       unsigned int clock;     /* Current clock (Hz) */
+       u8 pwr;                 /* Current voltage */
+
+       bool runtime_suspended; /* Host is runtime suspended */
+       bool bus_on;            /* Bus power prevents runtime suspend */
+       bool preset_enabled;    /* Preset is enabled */
+
+       struct mmc_request *mrq;        /* Current request */
+       struct mmc_command *cmd;        /* Current command */
+       struct mmc_data *data;  /* Current data request */
+       unsigned int data_early:1;      /* Data finished before cmd */
+       unsigned int busy_handle:1;     /* Handling the order of Busy-end */
+
+       struct sg_mapping_iter sg_miter;        /* SG state for PIO */
+       unsigned int blocks;    /* remaining PIO blocks */
+
+       int sg_count;           /* Mapped sg entries */
+
+       void *adma_table;       /* ADMA descriptor table */
+       void *align_buffer;     /* Bounce buffer */
+
+       size_t adma_table_sz;   /* ADMA descriptor table size */
+       size_t align_buffer_sz; /* Bounce buffer size */
+
+       dma_addr_t adma_addr;   /* Mapped ADMA descr. table */
+       dma_addr_t align_addr;  /* Mapped bounce buffer */
+
+       unsigned int desc_sz;   /* ADMA descriptor size */
+       unsigned int align_sz;  /* ADMA alignment */
+       unsigned int align_mask;        /* ADMA alignment mask */
+
+       struct tasklet_struct finish_tasklet;   /* Tasklet structures */
+
+       struct timer_list timer;        /* Timer for timeouts */
+
+       u32 caps;               /* Alternative CAPABILITY_0 */
+       u32 caps1;              /* Alternative CAPABILITY_1 */
+
+       unsigned int            ocr_avail_sdio; /* OCR bit masks */
+       unsigned int            ocr_avail_sd;
+       unsigned int            ocr_avail_mmc;
+       u32 ocr_mask;           /* available voltages */
+
+       unsigned                timing;         /* Current timing */
+
+       u32                     thread_isr;
+
+       /* cached registers */
+       u32                     ier;
+
+       wait_queue_head_t       buf_ready_int;  /* Waitqueue for Buffer Read Ready interrupt */
+       unsigned int            tuning_done;    /* Condition flag set when CMD19 succeeds */
+
+       unsigned int            tuning_count;   /* Timer count for re-tuning */
+       unsigned int            tuning_mode;    /* Re-tuning mode supported by host */
+#define SDHCI_TUNING_MODE_1    0
+       struct timer_list       tuning_timer;   /* Timer for tuning */
+
+       struct sdhci_host_next  next_data;
+       unsigned long private[0] ____cacheline_aligned;
+};
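/*
 * Illustration (not part of the patch): the private[] flexible tail
 * above lets glue drivers co-allocate their state with the core
 * structure in one allocation. A hedged sketch of the idiom; the struct
 * and helper names are hypothetical.
 */
struct my_sdhci_priv {
	struct clk *clk;
	int quirk_flags;
};

/*
 * sdhci_alloc_host(dev, sizeof(struct my_sdhci_priv)) reserves the extra
 * bytes; private[] then starts a cacheline-aligned region that belongs
 * to the glue driver:
 */
static inline struct my_sdhci_priv *to_my_priv(struct sdhci_host *host)
{
	return (struct my_sdhci_priv *)host->private;
}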
+
 struct sdhci_ops {
 #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
        u32             (*read_l)(struct sdhci_host *host, int reg);
index 7d9d6a3215210f537b1b6e46070b610320f80ff5..072f67066df38e8d60d4eb0d1f5bc656947fa1fa 100644 (file)
@@ -875,6 +875,7 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
        struct mmc_command *cmd = mrq->cmd;
        u32 opc = cmd->opcode;
        u32 mask;
+       unsigned long flags;
 
        switch (opc) {
        /* response busy check */
@@ -909,10 +910,12 @@ static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
        /* set arg */
        sh_mmcif_writel(host->addr, MMCIF_CE_ARG, cmd->arg);
        /* set cmd */
+       spin_lock_irqsave(&host->lock, flags);
        sh_mmcif_writel(host->addr, MMCIF_CE_CMD_SET, opc);
 
        host->wait_for = MMCIF_WAIT_FOR_CMD;
        schedule_delayed_work(&host->timeout_work, host->timeout);
+       spin_unlock_irqrestore(&host->lock, flags);
 }
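/*
 * Illustration (not part of the patch): the lock added above pairs the
 * state update with arming the timeout work, and makes the IRQ thread
 * read a snapshot instead of the live field. A hedged generic sketch of
 * the shape of the fix; all names are hypothetical.
 */
#define WAIT_FOR_CMD	1	/* hypothetical state value */

struct my_host {
	spinlock_t lock;
	int wait_for;
	struct delayed_work timeout_work;
	unsigned long timeout;
};

static void my_start_op(struct my_host *host)
{
	unsigned long flags;

	spin_lock_irqsave(&host->lock, flags);
	host->wait_for = WAIT_FOR_CMD;	/* publish state ... */
	schedule_delayed_work(&host->timeout_work, host->timeout);
	spin_unlock_irqrestore(&host->lock, flags);	/* ... atomically */
}

static void my_irq_thread(struct my_host *host)
{
	unsigned long flags;
	int wait_work;

	spin_lock_irqsave(&host->lock, flags);
	wait_work = host->wait_for;	/* snapshot under the lock */
	spin_unlock_irqrestore(&host->lock, flags);

	/* act on wait_work, never on host->wait_for directly */
}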
 
 static void sh_mmcif_stop_cmd(struct sh_mmcif_host *host,
@@ -1171,6 +1174,12 @@ static irqreturn_t sh_mmcif_irqt(int irq, void *dev_id)
        struct sh_mmcif_host *host = dev_id;
        struct mmc_request *mrq;
        bool wait = false;
+       unsigned long flags;
+       int wait_work;
+
+       spin_lock_irqsave(&host->lock, flags);
+       wait_work = host->wait_for;
+       spin_unlock_irqrestore(&host->lock, flags);
 
        cancel_delayed_work_sync(&host->timeout_work);
 
@@ -1188,7 +1197,7 @@ static irqreturn_t sh_mmcif_irqt(int irq, void *dev_id)
         * All handlers return true if processing continues, and false if the
         * request has to be completed - successfully or not.
         */
-       switch (host->wait_for) {
+       switch (wait_work) {
        case MMCIF_WAIT_FOR_REQUEST:
                /* We're too late, the timeout has already kicked in */
                mutex_unlock(&host->thread_lock);
@@ -1312,15 +1321,15 @@ static void mmcif_timeout_work(struct work_struct *work)
                /* Don't run after mmc_remove_host() */
                return;
 
-       dev_err(&host->pd->dev, "Timeout waiting for %u on CMD%u\n",
-               host->wait_for, mrq->cmd->opcode);
-
        spin_lock_irqsave(&host->lock, flags);
        if (host->state == STATE_IDLE) {
                spin_unlock_irqrestore(&host->lock, flags);
                return;
        }
 
+       dev_err(&host->pd->dev, "Timeout waiting for %u on CMD%u\n",
+               host->wait_for, mrq->cmd->opcode);
+
        host->state = STATE_TIMEOUT;
        spin_unlock_irqrestore(&host->lock, flags);
 
index e8a4218b57267f508eb871f216de112e93707d31..4d3e1ffe5508273fc1f9f91b7b5c3218bed1ea07 100644 (file)
@@ -293,7 +293,7 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host,
                                    struct mmc_data *data)
 {
        struct sunxi_idma_des *pdes = (struct sunxi_idma_des *)host->sg_cpu;
-       struct sunxi_idma_des *pdes_pa = (struct sunxi_idma_des *)host->sg_dma;
+       dma_addr_t next_desc = host->sg_dma;
        int i, max_len = (1 << host->idma_des_size_bits);
 
        for (i = 0; i < data->sg_len; i++) {
@@ -305,8 +305,9 @@ static void sunxi_mmc_init_idma_des(struct sunxi_mmc_host *host,
                else
                        pdes[i].buf_size = data->sg[i].length;
 
+               next_desc += sizeof(struct sunxi_idma_des);
                pdes[i].buf_addr_ptr1 = sg_dma_address(&data->sg[i]);
-               pdes[i].buf_addr_ptr2 = (u32)&pdes_pa[i + 1];
+               pdes[i].buf_addr_ptr2 = (u32)next_desc;
        }
 
        pdes[0].config |= SDXC_IDMAC_DES0_FD;
@@ -930,7 +931,9 @@ static int sunxi_mmc_resource_request(struct sunxi_mmc_host *host,
                return PTR_ERR(host->clk_sample);
        }
 
-       host->reset = devm_reset_control_get(&pdev->dev, "ahb");
+       host->reset = devm_reset_control_get_optional(&pdev->dev, "ahb");
+       if (PTR_ERR(host->reset) == -EPROBE_DEFER)
+               return PTR_ERR(host->reset);
 
        ret = clk_prepare_enable(host->clk_ahb);
        if (ret) {
@@ -1028,7 +1031,7 @@ static int sunxi_mmc_probe(struct platform_device *pdev)
        mmc->f_min              =   400000;
        mmc->f_max              = 50000000;
        mmc->caps              |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED |
-                                 MMC_CAP_ERASE;
+                                 MMC_CAP_ERASE | MMC_CAP_SDIO_IRQ;
 
        ret = mmc_of_parse(mmc);
        if (ret)
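
Two separate fixes in the sunxi hunks above: the descriptor chain now advances a dma_addr_t cursor by sizeof(struct sunxi_idma_des) rather than casting a CPU pointer into the link field, and the "ahb" reset line becomes optional, with only -EPROBE_DEFER treated as fatal at this point (the driver still checks IS_ERR() before touching the handle). A sketch of the optional-reset pattern under the v4.1-era API, hypothetical names:

#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/reset.h>

static int demo_get_reset(struct platform_device *pdev,
			  struct reset_control **out)
{
	struct reset_control *rst;

	rst = devm_reset_control_get_optional(&pdev->dev, "ahb");
	if (PTR_ERR(rst) == -EPROBE_DEFER)
		return -EPROBE_DEFER;	/* provider not probed yet, retry later */

	*out = rst;	/* may still be an error pointer: IS_ERR() before use */
	return 0;
}
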
index a31c3573d386f8c6f0a9f4e2143a623081d1d636..dba7e1c19dd758e784f31c89afe7dafc66334a06 100644 (file)
@@ -1073,8 +1073,6 @@ EXPORT_SYMBOL(tmio_mmc_host_alloc);
 void tmio_mmc_host_free(struct tmio_mmc_host *host)
 {
        mmc_free_host(host->mmc);
-
-       host->mmc = NULL;
 }
 EXPORT_SYMBOL(tmio_mmc_host_free);
 
index dd2e1aa95ba3fa4c999bfb25dc9fd3a7d3f08982..5af00559e9d6f7f933bccd41db4e3dc334a41673 100644 (file)
@@ -744,7 +744,7 @@ static struct wmt_mci_caps wm8505_caps = {
        .max_blk_size = 2048,
 };
 
-static struct of_device_id wmt_mci_dt_ids[] = {
+static const struct of_device_id wmt_mci_dt_ids[] = {
        { .compatible = "wm,wm8505-sdhc", .data = &wm8505_caps },
        { /* Sentinel */ },
 };
index 1f4ea6f2d91094b1af4dee898d7fd0d63765f032..2e9f84fdd9ceb3d39611c617573f4895ab36423e 100644 (file)
@@ -342,7 +342,7 @@ static const struct irq_domain_ops msi_domain_ops = {
        .map = dw_pcie_msi_map,
 };
 
-int __init dw_pcie_host_init(struct pcie_port *pp)
+int dw_pcie_host_init(struct pcie_port *pp)
 {
        struct device_node *np = pp->dev->of_node;
        struct platform_device *pdev = to_platform_device(pp->dev);
index 866465fd3dbf7e617b3085e9a175bceea82d1ad1..020d788907191fd73ac1b7058b66b9a76f6b7351 100644 (file)
@@ -269,7 +269,7 @@ static struct pcie_host_ops spear13xx_pcie_host_ops = {
        .host_init = spear13xx_pcie_host_init,
 };
 
-static int __init spear13xx_add_pcie_port(struct pcie_port *pp,
+static int spear13xx_add_pcie_port(struct pcie_port *pp,
                                         struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -299,7 +299,7 @@ static int __init spear13xx_add_pcie_port(struct pcie_port *pp,
        return 0;
 }
 
-static int __init spear13xx_pcie_probe(struct platform_device *pdev)
+static int spear13xx_pcie_probe(struct platform_device *pdev)
 {
        struct spear13xx_pcie *spear13xx_pcie;
        struct pcie_port *pp;
@@ -370,7 +370,7 @@ static const struct of_device_id spear13xx_pcie_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, spear13xx_pcie_of_match);
 
-static struct platform_driver spear13xx_pcie_driver __initdata = {
+static struct platform_driver spear13xx_pcie_driver = {
        .probe          = spear13xx_pcie_probe,
        .driver = {
                .name   = "spear-pcie",
index 7d48ecae6695581e7ded9caa59adf782d5bf0d3f..788db48dbbad9ddf6ff697cd88c4fd832de29cde 100644 (file)
@@ -286,11 +286,12 @@ int cpci_configure_slot(struct slot *slot)
        }
        parent = slot->dev->bus;
 
-       list_for_each_entry(dev, &parent->devices, bus_list)
+       list_for_each_entry(dev, &parent->devices, bus_list) {
                if (PCI_SLOT(dev->devfn) != PCI_SLOT(slot->devfn))
                        continue;
                if (pci_is_bridge(dev))
                        pci_hp_add_bridge(dev);
+       }
 
 
        pci_assign_unassigned_bridge_resources(parent->self);
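
The cpci hunk adds braces because without them only the first if statement is the loop body; the bridge check then ran exactly once, after the loop, on whatever dev ended up pointing at. A plain userspace illustration of the pitfall (nothing PCI-specific):

#include <stdio.h>

int main(void)
{
	int i;

	/* without braces, only the first statement is the loop body */
	for (i = 0; i < 3; i++)
		if (i == 0)
			continue;	/* the loop body ends here */
		printf("misleadingly indented: runs once after the loop, i=%d\n", i);

	/* with braces, both statements run on every iteration */
	for (i = 0; i < 3; i++) {
		if (i == 0)
			continue;
		printf("runs inside the loop: i=%d\n", i);
	}
	return 0;
}
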
index 4890639873256812b721d83f304dff634ef58da1..c93fbe76d281c4b4822ceddae79feab984cfb576 100644 (file)
@@ -248,6 +248,9 @@ int pci_get_hp_params(struct pci_dev *dev, struct hotplug_params *hpp)
        acpi_handle handle, phandle;
        struct pci_bus *pbus;
 
+       if (acpi_pci_disabled)
+               return -ENODEV;
+
        handle = NULL;
        for (pbus = dev->bus; pbus; pbus = pbus->parent) {
                handle = acpi_pci_get_bridge_handle(pbus);
index c6849d9e86ce6c33bc11ec8ea621e3e042878817..167fe411ce2e30460ba6d0c8a04383976532b4e9 100644 (file)
@@ -132,16 +132,8 @@ static const char *aer_agent_string[] = {
 static void __print_tlp_header(struct pci_dev *dev,
                               struct aer_header_log_regs *t)
 {
-       unsigned char *tlp = (unsigned char *)&t;
-
-       dev_err(&dev->dev, "  TLP Header:"
-               " %02x%02x%02x%02x %02x%02x%02x%02x"
-               " %02x%02x%02x%02x %02x%02x%02x%02x\n",
-               *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
-               *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
-               *(tlp + 11), *(tlp + 10), *(tlp + 9),
-               *(tlp + 8), *(tlp + 15), *(tlp + 14),
-               *(tlp + 13), *(tlp + 12));
+       dev_err(&dev->dev, "  TLP Header: %08x %08x %08x %08x\n",
+               t->dw0, t->dw1, t->dw2, t->dw3);
 }
 
 static void __aer_print_error(struct pci_dev *dev,
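
The removed aerdrv code had a classic pointer bug: t is itself a pointer parameter, so (unsigned char *)&t aliased the pointer's own stack slot rather than the TLP header it points to; the replacement simply prints the four dwords. A userspace illustration:

#include <stdio.h>

struct hdr { unsigned int dw0, dw1, dw2, dw3; };

static void dump(struct hdr *t)
{
	unsigned char *wrong = (unsigned char *)&t;	/* bytes of the pointer itself */
	unsigned char *right = (unsigned char *)t;	/* bytes of the pointed-to struct */

	printf("wrong first byte: %02x\n", wrong[0]);	/* part of an address */
	printf("right first byte: %02x\n", right[0]);	/* 0x11 on little-endian */
}

int main(void)
{
	struct hdr h = { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };

	dump(&h);
	return 0;
}
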
index 472a5adc4642790d2ff1df9eab73aaaf2c4b064d..c29ba7e1430482c1ffe6c6ec92e1bd5cc7b06743 100644 (file)
@@ -55,7 +55,7 @@ static int rtc_suspend(struct device *dev)
        struct timespec64       delta, delta_delta;
        int err;
 
-       if (has_persistent_clock())
+       if (timekeeping_rtc_skipsuspend())
                return 0;
 
        if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
@@ -102,7 +102,7 @@ static int rtc_resume(struct device *dev)
        struct timespec64       sleep_time;
        int err;
 
-       if (has_persistent_clock())
+       if (timekeeping_rtc_skipresume())
                return 0;
 
        rtc_hctosys_ret = -ENODEV;
@@ -117,10 +117,6 @@ static int rtc_resume(struct device *dev)
                return 0;
        }
 
-       if (rtc_valid_tm(&tm) != 0) {
-               pr_debug("%s:  bogus resume time\n", dev_name(&rtc->dev));
-               return 0;
-       }
        new_rtc.tv_sec = rtc_tm_to_time64(&tm);
        new_rtc.tv_nsec = 0;
 
index 37215cf983e92926653d1f3206aa8c4c8842a55a..d43ee409a5f29c8ba344232577f56eab6121a6b2 100644 (file)
@@ -72,7 +72,11 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
                err = -ENODEV;
        else if (rtc->ops->set_time)
                err = rtc->ops->set_time(rtc->dev.parent, tm);
-       else if (rtc->ops->set_mmss) {
+       else if (rtc->ops->set_mmss64) {
+               time64_t secs64 = rtc_tm_to_time64(tm);
+
+               err = rtc->ops->set_mmss64(rtc->dev.parent, secs64);
+       } else if (rtc->ops->set_mmss) {
                time64_t secs64 = rtc_tm_to_time64(tm);
                err = rtc->ops->set_mmss(rtc->dev.parent, secs64);
        } else
@@ -96,6 +100,8 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
 
        if (!rtc->ops)
                err = -ENODEV;
+       else if (rtc->ops->set_mmss64)
+               err = rtc->ops->set_mmss64(rtc->dev.parent, secs);
        else if (rtc->ops->set_mmss)
                err = rtc->ops->set_mmss(rtc->dev.parent, secs);
        else if (rtc->ops->read_time && rtc->ops->set_time) {
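
The interface.c hunks slot set_mmss64 between set_time and the legacy set_mmss, so y2038-safe drivers are preferred when present. A condensed sketch of the resulting selection order (hypothetical demo_set helper, not the full rtc_set_time with its locking and validation):

#include <linux/errno.h>
#include <linux/rtc.h>

static int demo_set(struct rtc_device *rtc, struct rtc_time *tm)
{
	time64_t secs64 = rtc_tm_to_time64(tm);

	if (rtc->ops->set_time)			/* full date/time, preferred */
		return rtc->ops->set_time(rtc->dev.parent, tm);
	if (rtc->ops->set_mmss64)		/* 64-bit seconds, y2038-safe */
		return rtc->ops->set_mmss64(rtc->dev.parent, secs64);
	if (rtc->ops->set_mmss)			/* legacy unsigned long seconds */
		return rtc->ops->set_mmss(rtc->dev.parent, secs64);
	return -ENODEV;
}
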
index 1d0340fdb82021b5326182b4a517c4763b0818e8..9b725c55305859b48f5e6f2ffe45088d4b6f0659 100644 (file)
 /*
  * RTC clock functions and device struct declaration
  */
-static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs)
+static int ab3100_rtc_set_mmss(struct device *dev, time64_t secs)
 {
        u8 regs[] = {AB3100_TI0, AB3100_TI1, AB3100_TI2,
                     AB3100_TI3, AB3100_TI4, AB3100_TI5};
        unsigned char buf[6];
-       u64 fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2;
+       u64 hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2;
        int err = 0;
        int i;
 
-       buf[0] = (fat_time) & 0xFF;
-       buf[1] = (fat_time >> 8) & 0xFF;
-       buf[2] = (fat_time >> 16) & 0xFF;
-       buf[3] = (fat_time >> 24) & 0xFF;
-       buf[4] = (fat_time >> 32) & 0xFF;
-       buf[5] = (fat_time >> 40) & 0xFF;
+       buf[0] = (hw_counter) & 0xFF;
+       buf[1] = (hw_counter >> 8) & 0xFF;
+       buf[2] = (hw_counter >> 16) & 0xFF;
+       buf[3] = (hw_counter >> 24) & 0xFF;
+       buf[4] = (hw_counter >> 32) & 0xFF;
+       buf[5] = (hw_counter >> 40) & 0xFF;
 
        for (i = 0; i < 6; i++) {
                err = abx500_set_register_interruptible(dev, 0,
@@ -75,7 +75,7 @@ static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs)
 
 static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       unsigned long time;
+       time64_t time;
        u8 rtcval;
        int err;
 
@@ -88,7 +88,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
                dev_info(dev, "clock not set (lost power)");
                return -EINVAL;
        } else {
-               u64 fat_time;
+               u64 hw_counter;
                u8 buf[6];
 
                /* Read out time registers */
@@ -98,22 +98,21 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
                if (err != 0)
                        return err;
 
-               fat_time = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) |
+               hw_counter = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) |
                        ((u64) buf[3] << 24) | ((u64) buf[2] << 16) |
                        ((u64) buf[1] << 8) | (u64) buf[0];
-               time = (unsigned long) (fat_time /
-                                       (u64) (AB3100_RTC_CLOCK_RATE * 2));
+               time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2);
        }
 
-       rtc_time_to_tm(time, tm);
+       rtc_time64_to_tm(time, tm);
 
        return rtc_valid_tm(tm);
 }
 
 static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
-       unsigned long time;
-       u64 fat_time;
+       time64_t time;
+       u64 hw_counter;
        u8 buf[6];
        u8 rtcval;
        int err;
@@ -134,11 +133,11 @@ static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
                                                     AB3100_AL0, buf, 4);
        if (err)
                return err;
-       fat_time = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) |
+       hw_counter = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) |
                ((u64) buf[1] << 24) | ((u64) buf[0] << 16);
-       time = (unsigned long) (fat_time / (u64) (AB3100_RTC_CLOCK_RATE * 2));
+       time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2);
 
-       rtc_time_to_tm(time, &alarm->time);
+       rtc_time64_to_tm(time, &alarm->time);
 
        return rtc_valid_tm(&alarm->time);
 }
@@ -147,17 +146,17 @@ static int ab3100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
        u8 regs[] = {AB3100_AL0, AB3100_AL1, AB3100_AL2, AB3100_AL3};
        unsigned char buf[4];
-       unsigned long secs;
-       u64 fat_time;
+       time64_t secs;
+       u64 hw_counter;
        int err;
        int i;
 
-       rtc_tm_to_time(&alarm->time, &secs);
-       fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2;
-       buf[0] = (fat_time >> 16) & 0xFF;
-       buf[1] = (fat_time >> 24) & 0xFF;
-       buf[2] = (fat_time >> 32) & 0xFF;
-       buf[3] = (fat_time >> 40) & 0xFF;
+       secs = rtc_tm_to_time64(&alarm->time);
+       hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2;
+       buf[0] = (hw_counter >> 16) & 0xFF;
+       buf[1] = (hw_counter >> 24) & 0xFF;
+       buf[2] = (hw_counter >> 32) & 0xFF;
+       buf[3] = (hw_counter >> 40) & 0xFF;
 
        /* Set the alarm */
        for (i = 0; i < 4; i++) {
@@ -193,7 +192,7 @@ static int ab3100_rtc_irq_enable(struct device *dev, unsigned int enabled)
 
 static const struct rtc_class_ops ab3100_rtc_ops = {
        .read_time      = ab3100_rtc_read_time,
-       .set_mmss       = ab3100_rtc_set_mmss,
+       .set_mmss64     = ab3100_rtc_set_mmss,
        .read_alarm     = ab3100_rtc_read_alarm,
        .set_alarm      = ab3100_rtc_set_alarm,
        .alarm_irq_enable = ab3100_rtc_irq_enable,
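
The ab3100 conversion swaps unsigned long and rtc_time_to_tm/rtc_tm_to_time for time64_t and the 64-bit helpers, so the seconds value no longer truncates on 32-bit after 2038. A sketch of the round trip with a hypothetical counter rate (DEMO_CLOCK_RATE, demo_* names are not the driver's):

#include <linux/math64.h>
#include <linux/rtc.h>

#define DEMO_CLOCK_RATE	32768	/* hypothetical counter rate in Hz */

static u64 demo_tm_to_counter(struct rtc_time *tm)
{
	time64_t secs = rtc_tm_to_time64(tm);	/* no 32-bit truncation */

	return (u64)secs * DEMO_CLOCK_RATE * 2;	/* half-tick units */
}

static void demo_counter_to_tm(u64 hw_counter, struct rtc_time *tm)
{
	time64_t secs = div_u64(hw_counter, DEMO_CLOCK_RATE * 2);

	rtc_time64_to_tm(secs, tm);
}
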
index 5bce904b7ee617cb2e3991cd4893ad493a75cbbd..32df1d812367c58acf3ebcd8445597fc8bdac014 100644 (file)
@@ -83,20 +83,19 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
                        return ret;
        } while (days1 != days2);
 
-       rtc_time_to_tm(days1 * SEC_PER_DAY + seconds, tm);
+       rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm);
 
        return rtc_valid_tm(tm);
 }
 
-static int mc13xxx_rtc_set_mmss(struct device *dev, unsigned long secs)
+static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs)
 {
        struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
        unsigned int seconds, days;
        unsigned int alarmseconds;
        int ret;
 
-       seconds = secs % SEC_PER_DAY;
-       days = secs / SEC_PER_DAY;
+       days = div_s64_rem(secs, SEC_PER_DAY, &seconds);
 
        mc13xxx_lock(priv->mc13xxx);
 
@@ -159,7 +158,7 @@ static int mc13xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
        struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
        unsigned seconds, days;
-       unsigned long s1970;
+       time64_t s1970;
        int enabled, pending;
        int ret;
 
@@ -189,10 +188,10 @@ out:
        alarm->enabled = enabled;
        alarm->pending = pending;
 
-       s1970 = days * SEC_PER_DAY + seconds;
+       s1970 = (time64_t)days * SEC_PER_DAY + seconds;
 
-       rtc_time_to_tm(s1970, &alarm->time);
-       dev_dbg(dev, "%s: %lu\n", __func__, s1970);
+       rtc_time64_to_tm(s1970, &alarm->time);
+       dev_dbg(dev, "%s: %lld\n", __func__, (long long)s1970);
 
        return 0;
 }
@@ -200,8 +199,8 @@ out:
 static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
        struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
-       unsigned long s1970;
-       unsigned seconds, days;
+       time64_t s1970;
+       u32 seconds, days;
        int ret;
 
        mc13xxx_lock(priv->mc13xxx);
@@ -215,20 +214,17 @@ static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
        if (unlikely(ret))
                goto out;
 
-       ret = rtc_tm_to_time(&alarm->time, &s1970);
-       if (unlikely(ret))
-               goto out;
+       s1970 = rtc_tm_to_time64(&alarm->time);
 
-       dev_dbg(dev, "%s: o%2.s %lu\n", __func__, alarm->enabled ? "n" : "ff",
-                       s1970);
+       dev_dbg(dev, "%s: o%2.s %lld\n", __func__, alarm->enabled ? "n" : "ff",
+                       (long long)s1970);
 
        ret = mc13xxx_rtc_irq_enable_unlocked(dev, alarm->enabled,
                        MC13XXX_IRQ_TODA);
        if (unlikely(ret))
                goto out;
 
-       seconds = s1970 % SEC_PER_DAY;
-       days = s1970 / SEC_PER_DAY;
+       days = div_s64_rem(s1970, SEC_PER_DAY, &seconds);
 
        ret = mc13xxx_reg_write(priv->mc13xxx, MC13XXX_RTCDAYA, days);
        if (unlikely(ret))
@@ -268,7 +264,7 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev)
 
 static const struct rtc_class_ops mc13xxx_rtc_ops = {
        .read_time = mc13xxx_rtc_read_time,
-       .set_mmss = mc13xxx_rtc_set_mmss,
+       .set_mmss64 = mc13xxx_rtc_set_mmss,
        .read_alarm = mc13xxx_rtc_read_alarm,
        .set_alarm = mc13xxx_rtc_set_alarm,
        .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
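
In the mc13xxx hunks, div_s64_rem() from <linux/math64.h> replaces the separate % and / on a time64_t, giving a correct 64-bit day/second split with a single division. A sketch (hypothetical demo_split):

#include <linux/math64.h>
#include <linux/time64.h>

#define DEMO_SEC_PER_DAY	86400

static void demo_split(time64_t secs, unsigned int *days, unsigned int *sec)
{
	s32 rem;

	*days = div_s64_rem(secs, DEMO_SEC_PER_DAY, &rem);
	*sec = rem;	/* 0 <= rem < 86400 for non-negative secs */
}
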
index 3c3f8d10ab439c83fe93ec2041e8a3e29d9e0fc4..09d422b9f7f737d6ba4a0c990212e7f33f1d4038 100644 (file)
@@ -106,7 +106,7 @@ static inline int is_imx1_rtc(struct rtc_plat_data *data)
  * This function is used to obtain the RTC time or the alarm value in
  * second.
  */
-static u32 get_alarm_or_time(struct device *dev, int time_alarm)
+static time64_t get_alarm_or_time(struct device *dev, int time_alarm)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
@@ -129,29 +129,28 @@ static u32 get_alarm_or_time(struct device *dev, int time_alarm)
        hr = hr_min >> 8;
        min = hr_min & 0xff;
 
-       return (((day * 24 + hr) * 60) + min) * 60 + sec;
+       return ((((time64_t)day * 24 + hr) * 60) + min) * 60 + sec;
 }
 
 /*
  * This function sets the RTC alarm value or the time value.
  */
-static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time)
+static void set_alarm_or_time(struct device *dev, int time_alarm, time64_t time)
 {
-       u32 day, hr, min, sec, temp;
+       u32 tod, day, hr, min, sec, temp;
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
 
-       day = time / 86400;
-       time -= day * 86400;
+       day = div_s64_rem(time, 86400, &tod);
 
        /* time is within a day now */
-       hr = time / 3600;
-       time -= hr * 3600;
+       hr = tod / 3600;
+       tod -= hr * 3600;
 
        /* time is within an hour now */
-       min = time / 60;
-       sec = time - min * 60;
+       min = tod / 60;
+       sec = tod - min * 60;
 
        temp = (hr << 8) + min;
 
@@ -173,29 +172,18 @@ static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time)
  * This function updates the RTC alarm registers and then clears all the
  * interrupt status bits.
  */
-static int rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
+static void rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
 {
-       struct rtc_time alarm_tm, now_tm;
-       unsigned long now, time;
+       time64_t time;
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
 
-       now = get_alarm_or_time(dev, MXC_RTC_TIME);
-       rtc_time_to_tm(now, &now_tm);
-       alarm_tm.tm_year = now_tm.tm_year;
-       alarm_tm.tm_mon = now_tm.tm_mon;
-       alarm_tm.tm_mday = now_tm.tm_mday;
-       alarm_tm.tm_hour = alrm->tm_hour;
-       alarm_tm.tm_min = alrm->tm_min;
-       alarm_tm.tm_sec = alrm->tm_sec;
-       rtc_tm_to_time(&alarm_tm, &time);
+       time = rtc_tm_to_time64(alrm);
 
        /* clear all the interrupt status bits */
        writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR);
        set_alarm_or_time(dev, MXC_RTC_ALARM, time);
-
-       return 0;
 }
 
 static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit,
@@ -283,14 +271,14 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
  */
 static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       u32 val;
+       time64_t val;
 
        /* Avoid roll-over from reading the different registers */
        do {
                val = get_alarm_or_time(dev, MXC_RTC_TIME);
        } while (val != get_alarm_or_time(dev, MXC_RTC_TIME));
 
-       rtc_time_to_tm(val, tm);
+       rtc_time64_to_tm(val, tm);
 
        return 0;
 }
@@ -298,7 +286,7 @@ static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm)
 /*
  * This function sets the internal RTC time based on tm in Gregorian date.
  */
-static int mxc_rtc_set_mmss(struct device *dev, unsigned long time)
+static int mxc_rtc_set_mmss(struct device *dev, time64_t time)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
@@ -309,9 +297,9 @@ static int mxc_rtc_set_mmss(struct device *dev, unsigned long time)
        if (is_imx1_rtc(pdata)) {
                struct rtc_time tm;
 
-               rtc_time_to_tm(time, &tm);
+               rtc_time64_to_tm(time, &tm);
                tm.tm_year = 70;
-               rtc_tm_to_time(&tm, &time);
+               time = rtc_tm_to_time64(&tm);
        }
 
        /* Avoid roll-over from reading the different registers */
@@ -333,7 +321,7 @@ static int mxc_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
 
-       rtc_time_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
+       rtc_time64_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
        alrm->pending = ((readw(ioaddr + RTC_RTCISR) & RTC_ALM_BIT)) ? 1 : 0;
 
        return 0;
@@ -346,11 +334,8 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-       int ret;
 
-       ret = rtc_update_alarm(dev, &alrm->time);
-       if (ret)
-               return ret;
+       rtc_update_alarm(dev, &alrm->time);
 
        memcpy(&pdata->g_rtc_alarm, &alrm->time, sizeof(struct rtc_time));
        mxc_rtc_irq_enable(dev, RTC_ALM_BIT, alrm->enabled);
@@ -362,7 +347,7 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 static struct rtc_class_ops mxc_rtc_ops = {
        .release                = mxc_rtc_release,
        .read_time              = mxc_rtc_read_time,
-       .set_mmss               = mxc_rtc_set_mmss,
+       .set_mmss64             = mxc_rtc_set_mmss,
        .read_alarm             = mxc_rtc_read_alarm,
        .set_alarm              = mxc_rtc_set_alarm,
        .alarm_irq_enable       = mxc_rtc_alarm_irq_enable,
index 8f86fa91de1a80e4e7d1a60a58c3acd3a27bd1ce..3a2da4c892d65f25c597f1872932d639108bd57a 100644 (file)
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 
+static int test_mmss64;
+module_param(test_mmss64, int, 0644);
+MODULE_PARM_DESC(test_mmss64, "Test struct rtc_class_ops.set_mmss64().");
+
 static struct platform_device *test0 = NULL, *test1 = NULL;
 
 static int test_rtc_read_alarm(struct device *dev,
@@ -30,7 +34,13 @@ static int test_rtc_set_alarm(struct device *dev,
 static int test_rtc_read_time(struct device *dev,
        struct rtc_time *tm)
 {
-       rtc_time_to_tm(get_seconds(), tm);
+       rtc_time64_to_tm(ktime_get_real_seconds(), tm);
+       return 0;
+}
+
+static int test_rtc_set_mmss64(struct device *dev, time64_t secs)
+{
+       dev_info(dev, "%s, secs = %lld\n", __func__, (long long)secs);
        return 0;
 }
 
@@ -55,7 +65,7 @@ static int test_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
        return 0;
 }
 
-static const struct rtc_class_ops test_rtc_ops = {
+static struct rtc_class_ops test_rtc_ops = {
        .proc = test_rtc_proc,
        .read_time = test_rtc_read_time,
        .read_alarm = test_rtc_read_alarm,
@@ -101,6 +111,11 @@ static int test_probe(struct platform_device *plat_dev)
        int err;
        struct rtc_device *rtc;
 
+       if (test_mmss64) {
+               test_rtc_ops.set_mmss64 = test_rtc_set_mmss64;
+               test_rtc_ops.set_mmss = NULL;
+       }
+
        rtc = devm_rtc_device_register(&plat_dev->dev, "test",
                                &test_rtc_ops, THIS_MODULE);
        if (IS_ERR(rtc)) {
index eb71872d0361c0dbedd8f994aae72ce6bd0d2c7e..7728d5e32bf4921d7bfd02cabe4b5c1ea124a6bd 100644 (file)
@@ -11,7 +11,7 @@
  * rtc_set_ntp_time - Save NTP synchronized time to the RTC
  * @now: Current time of day
  *
- * Replacement for the NTP platform function update_persistent_clock
+ * Replacement for the NTP platform function update_persistent_clock64
  * that stores time for later retrieval by rtc_hctosys.
  *
  * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
@@ -35,7 +35,10 @@ int rtc_set_ntp_time(struct timespec64 now)
        if (rtc) {
                /* rtc_hctosys exclusively uses UTC, so we call set_time here,
                 * not set_mmss. */
-               if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
+               if (rtc->ops &&
+                   (rtc->ops->set_time ||
+                    rtc->ops->set_mmss64 ||
+                    rtc->ops->set_mmss))
                        err = rtc_set_time(rtc, &tm);
                rtc_class_close(rtc);
        }
index a7cc618378187fb7d38e504d51befb93e78b223c..923a2b5a24395547212207312588b125f19de3a2 100644 (file)
@@ -5734,9 +5734,9 @@ free_port:
 hba_free:
        if (phba->msix_enabled)
                pci_disable_msix(phba->pcidev);
-       iscsi_host_remove(phba->shost);
        pci_dev_put(phba->pcidev);
        iscsi_host_free(phba->shost);
+       pci_set_drvdata(pcidev, NULL);
 disable_pci:
        pci_disable_device(pcidev);
        return ret;
index 54d7a6cbb98a48d06b932cce40c488488760da08..b1a263137a23391a1e19c2589f35fdaf2c4f514f 100644 (file)
@@ -1311,9 +1311,11 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
                                    "rejecting I/O to dead device\n");
                        ret = BLKPREP_KILL;
                        break;
-               case SDEV_QUIESCE:
                case SDEV_BLOCK:
                case SDEV_CREATED_BLOCK:
+                       ret = BLKPREP_DEFER;
+                       break;
+               case SDEV_QUIESCE:
                        /*
                         * If the devices is blocked we defer normal commands.
                         */
index ab8dfbef6f1bb681a9ff14ca1e7ca09749b6a199..198f96b7fb45dab78845ba1fcde123ea680e2082 100644 (file)
@@ -159,10 +159,9 @@ config SPI_BUTTERFLY
 
 config SPI_CADENCE
        tristate "Cadence SPI controller"
-       depends on ARM
        help
          This selects the Cadence SPI controller master driver
-         used by Xilinx Zynq.
+         used by Xilinx Zynq and ZynqMP.
 
 config SPI_CLPS711X
        tristate "CLPS711X host SPI controller"
@@ -632,7 +631,7 @@ config SPI_DW_PCI
 
 config SPI_DW_MID_DMA
        bool "DMA support for DW SPI controller on Intel MID platform"
-       depends on SPI_DW_PCI && INTEL_MID_DMAC
+       depends on SPI_DW_PCI && DW_DMAC_PCI
 
 config SPI_DW_MMIO
        tristate "Memory-mapped io interface driver for DW SPI core"
index 06de34001c6695a5a741925e862d7baa05853c98..a2f40b1b222500eb53d14c681ab58f3d3c445af8 100644 (file)
          | SPI_BF(name, value))
 
 /* Register access macros */
+#ifdef CONFIG_AVR32
 #define spi_readl(port, reg) \
        __raw_readl((port)->regs + SPI_##reg)
 #define spi_writel(port, reg, value) \
        __raw_writel((value), (port)->regs + SPI_##reg)
-
+#else
+#define spi_readl(port, reg) \
+       readl_relaxed((port)->regs + SPI_##reg)
+#define spi_writel(port, reg, value) \
+       writel_relaxed((value), (port)->regs + SPI_##reg)
+#endif
 /* use PIO for small transfers, avoiding DMA setup/teardown overhead and
  * cache operations; better heuristics consider wordsize and bitrate.
  */
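
The spi-atmel change keeps the __raw accessors only for AVR32 and moves everyone else to the relaxed MMIO accessors, which perform the little-endian conversion the __raw forms skip while still avoiding the barriers of plain readl/writel. The shape of the resulting accessor pair, with hypothetical register names:

#include <linux/io.h>

#define DEMO_CR	0x00	/* hypothetical register offsets */
#define DEMO_SR	0x04

#ifdef CONFIG_AVR32
/* AVR32: native-endian raw accessors, as before */
#define demo_readl(base, reg)		__raw_readl((base) + (reg))
#define demo_writel(base, reg, val)	__raw_writel((val), (base) + (reg))
#else
/* everyone else: little-endian conversion, but no heavy barriers */
#define demo_readl(base, reg)		readl_relaxed((base) + (reg))
#define demo_writel(base, reg, val)	writel_relaxed((val), (base) + (reg))
#endif
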
index 419a782ab6d50541809f0c8b10b3d8eaeeb1ae89..f63864a893c520c40d9c79f1c8ca838b15dedc8c 100644 (file)
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2012 Chris Boot
  * Copyright (C) 2013 Stephen Warren
+ * Copyright (C) 2015 Martin Sperl
  *
  * This driver is inspired by:
  * spi-ath79.c, Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
@@ -29,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_gpio.h>
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
 
 #define BCM2835_SPI_CS_CS_10           0x00000002
 #define BCM2835_SPI_CS_CS_01           0x00000001
 
-#define BCM2835_SPI_TIMEOUT_MS 30000
-#define BCM2835_SPI_MODE_BITS  (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_NO_CS)
+#define BCM2835_SPI_POLLING_LIMIT_US   30
+#define BCM2835_SPI_TIMEOUT_MS         30000
+#define BCM2835_SPI_MODE_BITS  (SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
+                               | SPI_NO_CS | SPI_3WIRE)
 
 #define DRV_NAME       "spi-bcm2835"
 
@@ -75,10 +79,10 @@ struct bcm2835_spi {
        void __iomem *regs;
        struct clk *clk;
        int irq;
-       struct completion done;
        const u8 *tx_buf;
        u8 *rx_buf;
-       int len;
+       int tx_len;
+       int rx_len;
 };
 
 static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
@@ -91,205 +95,315 @@ static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned reg, u32 val)
        writel(val, bs->regs + reg);
 }
 
-static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs, int len)
+static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs)
 {
        u8 byte;
 
-       while (len--) {
+       while ((bs->rx_len) &&
+              (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_RXD)) {
                byte = bcm2835_rd(bs, BCM2835_SPI_FIFO);
                if (bs->rx_buf)
                        *bs->rx_buf++ = byte;
+               bs->rx_len--;
        }
 }
 
-static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs, int len)
+static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs)
 {
        u8 byte;
 
-       if (len > bs->len)
-               len = bs->len;
-
-       while (len--) {
+       while ((bs->tx_len) &&
+              (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_TXD)) {
                byte = bs->tx_buf ? *bs->tx_buf++ : 0;
                bcm2835_wr(bs, BCM2835_SPI_FIFO, byte);
-               bs->len--;
+               bs->tx_len--;
        }
 }
 
+static void bcm2835_spi_reset_hw(struct spi_master *master)
+{
+       struct bcm2835_spi *bs = spi_master_get_devdata(master);
+       u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+       /* Disable SPI interrupts and transfer */
+       cs &= ~(BCM2835_SPI_CS_INTR |
+               BCM2835_SPI_CS_INTD |
+               BCM2835_SPI_CS_TA);
+       /* and reset RX/TX FIFOS */
+       cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX;
+
+       /* and reset the SPI_HW */
+       bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+}
+
 static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 {
        struct spi_master *master = dev_id;
        struct bcm2835_spi *bs = spi_master_get_devdata(master);
-       u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
-       /*
-        * RXR - RX needs Reading. This means 12 (or more) bytes have been
-        * transmitted and hence 12 (or more) bytes have been received.
-        *
-        * The FIFO is 16-bytes deep. We check for this interrupt to keep the
-        * FIFO full; we have a 4-byte-time buffer for IRQ latency. We check
-        * this before DONE (TX empty) just in case we delayed processing this
-        * interrupt for some reason.
-        *
-        * We only check for this case if we have more bytes to TX; at the end
-        * of the transfer, we ignore this pipelining optimization, and let
-        * bcm2835_spi_finish_transfer() drain the RX FIFO.
+       /* Read as many bytes as possible from FIFO */
+       bcm2835_rd_fifo(bs);
+       /* Write as many bytes as possible to FIFO */
+       bcm2835_wr_fifo(bs);
+
+       /* based on flags decide if we can finish the transfer */
+       if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) {
+               /* Transfer complete - reset SPI HW */
+               bcm2835_spi_reset_hw(master);
+               /* wake up the framework */
+               complete(&master->xfer_completion);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
+                                        struct spi_device *spi,
+                                        struct spi_transfer *tfr,
+                                        u32 cs,
+                                        unsigned long xfer_time_us)
+{
+       struct bcm2835_spi *bs = spi_master_get_devdata(master);
+       unsigned long timeout = jiffies +
+               max(4 * xfer_time_us * HZ / 1000000, 2uL);
+
+       /* enable HW block without interrupts */
+       bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
+
+       /* the timeout above is 4x the expected time, or 2 jiffies minimum */
+       /* loop until the transfer is finished */
+       while (bs->rx_len) {
+               /* read from fifo as much as possible */
+               bcm2835_rd_fifo(bs);
+               /* fill in tx fifo as much as possible */
+               bcm2835_wr_fifo(bs);
+               /* if we still expect some data after the read,
+                * check for a possible timeout
+                */
+               if (bs->rx_len && time_after(jiffies, timeout)) {
+                       /* Transfer complete - reset SPI HW */
+                       bcm2835_spi_reset_hw(master);
+                       /* and return timeout */
+                       return -ETIMEDOUT;
+               }
+       }
+
+       /* Transfer complete - reset SPI HW */
+       bcm2835_spi_reset_hw(master);
+       /* and return without waiting for completion */
+       return 0;
+}
+
+static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
+                                       struct spi_device *spi,
+                                       struct spi_transfer *tfr,
+                                       u32 cs)
+{
+       struct bcm2835_spi *bs = spi_master_get_devdata(master);
+
+       /* fill in the fifo if we have gpio-cs
+        * note that there have been rare events where the native CS
+        * flapped for <1us, which may change the behaviour;
+        * with gpio-cs this does not happen, so it is implemented
+        * only for this case
         */
-       if (bs->len && (cs & BCM2835_SPI_CS_RXR)) {
-               /* Read 12 bytes of data */
-               bcm2835_rd_fifo(bs, 12);
-
-               /* Write up to 12 bytes */
-               bcm2835_wr_fifo(bs, 12);
-
-               /*
-                * We must have written something to the TX FIFO due to the
-                * bs->len check above, so cannot be DONE. Hence, return
-                * early. Note that DONE could also be set if we serviced an
-                * RXR interrupt really late.
+       if (gpio_is_valid(spi->cs_gpio)) {
+               /* enable HW block, but without interrupts enabled
+                * as that would trigger an immediate interrupt
                 */
-               return IRQ_HANDLED;
+               bcm2835_wr(bs, BCM2835_SPI_CS,
+                          cs | BCM2835_SPI_CS_TA);
+               /* fill in tx fifo as much as possible */
+               bcm2835_wr_fifo(bs);
        }
 
        /*
-        * DONE - TX empty. This occurs when we first enable the transfer
-        * since we do not pre-fill the TX FIFO. At any other time, given that
-        * we refill the TX FIFO above based on RXR, and hence ignore DONE if
-        * RXR is set, DONE really does mean end-of-transfer.
+        * Enable the HW block. This will immediately trigger a DONE (TX
+        * empty) interrupt, upon which we will fill the TX FIFO with the
+        * first TX bytes. Pre-filling the TX FIFO here to avoid the
+        * interrupt doesn't work:-(
         */
-       if (cs & BCM2835_SPI_CS_DONE) {
-               if (bs->len) { /* First interrupt in a transfer */
-                       bcm2835_wr_fifo(bs, 16);
-               } else { /* Transfer complete */
-                       /* Disable SPI interrupts */
-                       cs &= ~(BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD);
-                       bcm2835_wr(bs, BCM2835_SPI_CS, cs);
-
-                       /*
-                        * Wake up bcm2835_spi_transfer_one(), which will call
-                        * bcm2835_spi_finish_transfer(), to drain the RX FIFO.
-                        */
-                       complete(&bs->done);
-               }
-
-               return IRQ_HANDLED;
-       }
+       cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
+       bcm2835_wr(bs, BCM2835_SPI_CS, cs);
 
-       return IRQ_NONE;
+       /* signal that we need to wait for completion */
+       return 1;
 }
 
-static int bcm2835_spi_start_transfer(struct spi_device *spi,
-               struct spi_transfer *tfr)
+static int bcm2835_spi_transfer_one(struct spi_master *master,
+                                   struct spi_device *spi,
+                                   struct spi_transfer *tfr)
 {
-       struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
+       struct bcm2835_spi *bs = spi_master_get_devdata(master);
        unsigned long spi_hz, clk_hz, cdiv;
-       u32 cs = BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
+       unsigned long spi_used_hz, xfer_time_us;
+       u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
+       /* set clock */
        spi_hz = tfr->speed_hz;
        clk_hz = clk_get_rate(bs->clk);
 
        if (spi_hz >= clk_hz / 2) {
                cdiv = 2; /* clk_hz/2 is the fastest we can go */
        } else if (spi_hz) {
-               /* CDIV must be a power of two */
-               cdiv = roundup_pow_of_two(DIV_ROUND_UP(clk_hz, spi_hz));
+               /* CDIV must be a multiple of two */
+               cdiv = DIV_ROUND_UP(clk_hz, spi_hz);
+               cdiv += (cdiv % 2);
 
                if (cdiv >= 65536)
                        cdiv = 0; /* 0 is the slowest we can go */
-       } else
+       } else {
                cdiv = 0; /* 0 is the slowest we can go */
+       }
+       spi_used_hz = cdiv ? (clk_hz / cdiv) : (clk_hz / 65536);
+       bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
 
+       /* handle all the modes */
+       if ((spi->mode & SPI_3WIRE) && (tfr->rx_buf))
+               cs |= BCM2835_SPI_CS_REN;
        if (spi->mode & SPI_CPOL)
                cs |= BCM2835_SPI_CS_CPOL;
        if (spi->mode & SPI_CPHA)
                cs |= BCM2835_SPI_CS_CPHA;
 
-       if (!(spi->mode & SPI_NO_CS)) {
-               if (spi->mode & SPI_CS_HIGH) {
-                       cs |= BCM2835_SPI_CS_CSPOL;
-                       cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
-               }
-
-               cs |= spi->chip_select;
-       }
+       /* for gpio_cs set a dummy CS so that no HW-CS gets changed;
+        * we cannot run this in bcm2835_spi_set_cs, as it does
+        * not get called for cs_gpio cases, so we need to do it here
+        */
+       if (gpio_is_valid(spi->cs_gpio) || (spi->mode & SPI_NO_CS))
+               cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
 
-       reinit_completion(&bs->done);
+       /* set transmit buffers and length */
        bs->tx_buf = tfr->tx_buf;
        bs->rx_buf = tfr->rx_buf;
-       bs->len = tfr->len;
+       bs->tx_len = tfr->len;
+       bs->rx_len = tfr->len;
 
-       bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
-       /*
-        * Enable the HW block. This will immediately trigger a DONE (TX
-        * empty) interrupt, upon which we will fill the TX FIFO with the
-        * first TX bytes. Pre-filling the TX FIFO here to avoid the
-        * interrupt doesn't work:-(
-        */
-       bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+       /* calculate the estimated time in us the transfer runs */
+       xfer_time_us = tfr->len
+               * 9 /* clocks/byte - SPI-HW waits 1 clock after each byte */
+               * 1000000 / spi_used_hz;
 
-       return 0;
+       /* for short requests run polling */
+       if (xfer_time_us <= BCM2835_SPI_POLLING_LIMIT_US)
+               return bcm2835_spi_transfer_one_poll(master, spi, tfr,
+                                                    cs, xfer_time_us);
+
+       return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
 }
 
-static int bcm2835_spi_finish_transfer(struct spi_device *spi,
-               struct spi_transfer *tfr, bool cs_change)
+static void bcm2835_spi_handle_err(struct spi_master *master,
+                                  struct spi_message *msg)
 {
-       struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
-       u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+       bcm2835_spi_reset_hw(master);
+}
+
+static void bcm2835_spi_set_cs(struct spi_device *spi, bool gpio_level)
+{
+       /*
+        * we can assume that we are "native" as per spi_set_cs
+        *   calling us ONLY when cs_gpio is not set
+        * we can also assume that we are CS < 3 as per bcm2835_spi_setup
+        *   we would not get called because of error handling there.
+        * the level passed is the electrical level not enabled/disabled
+        *   so it has to get translated back to enable/disable
+        *   see spi_set_cs in spi.c for the implementation
+        */
 
-       /* Drain RX FIFO */
-       while (cs & BCM2835_SPI_CS_RXD) {
-               bcm2835_rd_fifo(bs, 1);
-               cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+       struct spi_master *master = spi->master;
+       struct bcm2835_spi *bs = spi_master_get_devdata(master);
+       u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+       bool enable;
+
+       /* calculate the enable flag from the passed gpio_level */
+       enable = (spi->mode & SPI_CS_HIGH) ? gpio_level : !gpio_level;
+
+       /* set flags for "reverse" polarity in the registers */
+       if (spi->mode & SPI_CS_HIGH) {
+               /* set the correct CS-bits */
+               cs |= BCM2835_SPI_CS_CSPOL;
+               cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
+       } else {
+               /* clean the CS-bits */
+               cs &= ~BCM2835_SPI_CS_CSPOL;
+               cs &= ~(BCM2835_SPI_CS_CSPOL0 << spi->chip_select);
        }
 
-       if (tfr->delay_usecs)
-               udelay(tfr->delay_usecs);
+       /* select the correct chip_select depending on disabled/enabled */
+       if (enable) {
+               /* set cs correctly */
+               if (spi->mode & SPI_NO_CS) {
+                       /* use the "undefined" chip-select */
+                       cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+               } else {
+                       /* set the chip select */
+                       cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01);
+                       cs |= spi->chip_select;
+               }
+       } else {
+               /* disable CSPOL which puts HW-CS into deselected state */
+               cs &= ~BCM2835_SPI_CS_CSPOL;
+               /* use the "undefined" chip-select as precaution */
+               cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+       }
 
-       if (cs_change)
-               /* Clear TA flag */
-               bcm2835_wr(bs, BCM2835_SPI_CS, cs & ~BCM2835_SPI_CS_TA);
+       /* finally set the calculated flags in SPI_CS */
+       bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+}
 
-       return 0;
+static int chip_match_name(struct gpio_chip *chip, void *data)
+{
+       return !strcmp(chip->label, data);
 }
 
-static int bcm2835_spi_transfer_one(struct spi_master *master,
-               struct spi_message *mesg)
+static int bcm2835_spi_setup(struct spi_device *spi)
 {
-       struct bcm2835_spi *bs = spi_master_get_devdata(master);
-       struct spi_transfer *tfr;
-       struct spi_device *spi = mesg->spi;
-       int err = 0;
-       unsigned int timeout;
-       bool cs_change;
-
-       list_for_each_entry(tfr, &mesg->transfers, transfer_list) {
-               err = bcm2835_spi_start_transfer(spi, tfr);
-               if (err)
-                       goto out;
-
-               timeout = wait_for_completion_timeout(&bs->done,
-                               msecs_to_jiffies(BCM2835_SPI_TIMEOUT_MS));
-               if (!timeout) {
-                       err = -ETIMEDOUT;
-                       goto out;
-               }
+       int err;
+       struct gpio_chip *chip;
+       /*
+        * sanity checking the native-chipselects
+        */
+       if (spi->mode & SPI_NO_CS)
+               return 0;
+       if (gpio_is_valid(spi->cs_gpio))
+               return 0;
+       if (spi->chip_select > 1) {
+               /* error in the case of native CS requested with CS > 1
+                * officially there is a CS2, but it is not documented
+                * which GPIO is connected with that...
+                */
+               dev_err(&spi->dev,
+                       "setup: only two native chip-selects are supported\n");
+               return -EINVAL;
+       }
+       /* now translate native cs to GPIO */
 
-               cs_change = tfr->cs_change ||
-                       list_is_last(&tfr->transfer_list, &mesg->transfers);
+       /* get the gpio chip for the base */
+       chip = gpiochip_find("pinctrl-bcm2835", chip_match_name);
+       if (!chip)
+               return 0;
 
-               err = bcm2835_spi_finish_transfer(spi, tfr, cs_change);
-               if (err)
-                       goto out;
+       /* and calculate the real CS */
+       spi->cs_gpio = chip->base + 8 - spi->chip_select;
 
-               mesg->actual_length += (tfr->len - bs->len);
-       }
+       /* and set up the "mode" and level */
+       dev_info(&spi->dev, "setting up native-CS%i as GPIO %i\n",
+                spi->chip_select, spi->cs_gpio);
 
-out:
-       /* Clear FIFOs, and disable the HW block */
-       bcm2835_wr(bs, BCM2835_SPI_CS,
-                  BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
-       mesg->status = err;
-       spi_finalize_current_message(master);
+       /* set up GPIO as output and pull to the correct level */
+       err = gpio_direction_output(spi->cs_gpio,
+                                   (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+       if (err) {
+               dev_err(&spi->dev,
+                       "could not set CS%i gpio %i as output: %i",
+                       spi->chip_select, spi->cs_gpio, err);
+               return err;
+       }
+       /* the implementation of pinctrl-bcm2835 currently does not
+        * set the GPIO value when using gpio_direction_output
+        * so we are setting it here explicitly
+        */
+       gpio_set_value(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
 
        return 0;
 }
@@ -312,13 +426,14 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
        master->mode_bits = BCM2835_SPI_MODE_BITS;
        master->bits_per_word_mask = SPI_BPW_MASK(8);
        master->num_chipselect = 3;
-       master->transfer_one_message = bcm2835_spi_transfer_one;
+       master->setup = bcm2835_spi_setup;
+       master->set_cs = bcm2835_spi_set_cs;
+       master->transfer_one = bcm2835_spi_transfer_one;
+       master->handle_err = bcm2835_spi_handle_err;
        master->dev.of_node = pdev->dev.of_node;
 
        bs = spi_master_get_devdata(master);
 
-       init_completion(&bs->done);
-
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        bs->regs = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(bs->regs)) {
@@ -343,13 +458,13 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
        clk_prepare_enable(bs->clk);
 
        err = devm_request_irq(&pdev->dev, bs->irq, bcm2835_spi_interrupt, 0,
-                               dev_name(&pdev->dev), master);
+                              dev_name(&pdev->dev), master);
        if (err) {
                dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
                goto out_clk_disable;
        }
 
-       /* initialise the hardware */
+       /* initialise the hardware with the default polarities */
        bcm2835_wr(bs, BCM2835_SPI_CS,
                   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
index 3fb91c81015a39a670afc818d90a9f3c8ae54521..1520554978a31373126829025a45f37b75a73702 100644 (file)
@@ -44,7 +44,7 @@ static int bcm53xxspi_wait(struct bcm53xxspi *b53spi, unsigned int timeout_ms)
        u32 tmp;
 
        /* SPE bit has to be 0 before we read MSPI STATUS */
-       deadline = jiffies + BCM53XXSPI_SPE_TIMEOUT_MS * HZ / 1000;
+       deadline = jiffies + msecs_to_jiffies(BCM53XXSPI_SPE_TIMEOUT_MS);
        do {
                tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
                if (!(tmp & B53SPI_MSPI_SPCR2_SPE))
@@ -56,7 +56,7 @@ static int bcm53xxspi_wait(struct bcm53xxspi *b53spi, unsigned int timeout_ms)
                goto spi_timeout;
 
        /* Check status */
-       deadline = jiffies + timeout_ms * HZ / 1000;
+       deadline = jiffies + msecs_to_jiffies(timeout_ms);
        do {
                tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_MSPI_STATUS);
                if (tmp & B53SPI_MSPI_MSPI_STATUS_SPIF) {
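
In the bcm53xx hunks, msecs_to_jiffies() replaces the open-coded ms * HZ / 1000, which truncates and misbehaves when HZ does not divide 1000 evenly. A sketch of the deadline idiom (hypothetical demo_wait, mirroring the driver's poll loop shape):

#include <linux/delay.h>
#include <linux/jiffies.h>

static bool demo_wait(unsigned int timeout_ms, bool (*done)(void))
{
	unsigned long deadline = jiffies + msecs_to_jiffies(timeout_ms);

	do {
		if (done())
			return true;
		udelay(5);
	} while (!time_after(jiffies, deadline));

	return done();	/* one last check after the deadline */
}
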
index 37079937d2f77a5b33e20086b6d2a1c6d4fe5331..a3d65b4f49445d144fcba077ce3413630300e8c8 100644 (file)
@@ -559,7 +559,7 @@ static void bfin_spi_pump_transfers(unsigned long data)
        struct spi_transfer *previous = NULL;
        struct bfin_spi_slave_data *chip = NULL;
        unsigned int bits_per_word;
-       u16 cr, cr_width, dma_width, dma_config;
+       u16 cr, cr_width = 0, dma_width, dma_config;
        u32 tranf_success = 1;
        u8 full_duplex = 0;
 
@@ -648,7 +648,6 @@ static void bfin_spi_pump_transfers(unsigned long data)
        } else if (bits_per_word == 8) {
                drv_data->n_bytes = bits_per_word/8;
                drv_data->len = transfer->len;
-               cr_width = 0;
                drv_data->ops = &bfin_bfin_spi_transfer_ops_u8;
        }
        cr = bfin_read(&drv_data->regs->ctl) & ~(BIT_CTL_TIMOD | BIT_CTL_WORDSIZE);
index c616e41521be18589ec89ab68dcfeec9cd477da7..06b34e5bcfa37eaf9eba3e9ff8d1d017454dc81d 100644 (file)
@@ -49,12 +49,17 @@ bitbang_txrx_be_cpha0(struct spi_device *spi,
 {
        /* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
 
+       bool oldbit = !(word & 1);
        /* clock starts at inactive polarity */
        for (word <<= (32 - bits); likely(bits); bits--) {
 
                /* setup MSB (to slave) on trailing edge */
-               if ((flags & SPI_MASTER_NO_TX) == 0)
-                       setmosi(spi, word & (1 << 31));
+               if ((flags & SPI_MASTER_NO_TX) == 0) {
+                       if ((word & (1 << 31)) != oldbit) {
+                               setmosi(spi, word & (1 << 31));
+                               oldbit = word & (1 << 31);
+                       }
+               }
                spidelay(nsecs);        /* T(setup) */
 
                setsck(spi, !cpol);
@@ -76,13 +81,18 @@ bitbang_txrx_be_cpha1(struct spi_device *spi,
 {
        /* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
 
+       bool oldbit = !(word & (1 << 31));
        /* clock starts at inactive polarity */
        for (word <<= (32 - bits); likely(bits); bits--) {
 
                /* setup MSB (to slave) on leading edge */
                setsck(spi, !cpol);
-               if ((flags & SPI_MASTER_NO_TX) == 0)
-                       setmosi(spi, word & (1 << 31));
+               if ((flags & SPI_MASTER_NO_TX) == 0) {
+                       if ((word & (1 << 31)) != oldbit) {
+                               setmosi(spi, word & (1 << 31));
+                               oldbit = word & (1 << 31);
+                       }
+               }
                spidelay(nsecs); /* T(setup) */
 
                setsck(spi, cpol);
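
The bitbang change caches the last level driven on MOSI and skips the setter when the outgoing bit repeats, which matters when setmosi() is a slow GPIO call; oldbit is seeded so the first bit is always driven. A condensed userspace sketch of the same idea (hypothetical demo_shift_out, clocking elided):

#include <stdbool.h>
#include <stdint.h>

static void demo_shift_out(uint32_t word, unsigned int bits,
			   void (*setmosi)(bool level))
{
	bool bit, oldbit;

	word <<= (32 - bits);		/* MSB first: first bit moves to bit 31 (bits must be 1..32) */
	oldbit = !(word & (1u << 31));	/* complement, so the first bit is driven */

	for (; bits; bits--, word <<= 1) {
		bit = word & (1u << 31);
		if (bit != oldbit) {	/* skip redundant (slow) GPIO writes */
			setmosi(bit);
			oldbit = bit;
		}
		/* clock edges and delays elided */
	}
}
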
index 4f8c798e0633a81483c3b05af46805b1848ba386..bb1052e748f28269b3cdde6390c6998b43b98d37 100644 (file)
 #include "spi-dw.h"
 
 #ifdef CONFIG_SPI_DW_MID_DMA
-#include <linux/intel_mid_dma.h>
 #include <linux/pci.h>
+#include <linux/platform_data/dma-dw.h>
 
 #define RX_BUSY                0
 #define TX_BUSY                1
 
-struct mid_dma {
-       struct intel_mid_dma_slave      dmas_tx;
-       struct intel_mid_dma_slave      dmas_rx;
-};
+static struct dw_dma_slave mid_dma_tx = { .dst_id = 1 };
+static struct dw_dma_slave mid_dma_rx = { .src_id = 0 };
 
 static bool mid_spi_dma_chan_filter(struct dma_chan *chan, void *param)
 {
-       struct dw_spi *dws = param;
+       struct dw_dma_slave *s = param;
+
+       if (s->dma_dev != chan->device->dev)
+               return false;
 
-       return dws->dma_dev == chan->device->dev;
+       chan->private = s;
+       return true;
 }
 
 static int mid_spi_dma_init(struct dw_spi *dws)
 {
-       struct mid_dma *dw_dma = dws->dma_priv;
        struct pci_dev *dma_dev;
-       struct intel_mid_dma_slave *rxs, *txs;
+       struct dw_dma_slave *tx = dws->dma_tx;
+       struct dw_dma_slave *rx = dws->dma_rx;
        dma_cap_mask_t mask;
 
        /*
@@ -56,28 +58,22 @@ static int mid_spi_dma_init(struct dw_spi *dws)
        if (!dma_dev)
                return -ENODEV;
 
-       dws->dma_dev = &dma_dev->dev;
-
        dma_cap_zero(mask);
        dma_cap_set(DMA_SLAVE, mask);
 
        /* 1. Init rx channel */
-       dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+       rx->dma_dev = &dma_dev->dev;
+       dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, rx);
        if (!dws->rxchan)
                goto err_exit;
-       rxs = &dw_dma->dmas_rx;
-       rxs->hs_mode = LNW_DMA_HW_HS;
-       rxs->cfg_mode = LNW_DMA_PER_TO_MEM;
-       dws->rxchan->private = rxs;
+       dws->master->dma_rx = dws->rxchan;
 
        /* 2. Init tx channel */
-       dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+       tx->dma_dev = &dma_dev->dev;
+       dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, tx);
        if (!dws->txchan)
                goto free_rxchan;
-       txs = &dw_dma->dmas_tx;
-       txs->hs_mode = LNW_DMA_HW_HS;
-       txs->cfg_mode = LNW_DMA_MEM_TO_PER;
-       dws->txchan->private = txs;
+       dws->master->dma_tx = dws->txchan;
 
        dws->dma_inited = 1;
        return 0;
@@ -100,6 +96,42 @@ static void mid_spi_dma_exit(struct dw_spi *dws)
        dma_release_channel(dws->rxchan);
 }
 
+static irqreturn_t dma_transfer(struct dw_spi *dws)
+{
+       u16 irq_status = dw_readl(dws, DW_SPI_ISR);
+
+       if (!irq_status)
+               return IRQ_NONE;
+
+       dw_readl(dws, DW_SPI_ICR);
+       spi_reset_chip(dws);
+
+       dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
+       dws->master->cur_msg->status = -EIO;
+       spi_finalize_current_transfer(dws->master);
+       return IRQ_HANDLED;
+}
+
+static bool mid_spi_can_dma(struct spi_master *master, struct spi_device *spi,
+               struct spi_transfer *xfer)
+{
+       struct dw_spi *dws = spi_master_get_devdata(master);
+
+       if (!dws->dma_inited)
+               return false;
+
+       return xfer->len > dws->fifo_len;
+}
+
+static enum dma_slave_buswidth convert_dma_width(u32 dma_width)
+{
+       if (dma_width == 1)
+               return DMA_SLAVE_BUSWIDTH_1_BYTE;
+       else if (dma_width == 2)
+               return DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+       return DMA_SLAVE_BUSWIDTH_UNDEFINED;
+}
+
 /*
  * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
  * channel will clear a corresponding bit.
@@ -111,33 +143,30 @@ static void dw_spi_dma_tx_done(void *arg)
        clear_bit(TX_BUSY, &dws->dma_chan_busy);
        if (test_bit(RX_BUSY, &dws->dma_chan_busy))
                return;
-       dw_spi_xfer_done(dws);
+       spi_finalize_current_transfer(dws->master);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws)
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws,
+               struct spi_transfer *xfer)
 {
        struct dma_slave_config txconf;
        struct dma_async_tx_descriptor *txdesc;
 
-       if (!dws->tx_dma)
+       if (!xfer->tx_buf)
                return NULL;
 
        txconf.direction = DMA_MEM_TO_DEV;
        txconf.dst_addr = dws->dma_addr;
-       txconf.dst_maxburst = LNW_DMA_MSIZE_16;
+       txconf.dst_maxburst = 16;
        txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       txconf.dst_addr_width = dws->dma_width;
+       txconf.dst_addr_width = convert_dma_width(dws->dma_width);
        txconf.device_fc = false;
 
        dmaengine_slave_config(dws->txchan, &txconf);
 
-       memset(&dws->tx_sgl, 0, sizeof(dws->tx_sgl));
-       dws->tx_sgl.dma_address = dws->tx_dma;
-       dws->tx_sgl.length = dws->len;
-
        txdesc = dmaengine_prep_slave_sg(dws->txchan,
-                               &dws->tx_sgl,
-                               1,
+                               xfer->tx_sg.sgl,
+                               xfer->tx_sg.nents,
                                DMA_MEM_TO_DEV,
                                DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!txdesc)
@@ -160,33 +189,30 @@ static void dw_spi_dma_rx_done(void *arg)
        clear_bit(RX_BUSY, &dws->dma_chan_busy);
        if (test_bit(TX_BUSY, &dws->dma_chan_busy))
                return;
-       dw_spi_xfer_done(dws);
+       spi_finalize_current_transfer(dws->master);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws)
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
+               struct spi_transfer *xfer)
 {
        struct dma_slave_config rxconf;
        struct dma_async_tx_descriptor *rxdesc;
 
-       if (!dws->rx_dma)
+       if (!xfer->rx_buf)
                return NULL;
 
        rxconf.direction = DMA_DEV_TO_MEM;
        rxconf.src_addr = dws->dma_addr;
-       rxconf.src_maxburst = LNW_DMA_MSIZE_16;
+       rxconf.src_maxburst = 16;
        rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-       rxconf.src_addr_width = dws->dma_width;
+       rxconf.src_addr_width = convert_dma_width(dws->dma_width);
        rxconf.device_fc = false;
 
        dmaengine_slave_config(dws->rxchan, &rxconf);
 
-       memset(&dws->rx_sgl, 0, sizeof(dws->rx_sgl));
-       dws->rx_sgl.dma_address = dws->rx_dma;
-       dws->rx_sgl.length = dws->len;
-
        rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
-                               &dws->rx_sgl,
-                               1,
+                               xfer->rx_sg.sgl,
+                               xfer->rx_sg.nents,
                                DMA_DEV_TO_MEM,
                                DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
        if (!rxdesc)
@@ -198,37 +224,36 @@ static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws)
        return rxdesc;
 }
 
-static void dw_spi_dma_setup(struct dw_spi *dws)
+static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
 {
        u16 dma_ctrl = 0;
 
-       spi_enable_chip(dws, 0);
+       dw_writel(dws, DW_SPI_DMARDLR, 0xf);
+       dw_writel(dws, DW_SPI_DMATDLR, 0x10);
 
-       dw_writew(dws, DW_SPI_DMARDLR, 0xf);
-       dw_writew(dws, DW_SPI_DMATDLR, 0x10);
-
-       if (dws->tx_dma)
+       if (xfer->tx_buf)
                dma_ctrl |= SPI_DMA_TDMAE;
-       if (dws->rx_dma)
+       if (xfer->rx_buf)
                dma_ctrl |= SPI_DMA_RDMAE;
-       dw_writew(dws, DW_SPI_DMACR, dma_ctrl);
+       dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
+
+       /* Set the interrupt mask */
+       spi_umask_intr(dws, SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI);
 
-       spi_enable_chip(dws, 1);
+       dws->transfer_handler = dma_transfer;
+
+       return 0;
 }
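
The watermarks above pair with the 16-word maxburst programmed into both slave configs: per the DW_apb_ssi databook, DMARDLR raises the RX request once the FIFO holds DMARDLR + 1 entries, and DMATDLR raises the TX request once at most DMATDLR entries remain. A hedged sketch of keeping the two in lockstep (the helper name is illustrative):

static void dw_spi_dma_set_watermarks(struct dw_spi *dws, u32 burst)
{
        /* RX request fires once at least 'burst' entries are available */
        dw_writel(dws, DW_SPI_DMARDLR, burst - 1);
        /* TX request fires once the FIFO has drained to 'burst' entries */
        dw_writel(dws, DW_SPI_DMATDLR, burst);
}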
 
-static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
+static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
 {
        struct dma_async_tx_descriptor *txdesc, *rxdesc;
 
-       /* 1. setup DMA related registers */
-       if (cs_change)
-               dw_spi_dma_setup(dws);
+       /* Prepare the TX dma transfer */
+       txdesc = dw_spi_dma_prepare_tx(dws, xfer);
 
-       /* 2. Prepare the TX dma transfer */
-       txdesc = dw_spi_dma_prepare_tx(dws);
-
-       /* 3. Prepare the RX dma transfer */
-       rxdesc = dw_spi_dma_prepare_rx(dws);
+       /* Prepare the RX dma transfer */
+       rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
 
        /* rx must be started before tx: rx data arrives as soon as tx clocks the bus */
        if (rxdesc) {
@@ -246,10 +271,25 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
        return 0;
 }
 
+static void mid_spi_dma_stop(struct dw_spi *dws)
+{
+       if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
+               dmaengine_terminate_all(dws->txchan);
+               clear_bit(TX_BUSY, &dws->dma_chan_busy);
+       }
+       if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
+               dmaengine_terminate_all(dws->rxchan);
+               clear_bit(RX_BUSY, &dws->dma_chan_busy);
+       }
+}
+
 static struct dw_spi_dma_ops mid_dma_ops = {
        .dma_init       = mid_spi_dma_init,
        .dma_exit       = mid_spi_dma_exit,
+       .dma_setup      = mid_spi_dma_setup,
+       .can_dma        = mid_spi_can_dma,
        .dma_transfer   = mid_spi_dma_transfer,
+       .dma_stop       = mid_spi_dma_stop,
 };
 #endif
 
@@ -282,9 +322,8 @@ int dw_spi_mid_init(struct dw_spi *dws)
        iounmap(clk_reg);
 
 #ifdef CONFIG_SPI_DW_MID_DMA
-       dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL);
-       if (!dws->dma_priv)
-               return -ENOMEM;
+       dws->dma_tx = &mid_dma_tx;
+       dws->dma_rx = &mid_dma_rx;
        dws->dma_ops = &mid_dma_ops;
 #endif
        return 0;
index 4847afba89f4e933e5a5d778c5f94e1a14ffe156..8d67d03c71ebcdec375023f8db910d3d0d8469ec 100644 (file)
 #include <linux/debugfs.h>
 #endif
 
-#define START_STATE    ((void *)0)
-#define RUNNING_STATE  ((void *)1)
-#define DONE_STATE     ((void *)2)
-#define ERROR_STATE    ((void *)-1)
-
 /* Slave spi_dev related */
 struct chip_data {
        u16 cr0;
@@ -143,13 +138,26 @@ static inline void dw_spi_debugfs_remove(struct dw_spi *dws)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static void dw_spi_set_cs(struct spi_device *spi, bool enable)
+{
+       struct dw_spi *dws = spi_master_get_devdata(spi->master);
+       struct chip_data *chip = spi_get_ctldata(spi);
+
+       /* Chip select logic is inverted from spi_set_cs() */
+       if (chip && chip->cs_control)
+               chip->cs_control(!enable);
+
+       if (!enable)
+               dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
+}
+
 /* Return the max entries we can fill into tx fifo */
 static inline u32 tx_max(struct dw_spi *dws)
 {
        u32 tx_left, tx_room, rxtx_gap;
 
        tx_left = (dws->tx_end - dws->tx) / dws->n_bytes;
-       tx_room = dws->fifo_len - dw_readw(dws, DW_SPI_TXFLR);
+       tx_room = dws->fifo_len - dw_readl(dws, DW_SPI_TXFLR);
 
        /*
         * Another concern is about the tx/rx mismatch, we
@@ -170,7 +178,7 @@ static inline u32 rx_max(struct dw_spi *dws)
 {
        u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes;
 
-       return min_t(u32, rx_left, dw_readw(dws, DW_SPI_RXFLR));
+       return min_t(u32, rx_left, dw_readl(dws, DW_SPI_RXFLR));
 }
 
 static void dw_writer(struct dw_spi *dws)
@@ -186,7 +194,7 @@ static void dw_writer(struct dw_spi *dws)
                        else
                                txw = *(u16 *)(dws->tx);
                }
-               dw_writew(dws, DW_SPI_DR, txw);
+               dw_writel(dws, DW_SPI_DR, txw);
                dws->tx += dws->n_bytes;
        }
 }
@@ -197,7 +205,7 @@ static void dw_reader(struct dw_spi *dws)
        u16 rxw;
 
        while (max--) {
-               rxw = dw_readw(dws, DW_SPI_DR);
+               rxw = dw_readl(dws, DW_SPI_DR);
                /* Store rx data only if the transfer's original "rx" buffer is not null */
                if (dws->rx_end - dws->len) {
                        if (dws->n_bytes == 1)
@@ -209,103 +217,22 @@ static void dw_reader(struct dw_spi *dws)
        }
 }
 
-static void *next_transfer(struct dw_spi *dws)
-{
-       struct spi_message *msg = dws->cur_msg;
-       struct spi_transfer *trans = dws->cur_transfer;
-
-       /* Move to next transfer */
-       if (trans->transfer_list.next != &msg->transfers) {
-               dws->cur_transfer =
-                       list_entry(trans->transfer_list.next,
-                                       struct spi_transfer,
-                                       transfer_list);
-               return RUNNING_STATE;
-       }
-
-       return DONE_STATE;
-}
-
-/*
- * Note: first step is the protocol driver prepares
- * a dma-capable memory, and this func just need translate
- * the virt addr to physical
- */
-static int map_dma_buffers(struct dw_spi *dws)
-{
-       if (!dws->cur_msg->is_dma_mapped
-               || !dws->dma_inited
-               || !dws->cur_chip->enable_dma
-               || !dws->dma_ops)
-               return 0;
-
-       if (dws->cur_transfer->tx_dma)
-               dws->tx_dma = dws->cur_transfer->tx_dma;
-
-       if (dws->cur_transfer->rx_dma)
-               dws->rx_dma = dws->cur_transfer->rx_dma;
-
-       return 1;
-}
-
-/* Caller already set message->status; dma and pio irqs are blocked */
-static void giveback(struct dw_spi *dws)
-{
-       struct spi_transfer *last_transfer;
-       struct spi_message *msg;
-
-       msg = dws->cur_msg;
-       dws->cur_msg = NULL;
-       dws->cur_transfer = NULL;
-       dws->prev_chip = dws->cur_chip;
-       dws->cur_chip = NULL;
-       dws->dma_mapped = 0;
-
-       last_transfer = list_last_entry(&msg->transfers, struct spi_transfer,
-                                       transfer_list);
-
-       if (!last_transfer->cs_change)
-               spi_chip_sel(dws, msg->spi, 0);
-
-       spi_finalize_current_message(dws->master);
-}
-
 static void int_error_stop(struct dw_spi *dws, const char *msg)
 {
-       /* Stop the hw */
-       spi_enable_chip(dws, 0);
+       spi_reset_chip(dws);
 
        dev_err(&dws->master->dev, "%s\n", msg);
-       dws->cur_msg->state = ERROR_STATE;
-       tasklet_schedule(&dws->pump_transfers);
+       dws->master->cur_msg->status = -EIO;
+       spi_finalize_current_transfer(dws->master);
 }
 
-void dw_spi_xfer_done(struct dw_spi *dws)
-{
-       /* Update total byte transferred return count actual bytes read */
-       dws->cur_msg->actual_length += dws->len;
-
-       /* Move to next transfer */
-       dws->cur_msg->state = next_transfer(dws);
-
-       /* Handle end of message */
-       if (dws->cur_msg->state == DONE_STATE) {
-               dws->cur_msg->status = 0;
-               giveback(dws);
-       } else
-               tasklet_schedule(&dws->pump_transfers);
-}
-EXPORT_SYMBOL_GPL(dw_spi_xfer_done);
-
 static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 {
-       u16 irq_status = dw_readw(dws, DW_SPI_ISR);
+       u16 irq_status = dw_readl(dws, DW_SPI_ISR);
 
        /* Error handling */
        if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
-               dw_readw(dws, DW_SPI_TXOICR);
-               dw_readw(dws, DW_SPI_RXOICR);
-               dw_readw(dws, DW_SPI_RXUICR);
+               dw_readl(dws, DW_SPI_ICR);
                int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun");
                return IRQ_HANDLED;
        }
@@ -313,7 +240,7 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws)
        dw_reader(dws);
        if (dws->rx_end == dws->rx) {
                spi_mask_intr(dws, SPI_INT_TXEI);
-               dw_spi_xfer_done(dws);
+               spi_finalize_current_transfer(dws->master);
                return IRQ_HANDLED;
        }
        if (irq_status & SPI_INT_TXEI) {
@@ -328,13 +255,14 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 
 static irqreturn_t dw_spi_irq(int irq, void *dev_id)
 {
-       struct dw_spi *dws = dev_id;
-       u16 irq_status = dw_readw(dws, DW_SPI_ISR) & 0x3f;
+       struct spi_master *master = dev_id;
+       struct dw_spi *dws = spi_master_get_devdata(master);
+       u16 irq_status = dw_readl(dws, DW_SPI_ISR) & 0x3f;
 
        if (!irq_status)
                return IRQ_NONE;
 
-       if (!dws->cur_msg) {
+       if (!master->cur_msg) {
                spi_mask_intr(dws, SPI_INT_TXEI);
                return IRQ_HANDLED;
        }
@@ -343,7 +271,7 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id)
 }
 
 /* Must be called inside pump_transfers() */
-static void poll_transfer(struct dw_spi *dws)
+static int poll_transfer(struct dw_spi *dws)
 {
        do {
                dw_writer(dws);
@@ -351,64 +279,32 @@ static void poll_transfer(struct dw_spi *dws)
                cpu_relax();
        } while (dws->rx_end > dws->rx);
 
-       dw_spi_xfer_done(dws);
+       return 0;
 }
 
-static void pump_transfers(unsigned long data)
+static int dw_spi_transfer_one(struct spi_master *master,
+               struct spi_device *spi, struct spi_transfer *transfer)
 {
-       struct dw_spi *dws = (struct dw_spi *)data;
-       struct spi_message *message = NULL;
-       struct spi_transfer *transfer = NULL;
-       struct spi_transfer *previous = NULL;
-       struct spi_device *spi = NULL;
-       struct chip_data *chip = NULL;
-       u8 bits = 0;
+       struct dw_spi *dws = spi_master_get_devdata(master);
+       struct chip_data *chip = spi_get_ctldata(spi);
        u8 imask = 0;
-       u8 cs_change = 0;
-       u16 txint_level = 0;
+       u16 txlevel = 0;
        u16 clk_div = 0;
        u32 speed = 0;
        u32 cr0 = 0;
+       int ret;
 
-       /* Get current state information */
-       message = dws->cur_msg;
-       transfer = dws->cur_transfer;
-       chip = dws->cur_chip;
-       spi = message->spi;
-
-       if (message->state == ERROR_STATE) {
-               message->status = -EIO;
-               goto early_exit;
-       }
-
-       /* Handle end of message */
-       if (message->state == DONE_STATE) {
-               message->status = 0;
-               goto early_exit;
-       }
-
-       /* Delay if requested at end of transfer */
-       if (message->state == RUNNING_STATE) {
-               previous = list_entry(transfer->transfer_list.prev,
-                                       struct spi_transfer,
-                                       transfer_list);
-               if (previous->delay_usecs)
-                       udelay(previous->delay_usecs);
-       }
-
+       dws->dma_mapped = 0;
        dws->n_bytes = chip->n_bytes;
        dws->dma_width = chip->dma_width;
-       dws->cs_control = chip->cs_control;
 
-       dws->rx_dma = transfer->rx_dma;
-       dws->tx_dma = transfer->tx_dma;
        dws->tx = (void *)transfer->tx_buf;
        dws->tx_end = dws->tx + transfer->len;
        dws->rx = transfer->rx_buf;
        dws->rx_end = dws->rx + transfer->len;
-       dws->len = dws->cur_transfer->len;
-       if (chip != dws->prev_chip)
-               cs_change = 1;
+       dws->len = transfer->len;
+
+       spi_enable_chip(dws, 0);
 
        cr0 = chip->cr0;
 
@@ -416,32 +312,37 @@ static void pump_transfers(unsigned long data)
        if (transfer->speed_hz) {
                speed = chip->speed_hz;
 
-               if ((transfer->speed_hz != speed) || (!chip->clk_div)) {
+               if ((transfer->speed_hz != speed) || !chip->clk_div) {
                        speed = transfer->speed_hz;
 
                        /* clk_div doesn't support odd number */
-                       clk_div = dws->max_freq / speed;
-                       clk_div = (clk_div + 1) & 0xfffe;
+                       clk_div = (dws->max_freq / speed + 1) & 0xfffe;
 
                        chip->speed_hz = speed;
                        chip->clk_div = clk_div;
+
+                       spi_set_clk(dws, chip->clk_div);
                }
        }
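        /*
         * Worked example of the divider rounding above: "+ 1" then "& 0xfffe"
         * bumps an odd quotient to the next even value and leaves an even one
         * alone, e.g. 50 MHz / 10 MHz = 5 -> clk_div = 6 (8.33 MHz actual),
         * while 50 MHz / 12.5 MHz = 4 stays 4 (12.5 MHz exactly).
         */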
        if (transfer->bits_per_word) {
-               bits = transfer->bits_per_word;
-               dws->n_bytes = dws->dma_width = bits >> 3;
-               cr0 = (bits - 1)
+               if (transfer->bits_per_word == 8) {
+                       dws->n_bytes = 1;
+                       dws->dma_width = 1;
+               } else if (transfer->bits_per_word == 16) {
+                       dws->n_bytes = 2;
+                       dws->dma_width = 2;
+               }
+               cr0 = (transfer->bits_per_word - 1)
                        | (chip->type << SPI_FRF_OFFSET)
                        | (spi->mode << SPI_MODE_OFFSET)
                        | (chip->tmode << SPI_TMOD_OFFSET);
        }
-       message->state = RUNNING_STATE;
 
        /*
         * Adjust transfer mode if necessary. Requires platform dependent
         * chipselect mechanism.
         */
-       if (dws->cs_control) {
+       if (chip->cs_control) {
                if (dws->rx && dws->tx)
                        chip->tmode = SPI_TMOD_TR;
                else if (dws->rx)
@@ -453,80 +354,60 @@ static void pump_transfers(unsigned long data)
                cr0 |= (chip->tmode << SPI_TMOD_OFFSET);
        }
 
+       dw_writel(dws, DW_SPI_CTRL0, cr0);
+
        /* Check if current transfer is a DMA transaction */
-       dws->dma_mapped = map_dma_buffers(dws);
+       if (master->can_dma && master->can_dma(master, spi, transfer))
+               dws->dma_mapped = master->cur_msg_mapped;
+
+       /* Mask all interrupts; the IRQ and DMA paths unmask what they need */
+       spi_mask_intr(dws, 0xff);
 
        /*
         * Interrupt mode
         * we only need to set the TXEI IRQ, as TX/RX always happen synchronously
         */
-       if (!dws->dma_mapped && !chip->poll_mode) {
-               int templen = dws->len / dws->n_bytes;
-
-               txint_level = dws->fifo_len / 2;
-               txint_level = (templen > txint_level) ? txint_level : templen;
+       if (dws->dma_mapped) {
+               ret = dws->dma_ops->dma_setup(dws, transfer);
+               if (ret < 0) {
+                       spi_enable_chip(dws, 1);
+                       return ret;
+               }
+       } else if (!chip->poll_mode) {
+               txlevel = min_t(u16, dws->fifo_len / 2, dws->len / dws->n_bytes);
+               dw_writel(dws, DW_SPI_TXFLTR, txlevel);
 
+               /* Set the interrupt mask */
                imask |= SPI_INT_TXEI | SPI_INT_TXOI |
                         SPI_INT_RXUI | SPI_INT_RXOI;
+               spi_umask_intr(dws, imask);
+
                dws->transfer_handler = interrupt_transfer;
        }
 
-       /*
-        * Reprogram registers only if
-        *      1. chip select changes
-        *      2. clk_div is changed
-        *      3. control value changes
-        */
-       if (dw_readw(dws, DW_SPI_CTRL0) != cr0 || cs_change || clk_div || imask) {
-               spi_enable_chip(dws, 0);
-
-               if (dw_readw(dws, DW_SPI_CTRL0) != cr0)
-                       dw_writew(dws, DW_SPI_CTRL0, cr0);
-
-               spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
-               spi_chip_sel(dws, spi, 1);
-
-               /* Set the interrupt mask, for poll mode just disable all int */
-               spi_mask_intr(dws, 0xff);
-               if (imask)
-                       spi_umask_intr(dws, imask);
-               if (txint_level)
-                       dw_writew(dws, DW_SPI_TXFLTR, txint_level);
+       spi_enable_chip(dws, 1);
 
-               spi_enable_chip(dws, 1);
-               if (cs_change)
-                       dws->prev_chip = chip;
+       if (dws->dma_mapped) {
+               ret = dws->dma_ops->dma_transfer(dws, transfer);
+               if (ret < 0)
+                       return ret;
        }
 
-       if (dws->dma_mapped)
-               dws->dma_ops->dma_transfer(dws, cs_change);
-
        if (chip->poll_mode)
-               poll_transfer(dws);
-
-       return;
+               return poll_transfer(dws);
 
-early_exit:
-       giveback(dws);
+       return 1;
 }
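
Under the core's transfer_one contract (as documented in include/linux/spi/spi.h), a negative return reports an error, 0 means the transfer already finished, and a positive return means it is still in flight and spi_finalize_current_transfer() will be called later, which is exactly what the IRQ and DMA completion paths above do. A minimal sketch of that shape, with hypothetical hw_start()/hw_poll_done() helpers standing in for the hardware work:

static int example_transfer_one(struct spi_master *master,
                struct spi_device *spi, struct spi_transfer *xfer)
{
        int ret = hw_start(xfer);       /* hypothetical: kick the hardware */

        if (ret < 0)
                return ret;             /* core reports the error */
        if (hw_poll_done(xfer))         /* hypothetical poll-mode check */
                return 0;               /* finished synchronously */
        return 1;                       /* a completion will finalize later */
}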
 
-static int dw_spi_transfer_one_message(struct spi_master *master,
+static void dw_spi_handle_err(struct spi_master *master,
                struct spi_message *msg)
 {
        struct dw_spi *dws = spi_master_get_devdata(master);
 
-       dws->cur_msg = msg;
-       /* Initial message state */
-       dws->cur_msg->state = START_STATE;
-       dws->cur_transfer = list_entry(dws->cur_msg->transfers.next,
-                                               struct spi_transfer,
-                                               transfer_list);
-       dws->cur_chip = spi_get_ctldata(dws->cur_msg->spi);
-
-       /* Launch transfers */
-       tasklet_schedule(&dws->pump_transfers);
+       if (dws->dma_mapped)
+               dws->dma_ops->dma_stop(dws);
 
-       return 0;
+       spi_reset_chip(dws);
 }
 
 /* This may be called twice for each spi dev */
@@ -561,8 +442,6 @@ static int dw_spi_setup(struct spi_device *spi)
 
                chip->rx_threshold = 0;
                chip->tx_threshold = 0;
-
-               chip->enable_dma = chip_info->enable_dma;
        }
 
        if (spi->bits_per_word == 8) {
@@ -610,9 +489,7 @@ static void dw_spi_cleanup(struct spi_device *spi)
 /* Restart the controller, disable all interrupts, clean rx fifo */
 static void spi_hw_init(struct device *dev, struct dw_spi *dws)
 {
-       spi_enable_chip(dws, 0);
-       spi_mask_intr(dws, 0xff);
-       spi_enable_chip(dws, 1);
+       spi_reset_chip(dws);
 
        /*
         * Try to detect the FIFO depth if not set by interface driver,
@@ -622,11 +499,11 @@ static void spi_hw_init(struct device *dev, struct dw_spi *dws)
                u32 fifo;
 
                for (fifo = 1; fifo < 256; fifo++) {
-                       dw_writew(dws, DW_SPI_TXFLTR, fifo);
-                       if (fifo != dw_readw(dws, DW_SPI_TXFLTR))
+                       dw_writel(dws, DW_SPI_TXFLTR, fifo);
+                       if (fifo != dw_readl(dws, DW_SPI_TXFLTR))
                                break;
                }
-               dw_writew(dws, DW_SPI_TXFLTR, 0);
+               dw_writel(dws, DW_SPI_TXFLTR, 0);
 
                dws->fifo_len = (fifo == 1) ? 0 : fifo;
                dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
@@ -646,13 +523,12 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
 
        dws->master = master;
        dws->type = SSI_MOTO_SPI;
-       dws->prev_chip = NULL;
        dws->dma_inited = 0;
        dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60);
        snprintf(dws->name, sizeof(dws->name), "dw_spi%d", dws->bus_num);
 
        ret = devm_request_irq(dev, dws->irq, dw_spi_irq, IRQF_SHARED,
-                       dws->name, dws);
+                       dws->name, master);
        if (ret < 0) {
                dev_err(&master->dev, "cannot get IRQ\n");
                goto err_free_master;
@@ -664,7 +540,9 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
        master->num_chipselect = dws->num_cs;
        master->setup = dw_spi_setup;
        master->cleanup = dw_spi_cleanup;
-       master->transfer_one_message = dw_spi_transfer_one_message;
+       master->set_cs = dw_spi_set_cs;
+       master->transfer_one = dw_spi_transfer_one;
+       master->handle_err = dw_spi_handle_err;
        master->max_speed_hz = dws->max_freq;
        master->dev.of_node = dev->of_node;
 
@@ -676,11 +554,11 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
                if (ret) {
                        dev_warn(dev, "DMA init failed\n");
                        dws->dma_inited = 0;
+               } else {
+                       master->can_dma = dws->dma_ops->can_dma;
                }
        }
 
-       tasklet_init(&dws->pump_transfers, pump_transfers, (unsigned long)dws);
-
        spi_master_set_devdata(master, dws);
        ret = devm_spi_register_master(dev, master);
        if (ret) {
index 3d32be68c14210c93e0ae3a6c87e65c7e5c38759..6c91391c1a4f8f07b40cb546de3eb169c44670f3 100644 (file)
@@ -91,12 +91,15 @@ struct dw_spi;
 struct dw_spi_dma_ops {
        int (*dma_init)(struct dw_spi *dws);
        void (*dma_exit)(struct dw_spi *dws);
-       int (*dma_transfer)(struct dw_spi *dws, int cs_change);
+       int (*dma_setup)(struct dw_spi *dws, struct spi_transfer *xfer);
+       bool (*can_dma)(struct spi_master *master, struct spi_device *spi,
+                       struct spi_transfer *xfer);
+       int (*dma_transfer)(struct dw_spi *dws, struct spi_transfer *xfer);
+       void (*dma_stop)(struct dw_spi *dws);
 };
 
 struct dw_spi {
        struct spi_master       *master;
-       struct spi_device       *cur_dev;
        enum dw_ssi_type        type;
        char                    name[16];
 
@@ -109,41 +112,26 @@ struct dw_spi {
        u16                     bus_num;
        u16                     num_cs;         /* supported slave numbers */
 
-       /* Message Transfer pump */
-       struct tasklet_struct   pump_transfers;
-
        /* Current message transfer state info */
-       struct spi_message      *cur_msg;
-       struct spi_transfer     *cur_transfer;
-       struct chip_data        *cur_chip;
-       struct chip_data        *prev_chip;
        size_t                  len;
        void                    *tx;
        void                    *tx_end;
        void                    *rx;
        void                    *rx_end;
        int                     dma_mapped;
-       dma_addr_t              rx_dma;
-       dma_addr_t              tx_dma;
-       size_t                  rx_map_len;
-       size_t                  tx_map_len;
        u8                      n_bytes;        /* current is a 1/2 bytes op */
-       u8                      max_bits_per_word;      /* maxim is 16b */
        u32                     dma_width;
        irqreturn_t             (*transfer_handler)(struct dw_spi *dws);
-       void                    (*cs_control)(u32 command);
 
-       /* Dma info */
+       /* DMA info */
        int                     dma_inited;
        struct dma_chan         *txchan;
-       struct scatterlist      tx_sgl;
        struct dma_chan         *rxchan;
-       struct scatterlist      rx_sgl;
        unsigned long           dma_chan_busy;
-       struct device           *dma_dev;
        dma_addr_t              dma_addr; /* phy address of the Data register */
        struct dw_spi_dma_ops   *dma_ops;
-       void                    *dma_priv; /* platform relate info */
+       void                    *dma_tx;
+       void                    *dma_rx;
 
        /* Bus interface info */
        void                    *priv;
@@ -162,16 +150,6 @@ static inline void dw_writel(struct dw_spi *dws, u32 offset, u32 val)
        __raw_writel(val, dws->regs + offset);
 }
 
-static inline u16 dw_readw(struct dw_spi *dws, u32 offset)
-{
-       return __raw_readw(dws->regs + offset);
-}
-
-static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val)
-{
-       __raw_writew(val, dws->regs + offset);
-}
-
 static inline void spi_enable_chip(struct dw_spi *dws, int enable)
 {
        dw_writel(dws, DW_SPI_SSIENR, (enable ? 1 : 0));
@@ -182,22 +160,6 @@ static inline void spi_set_clk(struct dw_spi *dws, u16 div)
        dw_writel(dws, DW_SPI_BAUDR, div);
 }
 
-static inline void spi_chip_sel(struct dw_spi *dws, struct spi_device *spi,
-               int active)
-{
-       u16 cs = spi->chip_select;
-       int gpio_val = active ? (spi->mode & SPI_CS_HIGH) :
-               !(spi->mode & SPI_CS_HIGH);
-
-       if (dws->cs_control)
-               dws->cs_control(active);
-       if (gpio_is_valid(spi->cs_gpio))
-               gpio_set_value(spi->cs_gpio, gpio_val);
-
-       if (active)
-               dw_writel(dws, DW_SPI_SER, 1 << cs);
-}
-
 /* Disable IRQ bits */
 static inline void spi_mask_intr(struct dw_spi *dws, u32 mask)
 {
@@ -216,16 +178,27 @@ static inline void spi_umask_intr(struct dw_spi *dws, u32 mask)
        dw_writel(dws, DW_SPI_IMR, new_mask);
 }
 
+/*
+ * Disable the SPI controller, mask all interrupts, and then re-enable the
+ * controller. The transmit and receive FIFO buffers are cleared when the
+ * device is disabled.
+ */
+static inline void spi_reset_chip(struct dw_spi *dws)
+{
+       spi_enable_chip(dws, 0);
+       spi_mask_intr(dws, 0xff);
+       spi_enable_chip(dws, 1);
+}
+
 /*
  * Each SPI slave device that works with the dw_api controller should
- * has such a structure claiming its working mode (PIO/DMA etc),
+ * have such a structure claiming its working mode (poll or PIO/DMA),
  * which can be saved in the "controller_data" member of the
  * struct spi_device.
  */
 struct dw_spi_chip {
        u8 poll_mode;   /* 1 for controller polling mode */
        u8 type;        /* SPI/SSP/MicroWire */
-       u8 enable_dma;
        void (*cs_control)(u32 command);
 };
 
@@ -233,7 +206,6 @@ extern int dw_spi_add_host(struct device *dev, struct dw_spi *dws);
 extern void dw_spi_remove_host(struct dw_spi *dws);
 extern int dw_spi_suspend_host(struct dw_spi *dws);
 extern int dw_spi_resume_host(struct dw_spi *dws);
-extern void dw_spi_xfer_done(struct dw_spi *dws);
 
 /* platform related setup */
 extern int dw_spi_mid_init(struct dw_spi *dws); /* Intel MID platforms */
index d1a39249704a7e3a16bcd861bc7f16f5a9ed02dd..5fe54cda309f5e7523c39f44245e6374c134fb08 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -29,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
+#include <linux/time.h>
 
 #define DRIVER_NAME "fsl-dspi"
 
@@ -51,7 +53,7 @@
 #define SPI_CTAR_CPOL(x)       ((x) << 26)
 #define SPI_CTAR_CPHA(x)       ((x) << 25)
 #define SPI_CTAR_LSBFE(x)      ((x) << 24)
-#define SPI_CTAR_PCSSCR(x)     (((x) & 0x00000003) << 22)
+#define SPI_CTAR_PCSSCK(x)     (((x) & 0x00000003) << 22)
 #define SPI_CTAR_PASC(x)       (((x) & 0x00000003) << 20)
 #define SPI_CTAR_PDT(x)        (((x) & 0x00000003) << 18)
 #define SPI_CTAR_PBR(x)        (((x) & 0x00000003) << 16)
@@ -59,6 +61,7 @@
 #define SPI_CTAR_ASC(x)        (((x) & 0x0000000f) << 8)
 #define SPI_CTAR_DT(x)         (((x) & 0x0000000f) << 4)
 #define SPI_CTAR_BR(x)         ((x) & 0x0000000f)
+#define SPI_CTAR_SCALE_BITS    0xf
 
 #define SPI_CTAR0_SLAVE        0x0c
 
@@ -148,23 +151,66 @@ static void hz_to_spi_baud(char *pbr, char *br, int speed_hz,
                16,     32,     64,     128,
                256,    512,    1024,   2048,
                4096,   8192,   16384,  32768 };
-       int temp, i = 0, j = 0;
+       int scale_needed, scale, minscale = INT_MAX;
+       int i, j;
+
+       scale_needed = clkrate / speed_hz;
+       if (clkrate % speed_hz)
+               scale_needed++;
+
+       for (i = 0; i < ARRAY_SIZE(brs); i++)
+               for (j = 0; j < ARRAY_SIZE(pbr_tbl); j++) {
+                       scale = brs[i] * pbr_tbl[j];
+                       if (scale >= scale_needed) {
+                               if (scale < minscale) {
+                                       minscale = scale;
+                                       *br = i;
+                                       *pbr = j;
+                               }
+                               break;
+                       }
+               }
 
-       temp = clkrate / 2 / speed_hz;
+       if (minscale == INT_MAX) {
+               pr_warn("Cannot find valid baud rate: speed_hz is %d, clkrate is %lu; using the max prescaler value\n",
+                       speed_hz, clkrate);
+               *pbr = ARRAY_SIZE(pbr_tbl) - 1;
+               *br =  ARRAY_SIZE(brs) - 1;
+       }
+}
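
The search computes the smallest integer scale with clkrate / scale <= speed_hz and then keeps the cheapest brs[i] * pbr_tbl[j] product that meets it, so the chosen rate is the fastest one that does not overshoot the request. The ceiling division in isolation, plus a worked case (the example assumes 6 and 2 appear in the respective tables):

/* smallest scale such that clkrate / scale <= speed_hz */
static int dspi_scale_needed(unsigned long clkrate, int speed_hz)
{
        int scale = clkrate / speed_hz;

        if (clkrate % speed_hz)
                scale++;
        return scale;
}

/*
 * e.g. dspi_scale_needed(100000000, 9000000) == 12; the scan then keeps
 * the smallest product >= 12, such as 6 * 2, and the actual rate becomes
 * 100 MHz / 12 = 8.33 MHz, just under the 9 MHz request.
 */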
 
-       for (i = 0; i < ARRAY_SIZE(pbr_tbl); i++)
-               for (j = 0; j < ARRAY_SIZE(brs); j++) {
-                       if (pbr_tbl[i] * brs[j] >= temp) {
-                               *pbr = i;
-                               *br = j;
-                               return;
+static void ns_delay_scale(char *psc, char *sc, int delay_ns,
+               unsigned long clkrate)
+{
+       int pscale_tbl[4] = {1, 3, 5, 7};
+       int scale_needed, scale, minscale = INT_MAX;
+       int i, j;
+       u32 remainder;
+
+       scale_needed = div_u64_rem((u64)delay_ns * clkrate, NSEC_PER_SEC,
+                       &remainder);
+       if (remainder)
+               scale_needed++;
+
+       for (i = 0; i < ARRAY_SIZE(pscale_tbl); i++)
+               for (j = 0; j <= SPI_CTAR_SCALE_BITS; j++) {
+                       scale = pscale_tbl[i] * (2 << j);
+                       if (scale >= scale_needed) {
+                               if (scale < minscale) {
+                                       minscale = scale;
+                                       *psc = i;
+                                       *sc = j;
+                               }
+                               break;
                        }
                }
 
-       pr_warn("Can not find valid baud rate,speed_hz is %d,clkrate is %ld\
-               ,we use the max prescaler value.\n", speed_hz, clkrate);
-       *pbr = ARRAY_SIZE(pbr_tbl) - 1;
-       *br =  ARRAY_SIZE(brs) - 1;
+       if (minscale == INT_MAX) {
+               pr_warn("Cannot find correct scale values for %dns delay at clkrate %lu, using max prescaler value\n",
+                       delay_ns, clkrate);
+               *psc = ARRAY_SIZE(pscale_tbl) - 1;
+               *sc = SPI_CTAR_SCALE_BITS;
+       }
 }
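
The delay search is the same minimisation with scale = pscale_tbl[i] * (2 << j) and scale_needed = ceil(delay_ns * clkrate / NSEC_PER_SEC). A sketch of the needed-scale computation with worked numbers (both headers are already pulled in by this driver):

#include <linux/math64.h>
#include <linux/time.h>

static int dspi_delay_scale_needed(u32 delay_ns, unsigned long clkrate)
{
        u32 rem;
        int scale = div_u64_rem((u64)delay_ns * clkrate, NSEC_PER_SEC, &rem);

        return rem ? scale + 1 : scale;
}

/*
 * e.g. dspi_delay_scale_needed(50, 100000000) == 5; the smallest
 * qualifying product is 3 * (2 << 0) = 6, i.e. psc = 1 (prescaler 3)
 * and sc = 0, for an actual delay of 6 / 100 MHz = 60 ns.
 */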
 
 static int dspi_transfer_write(struct fsl_dspi *dspi)
@@ -345,7 +391,10 @@ static int dspi_setup(struct spi_device *spi)
 {
        struct chip_data *chip;
        struct fsl_dspi *dspi = spi_master_get_devdata(spi->master);
-       unsigned char br = 0, pbr = 0, fmsz = 0;
+       u32 cs_sck_delay = 0, sck_cs_delay = 0;
+       unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
+       unsigned char pasc = 0, asc = 0, fmsz = 0;
+       unsigned long clkrate;
 
        if ((spi->bits_per_word >= 4) && (spi->bits_per_word <= 16)) {
                fmsz = spi->bits_per_word - 1;
@@ -362,18 +411,34 @@ static int dspi_setup(struct spi_device *spi)
                        return -ENOMEM;
        }
 
+       of_property_read_u32(spi->dev.of_node, "fsl,spi-cs-sck-delay",
+                       &cs_sck_delay);
+
+       of_property_read_u32(spi->dev.of_node, "fsl,spi-sck-cs-delay",
+                       &sck_cs_delay);
+
        chip->mcr_val = SPI_MCR_MASTER | SPI_MCR_PCSIS |
                SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF;
 
        chip->void_write_data = 0;
 
-       hz_to_spi_baud(&pbr, &br,
-                       spi->max_speed_hz, clk_get_rate(dspi->clk));
+       clkrate = clk_get_rate(dspi->clk);
+       hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);
+
+       /* Set PCS to SCK delay scale values */
+       ns_delay_scale(&pcssck, &cssck, cs_sck_delay, clkrate);
+
+       /* Set After SCK delay scale values */
+       ns_delay_scale(&pasc, &asc, sck_cs_delay, clkrate);
 
        chip->ctar_val =  SPI_CTAR_FMSZ(fmsz)
                | SPI_CTAR_CPOL(spi->mode & SPI_CPOL ? 1 : 0)
                | SPI_CTAR_CPHA(spi->mode & SPI_CPHA ? 1 : 0)
                | SPI_CTAR_LSBFE(spi->mode & SPI_LSB_FIRST ? 1 : 0)
+               | SPI_CTAR_PCSSCK(pcssck)
+               | SPI_CTAR_CSSCK(cssck)
+               | SPI_CTAR_PASC(pasc)
+               | SPI_CTAR_ASC(asc)
                | SPI_CTAR_PBR(pbr)
                | SPI_CTAR_BR(br);
 
index e649bc7d4c086bb789d2dc1803c621bebdc29a19..788e2b176a4f7707051bcc325538e7d2a6d599f4 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
+#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -122,36 +123,31 @@ static inline void spfi_start(struct img_spfi *spfi)
        spfi_writel(spfi, val, SPFI_CONTROL);
 }
 
-static inline void spfi_stop(struct img_spfi *spfi)
-{
-       u32 val;
-
-       val = spfi_readl(spfi, SPFI_CONTROL);
-       val &= ~SPFI_CONTROL_SPFI_EN;
-       spfi_writel(spfi, val, SPFI_CONTROL);
-}
-
 static inline void spfi_reset(struct img_spfi *spfi)
 {
        spfi_writel(spfi, SPFI_CONTROL_SOFT_RESET, SPFI_CONTROL);
-       udelay(1);
        spfi_writel(spfi, 0, SPFI_CONTROL);
 }
 
-static void spfi_flush_tx_fifo(struct img_spfi *spfi)
+static int spfi_wait_all_done(struct img_spfi *spfi)
 {
-       unsigned long timeout = jiffies + msecs_to_jiffies(10);
+       unsigned long timeout = jiffies + msecs_to_jiffies(50);
 
-       spfi_writel(spfi, SPFI_INTERRUPT_SDE, SPFI_INTERRUPT_CLEAR);
        while (time_before(jiffies, timeout)) {
-               if (spfi_readl(spfi, SPFI_INTERRUPT_STATUS) &
-                   SPFI_INTERRUPT_SDE)
-                       return;
+               u32 status = spfi_readl(spfi, SPFI_INTERRUPT_STATUS);
+
+               if (status & SPFI_INTERRUPT_ALLDONETRIG) {
+                       spfi_writel(spfi, SPFI_INTERRUPT_ALLDONETRIG,
+                                   SPFI_INTERRUPT_CLEAR);
+                       return 0;
+               }
                cpu_relax();
        }
 
-       dev_err(spfi->dev, "Timed out waiting for FIFO to drain\n");
+       dev_err(spfi->dev, "Timed out waiting for transaction to complete\n");
        spfi_reset(spfi);
+
+       return -ETIMEDOUT;
 }
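
spfi_wait_all_done() is the standard jiffies deadline poll: compute the deadline once, spin with cpu_relax() until the hardware flag latches or time_before() fails, and acknowledge the trigger by writing it back to the CLEAR register. The idiom in isolation (callback form, illustrative only):

#include <linux/jiffies.h>
#include <linux/errno.h>

static int poll_with_deadline(bool (*done)(void *), void *ctx,
                              unsigned int timeout_ms)
{
        unsigned long deadline = jiffies + msecs_to_jiffies(timeout_ms);

        while (time_before(jiffies, deadline)) {
                if (done(ctx))
                        return 0;
                cpu_relax();    /* busy-wait politely */
        }
        return -ETIMEDOUT;      /* deadline passed without completion */
}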
 
 static unsigned int spfi_pio_write32(struct img_spfi *spfi, const u32 *buf,
@@ -237,6 +233,7 @@ static int img_spfi_start_pio(struct spi_master *master,
        const void *tx_buf = xfer->tx_buf;
        void *rx_buf = xfer->rx_buf;
        unsigned long timeout;
+       int ret;
 
        if (tx_buf)
                tx_bytes = xfer->len;
@@ -269,16 +266,15 @@ static int img_spfi_start_pio(struct spi_master *master,
                cpu_relax();
        }
 
+       ret = spfi_wait_all_done(spfi);
+       if (ret < 0)
+               return ret;
+
        if (rx_bytes > 0 || tx_bytes > 0) {
                dev_err(spfi->dev, "PIO transfer timed out\n");
-               spfi_reset(spfi);
                return -ETIMEDOUT;
        }
 
-       if (tx_buf)
-               spfi_flush_tx_fifo(spfi);
-       spfi_stop(spfi);
-
        return 0;
 }
 
@@ -287,14 +283,12 @@ static void img_spfi_dma_rx_cb(void *data)
        struct img_spfi *spfi = data;
        unsigned long flags;
 
-       spin_lock_irqsave(&spfi->lock, flags);
+       spfi_wait_all_done(spfi);
 
+       spin_lock_irqsave(&spfi->lock, flags);
        spfi->rx_dma_busy = false;
-       if (!spfi->tx_dma_busy) {
-               spfi_stop(spfi);
+       if (!spfi->tx_dma_busy)
                spi_finalize_current_transfer(spfi->master);
-       }
-
        spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -303,16 +297,12 @@ static void img_spfi_dma_tx_cb(void *data)
        struct img_spfi *spfi = data;
        unsigned long flags;
 
-       spfi_flush_tx_fifo(spfi);
+       spfi_wait_all_done(spfi);
 
        spin_lock_irqsave(&spfi->lock, flags);
-
        spfi->tx_dma_busy = false;
-       if (!spfi->rx_dma_busy) {
-               spfi_stop(spfi);
+       if (!spfi->rx_dma_busy)
                spi_finalize_current_transfer(spfi->master);
-       }
-
        spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -397,6 +387,75 @@ stop_dma:
        return -EIO;
 }
 
+static void img_spfi_handle_err(struct spi_master *master,
+                               struct spi_message *msg)
+{
+       struct img_spfi *spfi = spi_master_get_devdata(master);
+       unsigned long flags;
+
+       /*
+        * Stop all DMA and reset the controller if the previous transaction
+        * timed out and never completed its DMA.
+        */
+       spin_lock_irqsave(&spfi->lock, flags);
+       if (spfi->tx_dma_busy || spfi->rx_dma_busy) {
+               spfi->tx_dma_busy = false;
+               spfi->rx_dma_busy = false;
+
+               dmaengine_terminate_all(spfi->tx_ch);
+               dmaengine_terminate_all(spfi->rx_ch);
+       }
+       spin_unlock_irqrestore(&spfi->lock, flags);
+}
+
+static int img_spfi_prepare(struct spi_master *master, struct spi_message *msg)
+{
+       struct img_spfi *spfi = spi_master_get_devdata(master);
+       u32 val;
+
+       val = spfi_readl(spfi, SPFI_PORT_STATE);
+       if (msg->spi->mode & SPI_CPHA)
+               val |= SPFI_PORT_STATE_CK_PHASE(msg->spi->chip_select);
+       else
+               val &= ~SPFI_PORT_STATE_CK_PHASE(msg->spi->chip_select);
+       if (msg->spi->mode & SPI_CPOL)
+               val |= SPFI_PORT_STATE_CK_POL(msg->spi->chip_select);
+       else
+               val &= ~SPFI_PORT_STATE_CK_POL(msg->spi->chip_select);
+       spfi_writel(spfi, val, SPFI_PORT_STATE);
+
+       return 0;
+}
+
+static int img_spfi_unprepare(struct spi_master *master,
+                             struct spi_message *msg)
+{
+       struct img_spfi *spfi = spi_master_get_devdata(master);
+
+       spfi_reset(spfi);
+
+       return 0;
+}
+
+static int img_spfi_setup(struct spi_device *spi)
+{
+       int ret;
+
+       ret = gpio_request_one(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ?
+                              GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH,
+                              dev_name(&spi->dev));
+       if (ret)
+               dev_err(&spi->dev, "can't request chipselect gpio %d\n",
+                               spi->cs_gpio);
+
+       return ret;
+}
+
+static void img_spfi_cleanup(struct spi_device *spi)
+{
+       gpio_free(spi->cs_gpio);
+}
+
 static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
                            struct spi_transfer *xfer)
 {
@@ -405,10 +464,10 @@ static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
 
        /*
         * output = spfi_clk * (BITCLK / 512), where BITCLK must be a
-        * power of 2 up to 256 (where 255 == 256 since BITCLK is 8 bits)
+        * power of 2 up to 128
         */
-       div = DIV_ROUND_UP(master->max_speed_hz, xfer->speed_hz);
-       div = clamp(512 / (1 << get_count_order(div)), 1, 255);
+       div = DIV_ROUND_UP(clk_get_rate(spfi->spfi_clk), xfer->speed_hz);
+       div = clamp(512 / (1 << get_count_order(div)), 1, 128);
 
        val = spfi_readl(spfi, SPFI_DEVICE_PARAMETER(spi->chip_select));
        val &= ~(SPFI_DEVICE_PARAMETER_BITCLK_MASK <<
@@ -416,6 +475,9 @@ static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
        val |= div << SPFI_DEVICE_PARAMETER_BITCLK_SHIFT;
        spfi_writel(spfi, val, SPFI_DEVICE_PARAMETER(spi->chip_select));
 
+       spfi_writel(spfi, xfer->len << SPFI_TRANSACTION_TSIZE_SHIFT,
+                   SPFI_TRANSACTION);
+
        val = spfi_readl(spfi, SPFI_CONTROL);
        val &= ~(SPFI_CONTROL_SEND_DMA | SPFI_CONTROL_GET_DMA);
        if (xfer->tx_buf)
@@ -429,25 +491,7 @@ static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
        else if (xfer->tx_nbits == SPI_NBITS_QUAD &&
                 xfer->rx_nbits == SPI_NBITS_QUAD)
                val |= SPFI_CONTROL_TMODE_QUAD << SPFI_CONTROL_TMODE_SHIFT;
-       val &= ~SPFI_CONTROL_CONTINUE;
-       if (!xfer->cs_change && !list_is_last(&xfer->transfer_list,
-                                             &master->cur_msg->transfers))
-               val |= SPFI_CONTROL_CONTINUE;
        spfi_writel(spfi, val, SPFI_CONTROL);
-
-       val = spfi_readl(spfi, SPFI_PORT_STATE);
-       if (spi->mode & SPI_CPHA)
-               val |= SPFI_PORT_STATE_CK_PHASE(spi->chip_select);
-       else
-               val &= ~SPFI_PORT_STATE_CK_PHASE(spi->chip_select);
-       if (spi->mode & SPI_CPOL)
-               val |= SPFI_PORT_STATE_CK_POL(spi->chip_select);
-       else
-               val &= ~SPFI_PORT_STATE_CK_POL(spi->chip_select);
-       spfi_writel(spfi, val, SPFI_PORT_STATE);
-
-       spfi_writel(spfi, xfer->len << SPFI_TRANSACTION_TSIZE_SHIFT,
-                   SPFI_TRANSACTION);
 }
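
A worked pass through the new divider math in img_spfi_config(): with a 50 MHz spfi_clk and a 5 MHz request, DIV_ROUND_UP() gives 10, get_count_order(10) = 4, so BITCLK = 512 >> 4 = 32 and the output runs at 50 MHz * 32 / 512 = 3.125 MHz, the fastest power-of-two setting not above the request. The 128 ceiling is also why the probe path below advertises max_speed_hz as spfi_clk / 4. The computation in isolation:

#include <linux/bitops.h>
#include <linux/kernel.h>

static int spfi_bitclk(unsigned long clk_hz, u32 speed_hz)
{
        int div = DIV_ROUND_UP(clk_hz, speed_hz);

        /* largest power-of-two BITCLK keeping the output <= speed_hz */
        return clamp(512 / (1 << get_count_order(div)), 1, 128);
}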
 
 static int img_spfi_transfer_one(struct spi_master *master,
@@ -455,8 +499,6 @@ static int img_spfi_transfer_one(struct spi_master *master,
                                 struct spi_transfer *xfer)
 {
        struct img_spfi *spfi = spi_master_get_devdata(spi->master);
-       bool dma_reset = false;
-       unsigned long flags;
        int ret;
 
        if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) {
@@ -466,23 +508,6 @@ static int img_spfi_transfer_one(struct spi_master *master,
                return -EINVAL;
        }
 
-       /*
-        * Stop all DMA and reset the controller if the previous transaction
-        * timed-out and never completed it's DMA.
-        */
-       spin_lock_irqsave(&spfi->lock, flags);
-       if (spfi->tx_dma_busy || spfi->rx_dma_busy) {
-               dev_err(spfi->dev, "SPI DMA still busy\n");
-               dma_reset = true;
-       }
-       spin_unlock_irqrestore(&spfi->lock, flags);
-
-       if (dma_reset) {
-               dmaengine_terminate_all(spfi->tx_ch);
-               dmaengine_terminate_all(spfi->rx_ch);
-               spfi_reset(spfi);
-       }
-
        img_spfi_config(master, spi, xfer);
        if (master->can_dma && master->can_dma(master, spi, xfer))
                ret = img_spfi_start_dma(master, spi, xfer);
@@ -492,17 +517,6 @@ static int img_spfi_transfer_one(struct spi_master *master,
        return ret;
 }
 
-static void img_spfi_set_cs(struct spi_device *spi, bool enable)
-{
-       struct img_spfi *spfi = spi_master_get_devdata(spi->master);
-       u32 val;
-
-       val = spfi_readl(spfi, SPFI_PORT_STATE);
-       val &= ~(SPFI_PORT_STATE_DEV_SEL_MASK << SPFI_PORT_STATE_DEV_SEL_SHIFT);
-       val |= spi->chip_select << SPFI_PORT_STATE_DEV_SEL_SHIFT;
-       spfi_writel(spfi, val, SPFI_PORT_STATE);
-}
-
 static bool img_spfi_can_dma(struct spi_master *master, struct spi_device *spi,
                             struct spi_transfer *xfer)
 {
@@ -591,14 +605,17 @@ static int img_spfi_probe(struct platform_device *pdev)
        master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_DUAL | SPI_RX_DUAL;
        if (of_property_read_bool(spfi->dev->of_node, "img,supports-quad-mode"))
                master->mode_bits |= SPI_TX_QUAD | SPI_RX_QUAD;
-       master->num_chipselect = 5;
        master->dev.of_node = pdev->dev.of_node;
        master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(8);
-       master->max_speed_hz = clk_get_rate(spfi->spfi_clk);
-       master->min_speed_hz = master->max_speed_hz / 512;
+       master->max_speed_hz = clk_get_rate(spfi->spfi_clk) / 4;
+       master->min_speed_hz = clk_get_rate(spfi->spfi_clk) / 512;
 
-       master->set_cs = img_spfi_set_cs;
+       master->setup = img_spfi_setup;
+       master->cleanup = img_spfi_cleanup;
        master->transfer_one = img_spfi_transfer_one;
+       master->prepare_message = img_spfi_prepare;
+       master->unprepare_message = img_spfi_unprepare;
+       master->handle_err = img_spfi_handle_err;
 
        spfi->tx_ch = dma_request_slave_channel(spfi->dev, "tx");
        spfi->rx_ch = dma_request_slave_channel(spfi->dev, "rx");
index 6fea4af51c413f27640c626ad61a2bcdca0b6bac..f08e812b29847bd3ba5f44ab1c5b02aae8437258 100644 (file)
@@ -370,8 +370,6 @@ static int __maybe_unused mx51_ecspi_config(struct spi_imx_data *spi_imx,
        if (spi_imx->dma_is_inited) {
                dma = readl(spi_imx->base + MX51_ECSPI_DMA);
 
-               spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
-               spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
                spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2;
                rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET;
                tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET;
@@ -868,6 +866,8 @@ static int spi_imx_sdma_init(struct device *dev, struct spi_imx_data *spi_imx,
        master->max_dma_len = MAX_SDMA_BD_BYTES;
        spi_imx->bitbang.master->flags = SPI_MASTER_MUST_RX |
                                         SPI_MASTER_MUST_TX;
+       spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
+       spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
        spi_imx->dma_is_inited = 1;
 
        return 0;
@@ -903,7 +903,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 
        if (tx) {
                desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
-                                       tx->sgl, tx->nents, DMA_TO_DEVICE,
+                                       tx->sgl, tx->nents, DMA_MEM_TO_DEV,
                                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
                if (!desc_tx)
                        goto no_dma;
@@ -915,7 +915,7 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx,
 
        if (rx) {
                desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
-                                       rx->sgl, rx->nents, DMA_FROM_DEVICE,
+                                       rx->sgl, rx->nents, DMA_DEV_TO_MEM,
                                        DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
                if (!desc_rx)
                        goto no_dma;
index ecae0d4e29459048679aa30aa3e30c4a1eac55d8..965d2bdcfdcc710e4c8f0374fffe4e9570b117b7 100644 (file)
@@ -588,7 +588,7 @@ static int mpc512x_psc_spi_of_remove(struct platform_device *op)
        return mpc512x_psc_spi_do_remove(&op->dev);
 }
 
-static struct of_device_id mpc512x_psc_spi_of_match[] = {
+static const struct of_device_id mpc512x_psc_spi_of_match[] = {
        { .compatible = "fsl,mpc5121-psc-spi", },
        {},
 };
index b283d537d16aaacc2b57e402b0dd6e3cd078a7e9..e99d6a93d3943c8b578fbd4a00ecba152d0881c5 100644 (file)
@@ -238,7 +238,7 @@ static int octeon_spi_remove(struct platform_device *pdev)
        return 0;
 }
 
-static struct of_device_id octeon_spi_match[] = {
+static const struct of_device_id octeon_spi_match[] = {
        { .compatible = "cavium,octeon-3010-spi", },
        {},
 };
index d890d309dff9b553364654ae343f322c65d1c51a..35b332dacb13a1e7d91c868dab37ac8a5abc4dac 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -294,16 +295,6 @@ static int omap1_spi100k_setup(struct spi_device *spi)
        return ret;
 }
 
-static int omap1_spi100k_prepare_hardware(struct spi_master *master)
-{
-       struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
-
-       clk_prepare_enable(spi100k->ick);
-       clk_prepare_enable(spi100k->fck);
-
-       return 0;
-}
-
 static int omap1_spi100k_transfer_one_message(struct spi_master *master,
                                              struct spi_message *m)
 {
@@ -372,16 +363,6 @@ static int omap1_spi100k_transfer_one_message(struct spi_master *master,
        return status;
 }
 
-static int omap1_spi100k_unprepare_hardware(struct spi_master *master)
-{
-       struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
-
-       clk_disable_unprepare(spi100k->ick);
-       clk_disable_unprepare(spi100k->fck);
-
-       return 0;
-}
-
 static int omap1_spi100k_probe(struct platform_device *pdev)
 {
        struct spi_master       *master;
@@ -402,14 +383,12 @@ static int omap1_spi100k_probe(struct platform_device *pdev)
 
        master->setup = omap1_spi100k_setup;
        master->transfer_one_message = omap1_spi100k_transfer_one_message;
-       master->prepare_transfer_hardware = omap1_spi100k_prepare_hardware;
-       master->unprepare_transfer_hardware = omap1_spi100k_unprepare_hardware;
-       master->cleanup = NULL;
        master->num_chipselect = 2;
        master->mode_bits = MODEBITS;
        master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
        master->min_speed_hz = OMAP1_SPI100K_MAX_FREQ/(1<<16);
        master->max_speed_hz = OMAP1_SPI100K_MAX_FREQ;
+       master->auto_runtime_pm = true;
 
        spi100k = spi_master_get_devdata(master);
 
@@ -434,22 +413,96 @@ static int omap1_spi100k_probe(struct platform_device *pdev)
                goto err;
        }
 
+       status = clk_prepare_enable(spi100k->ick);
+       if (status != 0) {
+               dev_err(&pdev->dev, "failed to enable ick: %d\n", status);
+               goto err;
+       }
+
+       status = clk_prepare_enable(spi100k->fck);
+       if (status != 0) {
+               dev_err(&pdev->dev, "failed to enable fck: %d\n", status);
+               goto err_ick;
+       }
+
+       pm_runtime_enable(&pdev->dev);
+       pm_runtime_set_active(&pdev->dev);
+
        status = devm_spi_register_master(&pdev->dev, master);
        if (status < 0)
-               goto err;
+               goto err_fck;
 
        return status;
 
+err_fck:
+       clk_disable_unprepare(spi100k->fck);
+err_ick:
+       clk_disable_unprepare(spi100k->ick);
 err:
        spi_master_put(master);
        return status;
 }
 
+static int omap1_spi100k_remove(struct platform_device *pdev)
+{
+       struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
+       struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+
+       pm_runtime_disable(&pdev->dev);
+
+       clk_disable_unprepare(spi100k->fck);
+       clk_disable_unprepare(spi100k->ick);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM
+static int omap1_spi100k_runtime_suspend(struct device *dev)
+{
+       struct spi_master *master = spi_master_get(dev_get_drvdata(dev));
+       struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+
+       clk_disable_unprepare(spi100k->ick);
+       clk_disable_unprepare(spi100k->fck);
+
+       return 0;
+}
+
+static int omap1_spi100k_runtime_resume(struct device *dev)
+{
+       struct spi_master *master = spi_master_get(dev_get_drvdata(dev));
+       struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+       int ret;
+
+       ret = clk_prepare_enable(spi100k->ick);
+       if (ret != 0) {
+               dev_err(dev, "Failed to enable ick: %d\n", ret);
+               return ret;
+       }
+
+       ret = clk_prepare_enable(spi100k->fck);
+       if (ret != 0) {
+               dev_err(dev, "Failed to enable fck: %d\n", ret);
+               clk_disable_unprepare(spi100k->ick);
+               return ret;
+       }
+
+       return 0;
+}
+#endif
+
+static const struct dev_pm_ops omap1_spi100k_pm = {
+       SET_RUNTIME_PM_OPS(omap1_spi100k_runtime_suspend,
+                          omap1_spi100k_runtime_resume, NULL)
+};
+
 static struct platform_driver omap1_spi100k_driver = {
        .driver = {
                .name           = "omap1_spi100k",
+               .pm             = &omap1_spi100k_pm,
        },
        .probe          = omap1_spi100k_probe,
+       .remove         = omap1_spi100k_remove,
 };
 
 module_platform_driver(omap1_spi100k_driver);
index 3c0844457c075d0c5f3ed98fce96dd7951888f05..55576db315497379d73d6f2513b2bbfadb32d2d0 100644 (file)
@@ -44,7 +44,6 @@
 #include <linux/module.h>
 #include <linux/io.h>
 
-#include <asm/irq.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
 
index ee513a85296b19a3c57aad4daef4ec42ee3d90b1..94af80676684e4708d373e1551b8a6c46ea118fa 100644 (file)
  */
 #define DEFAULT_SSP_REG_IMSC  0x0UL
 #define DISABLE_ALL_INTERRUPTS DEFAULT_SSP_REG_IMSC
-#define ENABLE_ALL_INTERRUPTS (~DEFAULT_SSP_REG_IMSC)
+#define ENABLE_ALL_INTERRUPTS ( \
+       SSP_IMSC_MASK_RORIM | \
+       SSP_IMSC_MASK_RTIM | \
+       SSP_IMSC_MASK_RXIM | \
+       SSP_IMSC_MASK_TXIM \
+)
 
 #define CLEAR_ALL_INTERRUPTS  0x3
 
@@ -1251,7 +1256,6 @@ static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id)
        struct pl022 *pl022 = dev_id;
        struct spi_message *msg = pl022->cur_msg;
        u16 irq_status = 0;
-       u16 flag = 0;
 
        if (unlikely(!msg)) {
                dev_err(&pl022->adev->dev,
@@ -1280,9 +1284,6 @@ static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id)
                if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RFF)
                        dev_err(&pl022->adev->dev,
                                "RXFIFO is full\n");
-               if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
-                       dev_err(&pl022->adev->dev,
-                               "TXFIFO is full\n");
 
                /*
                 * Disable and clear interrupts, disable SSP,
@@ -1303,8 +1304,7 @@ static irqreturn_t pl022_interrupt_handler(int irq, void *dev_id)
 
        readwriter(pl022);
 
-       if ((pl022->tx == pl022->tx_end) && (flag == 0)) {
-               flag = 1;
+       if (pl022->tx == pl022->tx_end) {
                /* Disable Transmit interrupt, enable receive interrupt */
                writew((readw(SSP_IMSC(pl022->virtbase)) &
                       ~SSP_IMSC_MASK_TXIM) | SSP_IMSC_MASK_RXIM,
index 6f72ad01e0410257a42bc8739f8962abfbaf3b5e..e3223ac75a7c57d55e1c56e90dfac2aea877f8d5 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/spi.h>
 #include <linux/pm_runtime.h>
 #include <linux/acpi.h>
 
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/delay.h>
-
 #include "spi-pxa2xx.h"
 
 MODULE_AUTHOR("Stephen Street");
@@ -67,54 +64,6 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 #define LPSS_TX_LOTHRESH_DFLT  160
 #define LPSS_TX_HITHRESH_DFLT  224
 
-struct quark_spi_rate {
-       u32 bitrate;
-       u32 dds_clk_rate;
-       u32 clk_div;
-};
-
-/*
- * 'rate', 'dds', 'clk_div' lookup table, which is defined in
- * the Quark SPI datasheet.
- */
-static const struct quark_spi_rate quark_spi_rate_table[] = {
-/*     bitrate,        dds_clk_rate,   clk_div */
-       {50000000,      0x800000,       0},
-       {40000000,      0x666666,       0},
-       {25000000,      0x400000,       0},
-       {20000000,      0x666666,       1},
-       {16667000,      0x800000,       2},
-       {13333000,      0x666666,       2},
-       {12500000,      0x200000,       0},
-       {10000000,      0x800000,       4},
-       {8000000,       0x666666,       4},
-       {6250000,       0x400000,       3},
-       {5000000,       0x400000,       4},
-       {4000000,       0x666666,       9},
-       {3125000,       0x80000,        0},
-       {2500000,       0x400000,       9},
-       {2000000,       0x666666,       19},
-       {1563000,       0x40000,        0},
-       {1250000,       0x200000,       9},
-       {1000000,       0x400000,       24},
-       {800000,        0x666666,       49},
-       {781250,        0x20000,        0},
-       {625000,        0x200000,       19},
-       {500000,        0x400000,       49},
-       {400000,        0x666666,       99},
-       {390625,        0x10000,        0},
-       {250000,        0x400000,       99},
-       {200000,        0x666666,       199},
-       {195313,        0x8000,         0},
-       {125000,        0x100000,       49},
-       {100000,        0x200000,       124},
-       {50000,         0x100000,       124},
-       {25000,         0x80000,        124},
-       {10016,         0x20000,        77},
-       {5040,          0x20000,        154},
-       {1002,          0x8000,         194},
-};
-
 /* Offset from drv_data->lpss_base */
 #define GENERAL_REG            0x08
 #define GENERAL_REG_RXTO_HOLDOFF_DISABLE BIT(24)
@@ -701,25 +650,124 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
 }
 
 /*
- * The Quark SPI data sheet gives a table, and for the given 'rate',
- * the 'dds' and 'clk_div' can be found in the table.
+ * The Quark SPI has an additional 24-bit register (DDS_CLK_RATE) to multiply
+ * the input frequency by fractions of 2^24. It also has a divider by 5.
+ *
+ * There are formulas to get baud rate value for given input frequency and
+ * divider parameters, such as DDS_CLK_RATE and SCR:
+ *
+ * Fsys = 200MHz
+ *
+ * Fssp = Fsys * DDS_CLK_RATE / 2^24                   (1)
+ * Baud rate = Fsclk = Fssp / (2 * (SCR + 1))          (2)
+ *
+ * DDS_CLK_RATE is either 2^n or 2^n / 5.
+ * SCR is in range 0 .. 255
+ *
+ * Divisor = 5^i * 2^j * 2 * k
+ *       i = [0, 1]      i = 1 iff j = 0 or j > 3
+ *       j = [0, 23]     j = 0 iff i = 1
+ *       k = [1, 256]
+ * Special case: j = 0, i = 1: Divisor = 2 / 5
+ *
+ * According to the specification, the recommended values for DDS_CLK_RATE
+ * are:
+ *     Case 1:         2^n, n = [0, 23]
+ *     Case 2:         2^24 * 2 / 5 (0x666666)
+ *     Case 3:         less than or equal to 2^24 / 5 / 16 (0x33333)
+ *
+ * In all cases the lowest possible value is better.
+ *
+ * The function calculates parameters for all cases and chooses the one closest
+ * to the requested baud rate.
  */
-static u32 quark_x1000_set_clk_regvals(u32 rate, u32 *dds, u32 *clk_div)
+static unsigned int quark_x1000_get_clk_div(int rate, u32 *dds)
 {
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(quark_spi_rate_table); i++) {
-               if (rate >= quark_spi_rate_table[i].bitrate) {
-                       *dds = quark_spi_rate_table[i].dds_clk_rate;
-                       *clk_div = quark_spi_rate_table[i].clk_div;
-                       return quark_spi_rate_table[i].bitrate;
+       unsigned long xtal = 200000000;
+       unsigned long fref = xtal / 2;          /* mandatory division by 2,
+                                                  see (2) */
+                                               /* case 3 */
+       unsigned long fref1 = fref / 2;         /* case 1 */
+       unsigned long fref2 = fref * 2 / 5;     /* case 2 */
+       unsigned long scale;
+       unsigned long q, q1, q2;
+       long r, r1, r2;
+       u32 mul;
+
+       /* Case 1 */
+
+       /* Set initial value for DDS_CLK_RATE */
+       mul = (1 << 24) >> 1;
+
+       /* Calculate initial quot */
+       q1 = DIV_ROUND_CLOSEST(fref1, rate);
+
+       /* Scale q1 if it's too big */
+       if (q1 > 256) {
+               /* Scale q1 to range [1, 512] */
+               scale = fls_long(q1 - 1);
+               if (scale > 9) {
+                       q1 >>= scale - 9;
+                       mul >>= scale - 9;
                }
+
+               /* Round the result if we have a remainder */
+               q1 += q1 & 1;
        }
 
-       *dds = quark_spi_rate_table[i-1].dds_clk_rate;
-       *clk_div = quark_spi_rate_table[i-1].clk_div;
+       /* Decrease DDS_CLK_RATE as much as we can without loss of precision */
+       scale = __ffs(q1);
+       q1 >>= scale;
+       mul >>= scale;
+
+       /* Get the remainder */
+       r1 = abs(fref1 / (1 << (24 - fls_long(mul))) / q1 - rate);
+
+       /* Case 2 */
+
+       q2 = DIV_ROUND_CLOSEST(fref2, rate);
+       r2 = abs(fref2 / q2 - rate);
 
-       return quark_spi_rate_table[i-1].bitrate;
+       /*
+        * Choose the better of the two: the smaller the remainder, the
+        * better. We can't use case 2 if q2 is greater than 256, since the
+        * SCR register can hold only values 0 .. 255.
+        */
+       if (r2 >= r1 || q2 > 256) {
+               /* case 1 is better */
+               r = r1;
+               q = q1;
+       } else {
+               /* case 2 is better */
+               r = r2;
+               q = q2;
+               mul = (1 << 24) * 2 / 5;
+       }
+
+       /* Check case 3 only if the divisor is big enough */
+       if (fref / rate >= 80) {
+               u64 fssp;
+               u32 m;
+
+               /* Calculate initial quot */
+               q1 = DIV_ROUND_CLOSEST(fref, rate);
+               m = (1 << 24) / q1;
+
+               /* Get the remainder */
+               fssp = (u64)fref * m;
+               do_div(fssp, 1 << 24);
+               r1 = abs(fssp - rate);
+
+               /* Choose this one if it fits better */
+               if (r1 < r) {
+                       /* case 3 is better */
+                       q = 1;
+                       mul = m;
+               }
+       }
+
+       *dds = mul;
+       return q - 1;
 }
 
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
@@ -730,23 +778,25 @@ static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
        rate = min_t(int, ssp_clk, rate);
 
        if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
-               return ((ssp_clk / (2 * rate) - 1) & 0xff) << 8;
+               return (ssp_clk / (2 * rate) - 1) & 0xff;
        else
-               return ((ssp_clk / rate - 1) & 0xfff) << 8;
+               return (ssp_clk / rate - 1) & 0xfff;
 }
 
 static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
                                           struct chip_data *chip, int rate)
 {
-       u32 clk_div;
+       unsigned int clk_div;
 
        switch (drv_data->ssp_type) {
        case QUARK_X1000_SSP:
-               quark_x1000_set_clk_regvals(rate, &chip->dds_rate, &clk_div);
-               return clk_div << 8;
+               clk_div = quark_x1000_get_clk_div(rate, &chip->dds_rate);
+               break;
        default:
-               return ssp_get_clk_div(drv_data, rate);
+               clk_div = ssp_get_clk_div(drv_data, rate);
+               break;
        }
+       return clk_div << 8;
 }
 
 static void pump_transfers(unsigned long data)
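
A worked instance of the case-2 arithmetic described in the comment above, for a 2 MHz request: fref2 = 100 MHz * 2 / 5 = 40 MHz, q2 = 20, so SCR = 19 and DDS_CLK_RATE = 0x666666, which agrees with the {2000000, 0x666666, 19} row of the lookup table this patch deletes. The standalone sketch below reproduces that computation; the main() harness and variable names are illustrative only, not part of the driver.

#include <stdio.h>

int main(void)
{
	unsigned long fref = 200000000UL / 2;		/* Fsys / 2, see (2) */
	unsigned long fref2 = fref * 2 / 5;		/* case 2 reference clock */
	unsigned long rate = 2000000;			/* requested baud rate */
	unsigned long q2 = (fref2 + rate / 2) / rate;	/* DIV_ROUND_CLOSEST */
	unsigned long dds = (1UL << 24) * 2 / 5;	/* 2^24 * 2 / 5 */

	/* SCR is the divider minus one, as programmed into the register */
	printf("SCR=%lu DDS_CLK_RATE=%#lx\n", q2 - 1, dds);
	return 0;
}
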
index 2b2c359f5a501da32a38af38cfe0c0956b0e23c9..810a7fae347988a7d9dc1101586234b8ef8efc77 100644 (file)
@@ -22,6 +22,8 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 
 #define QUP_CONFIG                     0x0000
 #define QUP_STATE                      0x0004
 
 #define SPI_NUM_CHIPSELECTS            4
 
+#define SPI_MAX_DMA_XFER               (SZ_64K - 64)
+
 /* high speed mode is when the bus rate is greater than 26 MHz */
 #define SPI_HS_MIN_RATE                        26000000
 #define SPI_MAX_RATE                   50000000
@@ -140,9 +144,14 @@ struct spi_qup {
        struct completion       done;
        int                     error;
        int                     w_size; /* bytes per SPI word */
+       int                     n_words;
        int                     tx_bytes;
        int                     rx_bytes;
        int                     qup_v1;
+
+       int                     use_dma;
+       struct dma_slave_config rx_conf;
+       struct dma_slave_config tx_conf;
 };
 
 
@@ -198,7 +207,6 @@ static int spi_qup_set_state(struct spi_qup *controller, u32 state)
        return 0;
 }
 
-
 static void spi_qup_fifo_read(struct spi_qup *controller,
                            struct spi_transfer *xfer)
 {
@@ -266,6 +274,107 @@ static void spi_qup_fifo_write(struct spi_qup *controller,
        }
 }
 
+static void spi_qup_dma_done(void *data)
+{
+       struct spi_qup *qup = data;
+
+       complete(&qup->done);
+}
+
+static int spi_qup_prep_sg(struct spi_master *master, struct spi_transfer *xfer,
+                          enum dma_transfer_direction dir,
+                          dma_async_tx_callback callback)
+{
+       struct spi_qup *qup = spi_master_get_devdata(master);
+       unsigned long flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+       struct dma_async_tx_descriptor *desc;
+       struct scatterlist *sgl;
+       struct dma_chan *chan;
+       dma_cookie_t cookie;
+       unsigned int nents;
+
+       if (dir == DMA_MEM_TO_DEV) {
+               chan = master->dma_tx;
+               nents = xfer->tx_sg.nents;
+               sgl = xfer->tx_sg.sgl;
+       } else {
+               chan = master->dma_rx;
+               nents = xfer->rx_sg.nents;
+               sgl = xfer->rx_sg.sgl;
+       }
+
+       desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, flags);
+       if (!desc)
+               return -EINVAL;
+
+       desc->callback = callback;
+       desc->callback_param = qup;
+
+       cookie = dmaengine_submit(desc);
+
+       return dma_submit_error(cookie);
+}
+
+static void spi_qup_dma_terminate(struct spi_master *master,
+                                 struct spi_transfer *xfer)
+{
+       if (xfer->tx_buf)
+               dmaengine_terminate_all(master->dma_tx);
+       if (xfer->rx_buf)
+               dmaengine_terminate_all(master->dma_rx);
+}
+
+static int spi_qup_do_dma(struct spi_master *master, struct spi_transfer *xfer)
+{
+       dma_async_tx_callback rx_done = NULL, tx_done = NULL;
+       int ret;
+
+       if (xfer->rx_buf)
+               rx_done = spi_qup_dma_done;
+       else if (xfer->tx_buf)
+               tx_done = spi_qup_dma_done;
+
+       if (xfer->rx_buf) {
+               ret = spi_qup_prep_sg(master, xfer, DMA_DEV_TO_MEM, rx_done);
+               if (ret)
+                       return ret;
+
+               dma_async_issue_pending(master->dma_rx);
+       }
+
+       if (xfer->tx_buf) {
+               ret = spi_qup_prep_sg(master, xfer, DMA_MEM_TO_DEV, tx_done);
+               if (ret)
+                       return ret;
+
+               dma_async_issue_pending(master->dma_tx);
+       }
+
+       return 0;
+}
+
+static int spi_qup_do_pio(struct spi_master *master, struct spi_transfer *xfer)
+{
+       struct spi_qup *qup = spi_master_get_devdata(master);
+       int ret;
+
+       ret = spi_qup_set_state(qup, QUP_STATE_RUN);
+       if (ret) {
+               dev_warn(qup->dev, "cannot set RUN state\n");
+               return ret;
+       }
+
+       ret = spi_qup_set_state(qup, QUP_STATE_PAUSE);
+       if (ret) {
+               dev_warn(qup->dev, "cannot set PAUSE state\n");
+               return ret;
+       }
+
+       spi_qup_fifo_write(qup, xfer);
+
+       return 0;
+}
+
 static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 {
        struct spi_qup *controller = dev_id;
@@ -315,11 +424,13 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
                error = -EIO;
        }
 
-       if (opflags & QUP_OP_IN_SERVICE_FLAG)
-               spi_qup_fifo_read(controller, xfer);
+       if (!controller->use_dma) {
+               if (opflags & QUP_OP_IN_SERVICE_FLAG)
+                       spi_qup_fifo_read(controller, xfer);
 
-       if (opflags & QUP_OP_OUT_SERVICE_FLAG)
-               spi_qup_fifo_write(controller, xfer);
+               if (opflags & QUP_OP_OUT_SERVICE_FLAG)
+                       spi_qup_fifo_write(controller, xfer);
+       }
 
        spin_lock_irqsave(&controller->lock, flags);
        controller->error = error;
@@ -332,13 +443,35 @@ static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static u32
+spi_qup_get_mode(struct spi_master *master, struct spi_transfer *xfer)
+{
+       struct spi_qup *qup = spi_master_get_devdata(master);
+       u32 mode;
+
+       qup->w_size = 4;
+
+       if (xfer->bits_per_word <= 8)
+               qup->w_size = 1;
+       else if (xfer->bits_per_word <= 16)
+               qup->w_size = 2;
+
+       qup->n_words = xfer->len / qup->w_size;
+
+       if (qup->n_words <= (qup->in_fifo_sz / sizeof(u32)))
+               mode = QUP_IO_M_MODE_FIFO;
+       else
+               mode = QUP_IO_M_MODE_BLOCK;
+
+       return mode;
+}
 
 /* set clock freq ... bits per word */
 static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
 {
        struct spi_qup *controller = spi_master_get_devdata(spi->master);
        u32 config, iomode, mode, control;
-       int ret, n_words, w_size;
+       int ret, n_words;
 
        if (spi->mode & SPI_LOOP && xfer->len > controller->in_fifo_sz) {
                dev_err(controller->dev, "too big size for loopback %d > %d\n",
@@ -358,35 +491,54 @@ static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
                return -EIO;
        }
 
-       w_size = 4;
-       if (xfer->bits_per_word <= 8)
-               w_size = 1;
-       else if (xfer->bits_per_word <= 16)
-               w_size = 2;
-
-       n_words = xfer->len / w_size;
-       controller->w_size = w_size;
+       mode = spi_qup_get_mode(spi->master, xfer);
+       n_words = controller->n_words;
 
-       if (n_words <= (controller->in_fifo_sz / sizeof(u32))) {
-               mode = QUP_IO_M_MODE_FIFO;
+       if (mode == QUP_IO_M_MODE_FIFO) {
                writel_relaxed(n_words, controller->base + QUP_MX_READ_CNT);
                writel_relaxed(n_words, controller->base + QUP_MX_WRITE_CNT);
                /* must be zero for FIFO */
                writel_relaxed(0, controller->base + QUP_MX_INPUT_CNT);
                writel_relaxed(0, controller->base + QUP_MX_OUTPUT_CNT);
-       } else {
-               mode = QUP_IO_M_MODE_BLOCK;
+       } else if (!controller->use_dma) {
                writel_relaxed(n_words, controller->base + QUP_MX_INPUT_CNT);
                writel_relaxed(n_words, controller->base + QUP_MX_OUTPUT_CNT);
                /* must be zero for BLOCK and BAM */
                writel_relaxed(0, controller->base + QUP_MX_READ_CNT);
                writel_relaxed(0, controller->base + QUP_MX_WRITE_CNT);
+       } else {
+               mode = QUP_IO_M_MODE_BAM;
+               writel_relaxed(0, controller->base + QUP_MX_READ_CNT);
+               writel_relaxed(0, controller->base + QUP_MX_WRITE_CNT);
+
+               if (!controller->qup_v1) {
+                       void __iomem *input_cnt;
+
+                       input_cnt = controller->base + QUP_MX_INPUT_CNT;
+                       /*
+                        * For DMA transfers, both QUP_MX_INPUT_CNT and
+                        * QUP_MX_OUTPUT_CNT must be zero in all cases but one.
+                        * That case is an unbalanced transfer when there is
+                        * only an rx_buf.
+                        */
+                       if (xfer->tx_buf)
+                               writel_relaxed(0, input_cnt);
+                       else
+                               writel_relaxed(n_words, input_cnt);
+
+                       writel_relaxed(0, controller->base + QUP_MX_OUTPUT_CNT);
+               }
        }
 
        iomode = readl_relaxed(controller->base + QUP_IO_M_MODES);
        /* Set input and output transfer mode */
        iomode &= ~(QUP_IO_M_INPUT_MODE_MASK | QUP_IO_M_OUTPUT_MODE_MASK);
-       iomode &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
+
+       if (!controller->use_dma)
+               iomode &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
+       else
+               iomode |= QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN;
+
        iomode |= (mode << QUP_IO_M_OUTPUT_MODE_MASK_SHIFT);
        iomode |= (mode << QUP_IO_M_INPUT_MODE_MASK_SHIFT);
 
@@ -428,11 +580,31 @@ static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
        config &= ~(QUP_CONFIG_NO_INPUT | QUP_CONFIG_NO_OUTPUT | QUP_CONFIG_N);
        config |= xfer->bits_per_word - 1;
        config |= QUP_CONFIG_SPI_MODE;
+
+       if (controller->use_dma) {
+               if (!xfer->tx_buf)
+                       config |= QUP_CONFIG_NO_OUTPUT;
+               if (!xfer->rx_buf)
+                       config |= QUP_CONFIG_NO_INPUT;
+       }
+
        writel_relaxed(config, controller->base + QUP_CONFIG);
 
        /* only write to OPERATIONAL_MASK when register is present */
-       if (!controller->qup_v1)
-               writel_relaxed(0, controller->base + QUP_OPERATIONAL_MASK);
+       if (!controller->qup_v1) {
+               u32 mask = 0;
+
+               /*
+                * mask INPUT and OUTPUT service flags to prevent IRQs on FIFO
+                * status change in BAM mode
+                */
+
+               if (mode == QUP_IO_M_MODE_BAM)
+                       mask = QUP_OP_IN_SERVICE_FLAG | QUP_OP_OUT_SERVICE_FLAG;
+
+               writel_relaxed(mask, controller->base + QUP_OPERATIONAL_MASK);
+       }
+
        return 0;
 }
 
@@ -461,17 +633,13 @@ static int spi_qup_transfer_one(struct spi_master *master,
        controller->tx_bytes = 0;
        spin_unlock_irqrestore(&controller->lock, flags);
 
-       if (spi_qup_set_state(controller, QUP_STATE_RUN)) {
-               dev_warn(controller->dev, "cannot set RUN state\n");
-               goto exit;
-       }
+       if (controller->use_dma)
+               ret = spi_qup_do_dma(master, xfer);
+       else
+               ret = spi_qup_do_pio(master, xfer);
 
-       if (spi_qup_set_state(controller, QUP_STATE_PAUSE)) {
-               dev_warn(controller->dev, "cannot set PAUSE state\n");
+       if (ret)
                goto exit;
-       }
-
-       spi_qup_fifo_write(controller, xfer);
 
        if (spi_qup_set_state(controller, QUP_STATE_RUN)) {
                dev_warn(controller->dev, "cannot set EXECUTE state\n");
@@ -480,6 +648,7 @@ static int spi_qup_transfer_one(struct spi_master *master,
 
        if (!wait_for_completion_timeout(&controller->done, timeout))
                ret = -ETIMEDOUT;
+
 exit:
        spi_qup_set_state(controller, QUP_STATE_RESET);
        spin_lock_irqsave(&controller->lock, flags);
@@ -487,6 +656,97 @@ exit:
        if (!ret)
                ret = controller->error;
        spin_unlock_irqrestore(&controller->lock, flags);
+
+       if (ret && controller->use_dma)
+               spi_qup_dma_terminate(master, xfer);
+
+       return ret;
+}
+
+static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
+                           struct spi_transfer *xfer)
+{
+       struct spi_qup *qup = spi_master_get_devdata(master);
+       size_t dma_align = dma_get_cache_alignment();
+       u32 mode;
+
+       qup->use_dma = 0;
+
+       if (xfer->rx_buf && (xfer->len % qup->in_blk_sz ||
+           IS_ERR_OR_NULL(master->dma_rx) ||
+           !IS_ALIGNED((size_t)xfer->rx_buf, dma_align)))
+               return false;
+
+       if (xfer->tx_buf && (xfer->len % qup->out_blk_sz ||
+           IS_ERR_OR_NULL(master->dma_tx) ||
+           !IS_ALIGNED((size_t)xfer->tx_buf, dma_align)))
+               return false;
+
+       mode = spi_qup_get_mode(master, xfer);
+       if (mode == QUP_IO_M_MODE_FIFO)
+               return false;
+
+       qup->use_dma = 1;
+
+       return true;
+}
+
+static void spi_qup_release_dma(struct spi_master *master)
+{
+       if (!IS_ERR_OR_NULL(master->dma_rx))
+               dma_release_channel(master->dma_rx);
+       if (!IS_ERR_OR_NULL(master->dma_tx))
+               dma_release_channel(master->dma_tx);
+}
+
+static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
+{
+       struct spi_qup *spi = spi_master_get_devdata(master);
+       struct dma_slave_config *rx_conf = &spi->rx_conf,
+                               *tx_conf = &spi->tx_conf;
+       struct device *dev = spi->dev;
+       int ret;
+
+       /* allocate dma resources, if available */
+       master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+       if (IS_ERR(master->dma_rx))
+               return PTR_ERR(master->dma_rx);
+
+       master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+       if (IS_ERR(master->dma_tx)) {
+               ret = PTR_ERR(master->dma_tx);
+               goto err_tx;
+       }
+
+       /* set DMA parameters */
+       rx_conf->direction = DMA_DEV_TO_MEM;
+       rx_conf->device_fc = 1;
+       rx_conf->src_addr = base + QUP_INPUT_FIFO;
+       rx_conf->src_maxburst = spi->in_blk_sz;
+
+       tx_conf->direction = DMA_MEM_TO_DEV;
+       tx_conf->device_fc = 1;
+       tx_conf->dst_addr = base + QUP_OUTPUT_FIFO;
+       tx_conf->dst_maxburst = spi->out_blk_sz;
+
+       ret = dmaengine_slave_config(master->dma_rx, rx_conf);
+       if (ret) {
+               dev_err(dev, "failed to configure RX channel\n");
+               goto err;
+       }
+
+       ret = dmaengine_slave_config(master->dma_tx, tx_conf);
+       if (ret) {
+               dev_err(dev, "failed to configure TX channel\n");
+               goto err;
+       }
+
+       return 0;
+
+err:
+       dma_release_channel(master->dma_tx);
+err_tx:
+       dma_release_channel(master->dma_rx);
        return ret;
 }
 
@@ -563,6 +823,8 @@ static int spi_qup_probe(struct platform_device *pdev)
        master->transfer_one = spi_qup_transfer_one;
        master->dev.of_node = pdev->dev.of_node;
        master->auto_runtime_pm = true;
+       master->dma_alignment = dma_get_cache_alignment();
+       master->max_dma_len = SPI_MAX_DMA_XFER;
 
        platform_set_drvdata(pdev, master);
 
@@ -574,6 +836,12 @@ static int spi_qup_probe(struct platform_device *pdev)
        controller->cclk = cclk;
        controller->irq = irq;
 
+       ret = spi_qup_init_dma(master, res->start);
+       if (ret == -EPROBE_DEFER)
+               goto error;
+       else if (!ret)
+               master->can_dma = spi_qup_can_dma;
+
        /* set v1 flag if device is version 1 */
        if (of_device_is_compatible(dev->of_node, "qcom,spi-qup-v1.1.1"))
                controller->qup_v1 = 1;
@@ -610,7 +878,7 @@ static int spi_qup_probe(struct platform_device *pdev)
        ret = spi_qup_set_state(controller, QUP_STATE_RESET);
        if (ret) {
                dev_err(dev, "cannot set RESET state\n");
-               goto error;
+               goto error_dma;
        }
 
        writel_relaxed(0, base + QUP_OPERATIONAL);
@@ -634,7 +902,7 @@ static int spi_qup_probe(struct platform_device *pdev)
        ret = devm_request_irq(dev, irq, spi_qup_qup_irq,
                               IRQF_TRIGGER_HIGH, pdev->name, controller);
        if (ret)
-               goto error;
+               goto error_dma;
 
        pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
        pm_runtime_use_autosuspend(dev);
@@ -649,6 +917,8 @@ static int spi_qup_probe(struct platform_device *pdev)
 
 disable_pm:
        pm_runtime_disable(&pdev->dev);
+error_dma:
+       spi_qup_release_dma(master);
 error:
        clk_disable_unprepare(cclk);
        clk_disable_unprepare(iclk);
@@ -740,6 +1010,8 @@ static int spi_qup_remove(struct platform_device *pdev)
        if (ret)
                return ret;
 
+       spi_qup_release_dma(master);
+
        clk_disable_unprepare(controller->cclk);
        clk_disable_unprepare(controller->iclk);
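
The spi_qup_can_dma() hook added above reduces to the same per-direction test for rx and tx: the transfer length must be a whole number of BAM blocks and the buffer must be cache-line aligned, otherwise the core falls back to PIO. A minimal sketch of that predicate, where qup_dma_ok() and its parameters are illustrative names rather than driver API:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Mirrors the shape of the per-direction checks in spi_qup_can_dma() */
static bool qup_dma_ok(const void *buf, size_t len,
		       size_t blk_sz, size_t dma_align)
{
	if (!buf)
		return true;	/* direction unused, imposes no limit */
	if (len % blk_sz)
		return false;	/* partial blocks must go through PIO */
	return ((uintptr_t)buf % dma_align) == 0;
}

int main(void)
{
	char buf[64] __attribute__((aligned(64)));	/* cache-line aligned */

	return qup_dma_ok(buf, 64, 16, 64) ? 0 : 1;
}

Keeping the test per direction matters because a transfer may be rx-only or tx-only, and an absent direction should not veto DMA.
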
 
index 1a777dc261d6f5bfa2e56dc437fb7d957d2b0891..68e7efeb9a27a6751d49ad3a6583b2fe5d97a811 100644 (file)
@@ -179,6 +179,7 @@ struct rockchip_spi {
        u8 tmode;
        u8 bpw;
        u8 n_bytes;
+       u8 rsd_nsecs;
        unsigned len;
        u32 speed;
 
@@ -302,8 +303,8 @@ static int rockchip_spi_prepare_message(struct spi_master *master,
        return 0;
 }
 
-static int rockchip_spi_unprepare_message(struct spi_master *master,
-                                         struct spi_message *msg)
+static void rockchip_spi_handle_err(struct spi_master *master,
+                                   struct spi_message *msg)
 {
        unsigned long flags;
        struct rockchip_spi *rs = spi_master_get_devdata(master);
@@ -313,8 +314,8 @@ static int rockchip_spi_unprepare_message(struct spi_master *master,
        /*
         * For DMA mode, we need to terminate the DMA channel and flush
         * the FIFO for the next transfer if the DMA transfer times out.
-        * unprepare_message() was called by core if transfer complete
-        * or timeout. Maybe it is reasonable for error handling here.
+        * handle_err() is called by the core if the transfer failed.
+        * Maybe it is reasonable to do the error handling here.
         */
        if (rs->use_dma) {
                if (rs->state & RXBUSY) {
@@ -327,6 +328,12 @@ static int rockchip_spi_unprepare_message(struct spi_master *master,
        }
 
        spin_unlock_irqrestore(&rs->lock, flags);
+}
+
+static int rockchip_spi_unprepare_message(struct spi_master *master,
+                                         struct spi_message *msg)
+{
+       struct rockchip_spi *rs = spi_master_get_devdata(master);
 
        spi_enable_chip(rs, 0);
 
@@ -493,6 +500,7 @@ static void rockchip_spi_config(struct rockchip_spi *rs)
 {
        u32 div = 0;
        u32 dmacr = 0;
+       int rsd = 0;
 
        u32 cr0 = (CR0_BHT_8BIT << CR0_BHT_OFFSET)
                | (CR0_SSD_ONE << CR0_SSD_OFFSET);
@@ -519,9 +527,23 @@ static void rockchip_spi_config(struct rockchip_spi *rs)
        }
 
        /* div doesn't support odd numbers */
-       div = max_t(u32, rs->max_freq / rs->speed, 1);
+       div = DIV_ROUND_UP(rs->max_freq, rs->speed);
        div = (div + 1) & 0xfffe;
 
+       /* Rx sample delay is expressed in parent clock cycles (max 3) */
+       rsd = DIV_ROUND_CLOSEST(rs->rsd_nsecs * (rs->max_freq >> 8),
+                               1000000000 >> 8);
+       if (!rsd && rs->rsd_nsecs) {
+               pr_warn_once("rockchip-spi: %u Hz are too slow to express %u ns delay\n",
+                            rs->max_freq, rs->rsd_nsecs);
+       } else if (rsd > 3) {
+               rsd = 3;
+               pr_warn_once("rockchip-spi: %u Hz are too fast to express %u ns delay, clamping at %u ns\n",
+                            rs->max_freq, rs->rsd_nsecs,
+                            rsd * 1000000000U / rs->max_freq);
+       }
+       cr0 |= rsd << CR0_RSD_OFFSET;
+
        writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
 
        writel_relaxed(rs->len - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
@@ -614,6 +636,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
        struct rockchip_spi *rs;
        struct spi_master *master;
        struct resource *mem;
+       u32 rsd_nsecs;
 
        master = spi_alloc_master(&pdev->dev, sizeof(struct rockchip_spi));
        if (!master)
@@ -665,6 +688,10 @@ static int rockchip_spi_probe(struct platform_device *pdev)
        rs->dev = &pdev->dev;
        rs->max_freq = clk_get_rate(rs->spiclk);
 
+       if (!of_property_read_u32(pdev->dev.of_node, "rx-sample-delay-ns",
+                                 &rsd_nsecs))
+               rs->rsd_nsecs = rsd_nsecs;
+
        rs->fifo_len = get_fifo_len(rs);
        if (!rs->fifo_len) {
                dev_err(&pdev->dev, "Failed to get fifo length\n");
@@ -688,6 +715,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
        master->prepare_message = rockchip_spi_prepare_message;
        master->unprepare_message = rockchip_spi_unprepare_message;
        master->transfer_one = rockchip_spi_transfer_one;
+       master->handle_err = rockchip_spi_handle_err;
 
        rs->dma_tx.ch = dma_request_slave_channel(rs->dev, "tx");
        if (!rs->dma_tx.ch)
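
The rx-sample-delay conversion above multiplies rsd_nsecs by the parent clock rate and divides by 10^9; shifting both the rate and the nanosecond base right by 8 first keeps the 32-bit intermediate from overflowing at realistic clock rates. A standalone sketch of the arithmetic (the 99 MHz rate and 10 ns delay are illustrative values, not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int max_freq = 99000000;	/* assumed spiclk rate in Hz */
	unsigned int rsd_nsecs = 10;		/* assumed requested delay in ns */
	unsigned int num = rsd_nsecs * (max_freq >> 8);
	unsigned int den = 1000000000U >> 8;
	unsigned int rsd = (num + den / 2) / den;	/* DIV_ROUND_CLOSEST */

	/* prints 1; values above 3 would be clamped as in the driver */
	printf("rsd = %u parent clock cycles\n", rsd);
	return 0;
}
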
index 46ce47076e63d143f10b298f386877bb960b8f56..186924aa4740e758af2075f417745487825b6163 100644 (file)
 #define SPBFCR_RXRST           0x40    /* Receive Buffer Data Reset */
 #define SPBFCR_TXTRG_MASK      0x30    /* Transmit Buffer Data Triggering Number */
 #define SPBFCR_RXTRG_MASK      0x07    /* Receive Buffer Data Triggering Number */
+/* QSPI on R-Car Gen2 */
+#define SPBFCR_TXTRG_1B                0x00    /* 31 bytes (1 byte available) */
+#define SPBFCR_TXTRG_32B       0x30    /* 0 byte (32 bytes available) */
+#define SPBFCR_RXTRG_1B                0x00    /* 1 byte (31 bytes available) */
+#define SPBFCR_RXTRG_32B       0x07    /* 32 bytes (0 byte available) */
+
+#define QSPI_BUFFER_SIZE        32u
 
 struct rspi_data {
        void __iomem *addr;
@@ -366,6 +373,52 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
        return 0;
 }
 
+static void qspi_update(const struct rspi_data *rspi, u8 mask, u8 val, u8 reg)
+{
+       u8 data;
+
+       data = rspi_read8(rspi, reg);
+       data &= ~mask;
+       data |= (val & mask);
+       rspi_write8(rspi, data, reg);
+}
+
+static int qspi_set_send_trigger(struct rspi_data *rspi, unsigned int len)
+{
+       unsigned int n;
+
+       n = min(len, QSPI_BUFFER_SIZE);
+
+       if (len >= QSPI_BUFFER_SIZE) {
+               /* sets triggering number to 32 bytes */
+               qspi_update(rspi, SPBFCR_TXTRG_MASK,
+                            SPBFCR_TXTRG_32B, QSPI_SPBFCR);
+       } else {
+               /* sets triggering number to 1 byte */
+               qspi_update(rspi, SPBFCR_TXTRG_MASK,
+                            SPBFCR_TXTRG_1B, QSPI_SPBFCR);
+       }
+
+       return n;
+}
+
+static void qspi_set_receive_trigger(struct rspi_data *rspi, unsigned int len)
+{
+       unsigned int n;
+
+       n = min(len, QSPI_BUFFER_SIZE);
+
+       if (len >= QSPI_BUFFER_SIZE) {
+               /* sets triggering number to 32 bytes */
+               qspi_update(rspi, SPBFCR_RXTRG_MASK,
+                            SPBFCR_RXTRG_32B, QSPI_SPBFCR);
+       } else {
+               /* sets triggering number to 1 byte */
+               qspi_update(rspi, SPBFCR_RXTRG_MASK,
+                            SPBFCR_RXTRG_1B, QSPI_SPBFCR);
+       }
+}
+
 #define set_config_register(spi, n) spi->ops->set_config_register(spi, n)
 
 static void rspi_enable_irq(const struct rspi_data *rspi, u8 enable)
@@ -609,19 +662,29 @@ static bool rspi_can_dma(struct spi_master *master, struct spi_device *spi,
        return __rspi_can_dma(rspi, xfer);
 }
 
-static int rspi_common_transfer(struct rspi_data *rspi,
-                               struct spi_transfer *xfer)
+static int rspi_dma_check_then_transfer(struct rspi_data *rspi,
+                                        struct spi_transfer *xfer)
 {
-       int ret;
-
        if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
                /* rx_buf can be NULL on RSPI on SH in TX-only Mode */
-               ret = rspi_dma_transfer(rspi, &xfer->tx_sg,
+               int ret = rspi_dma_transfer(rspi, &xfer->tx_sg,
                                        xfer->rx_buf ? &xfer->rx_sg : NULL);
                if (ret != -EAGAIN)
-                       return ret;
+                       return 0;
        }
 
+       return -EAGAIN;
+}
+
+static int rspi_common_transfer(struct rspi_data *rspi,
+                               struct spi_transfer *xfer)
+{
+       int ret;
+
+       ret = rspi_dma_check_then_transfer(rspi, xfer);
+       if (ret != -EAGAIN)
+               return ret;
+
        ret = rspi_pio_transfer(rspi, xfer->tx_buf, xfer->rx_buf, xfer->len);
        if (ret < 0)
                return ret;
@@ -661,12 +724,59 @@ static int rspi_rz_transfer_one(struct spi_master *master,
        return rspi_common_transfer(rspi, xfer);
 }
 
+static int qspi_trigger_transfer_out_int(struct rspi_data *rspi, const u8 *tx,
+                                       u8 *rx, unsigned int len)
+{
+       int i, n, ret;
+       int error;
+
+       while (len > 0) {
+               n = qspi_set_send_trigger(rspi, len);
+               qspi_set_receive_trigger(rspi, len);
+               if (n == QSPI_BUFFER_SIZE) {
+                       error = rspi_wait_for_tx_empty(rspi);
+                       if (error < 0) {
+                               dev_err(&rspi->master->dev, "transmit timeout\n");
+                               return error;
+                       }
+                       for (i = 0; i < n; i++)
+                               rspi_write_data(rspi, *tx++);
+
+                       error = rspi_wait_for_rx_full(rspi);
+                       if (error < 0) {
+                               dev_err(&rspi->master->dev, "receive timeout\n");
+                               return error;
+                       }
+                       for (i = 0; i < n; i++)
+                               *rx++ = rspi_read_data(rspi);
+               } else {
+                       ret = rspi_pio_transfer(rspi, tx, rx, n);
+                       if (ret < 0)
+                               return ret;
+               }
+               len -= n;
+       }
+
+       return 0;
+}
+
 static int qspi_transfer_out_in(struct rspi_data *rspi,
                                struct spi_transfer *xfer)
 {
+       int ret;
+
        qspi_receive_init(rspi);
 
-       return rspi_common_transfer(rspi, xfer);
+       ret = rspi_dma_check_then_transfer(rspi, xfer);
+       if (ret != -EAGAIN)
+               return ret;
+
+       ret = qspi_trigger_transfer_out_int(rspi, xfer->tx_buf,
+                                           xfer->rx_buf, xfer->len);
+       if (ret < 0)
+               return ret;
+
+       return 0;
 }
 
 static int qspi_transfer_out(struct rspi_data *rspi, struct spi_transfer *xfer)
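
The loop in qspi_trigger_transfer_out_int() above consumes the transfer in full 32-byte FIFO bursts while at least QSPI_BUFFER_SIZE bytes remain, and hands any shorter tail to rspi_pio_transfer() with the 1-byte trigger. A sketch of just that chunking policy (illustrative, not driver code):

#include <stdio.h>

#define QSPI_BUFFER_SIZE 32u

int main(void)
{
	unsigned int len = 70;	/* illustrative transfer length */

	while (len > 0) {
		unsigned int n = len < QSPI_BUFFER_SIZE ? len : QSPI_BUFFER_SIZE;

		printf("%u bytes via %s trigger\n", n,
		       n == QSPI_BUFFER_SIZE ? "32-byte" : "1-byte PIO");
		len -= n;
	}
	return 0;
}

For a 70-byte transfer this yields two 32-byte bursts followed by a 6-byte PIO tail.
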
index 9231c34b5a5c73bc9c32175d346cdb4e9c232cd6..b1c6731fbf2755199669fe204747330b985b4733 100644 (file)
@@ -324,7 +324,7 @@ static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
 
                /* Acquire DMA channels */
                sdd->rx_dma.ch = dma_request_slave_channel_compat(mask, filter,
-                                  (void *)sdd->rx_dma.dmach, dev, "rx");
+                                  (void *)(long)sdd->rx_dma.dmach, dev, "rx");
                if (!sdd->rx_dma.ch) {
                        dev_err(dev, "Failed to get RX DMA channel\n");
                        ret = -EBUSY;
@@ -333,7 +333,7 @@ static int s3c64xx_spi_prepare_transfer(struct spi_master *spi)
                spi->dma_rx = sdd->rx_dma.ch;
 
                sdd->tx_dma.ch = dma_request_slave_channel_compat(mask, filter,
-                                  (void *)sdd->tx_dma.dmach, dev, "tx");
+                                  (void *)(long)sdd->tx_dma.dmach, dev, "tx");
                if (!sdd->tx_dma.ch) {
                        dev_err(dev, "Failed to get TX DMA channel\n");
                        ret = -EBUSY;
index 5a56acf8a43e697f6e569e1ee768bc3a9cf6120a..36af4d48a700520bc3bdea7aaf987b1cf9dff057 100644 (file)
@@ -286,7 +286,7 @@ static int sc18is602_probe(struct i2c_client *client,
                        hw->freq = SC18IS602_CLOCK;
                break;
        }
-       master->bus_num = client->adapter->nr;
+       master->bus_num = np ? -1 : client->adapter->nr;
        master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
        master->bits_per_word_mask = SPI_BPW_MASK(8);
        master->setup = sc18is602_setup;
index 2faeaa7b57a8568c4a34003a7e7f65e18e42bf57..f17c0abe299f418697774fa9351e724600442d51 100644 (file)
@@ -482,7 +482,7 @@ static const struct dev_pm_ops spi_st_pm = {
        SET_RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL)
 };
 
-static struct of_device_id stm_spi_match[] = {
+static const struct of_device_id stm_spi_match[] = {
        { .compatible = "st,comms-ssc4-spi", },
        {},
 };
index 57a195041dc72e019a02b3c7c4b6f4c2a6fac59b..d5d7d2235163f9ef8c6430d99544ec7f2c791c6b 100644 (file)
@@ -16,7 +16,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/kmod.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/cache.h>
@@ -129,125 +128,11 @@ static int spi_uevent(struct device *dev, struct kobj_uevent_env *env)
        return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int spi_legacy_suspend(struct device *dev, pm_message_t message)
-{
-       int                     value = 0;
-       struct spi_driver       *drv = to_spi_driver(dev->driver);
-
-       /* suspend will stop irqs and dma; no more i/o */
-       if (drv) {
-               if (drv->suspend)
-                       value = drv->suspend(to_spi_device(dev), message);
-               else
-                       dev_dbg(dev, "... can't suspend\n");
-       }
-       return value;
-}
-
-static int spi_legacy_resume(struct device *dev)
-{
-       int                     value = 0;
-       struct spi_driver       *drv = to_spi_driver(dev->driver);
-
-       /* resume may restart the i/o queue */
-       if (drv) {
-               if (drv->resume)
-                       value = drv->resume(to_spi_device(dev));
-               else
-                       dev_dbg(dev, "... can't resume\n");
-       }
-       return value;
-}
-
-static int spi_pm_suspend(struct device *dev)
-{
-       const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-       if (pm)
-               return pm_generic_suspend(dev);
-       else
-               return spi_legacy_suspend(dev, PMSG_SUSPEND);
-}
-
-static int spi_pm_resume(struct device *dev)
-{
-       const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-       if (pm)
-               return pm_generic_resume(dev);
-       else
-               return spi_legacy_resume(dev);
-}
-
-static int spi_pm_freeze(struct device *dev)
-{
-       const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-       if (pm)
-               return pm_generic_freeze(dev);
-       else
-               return spi_legacy_suspend(dev, PMSG_FREEZE);
-}
-
-static int spi_pm_thaw(struct device *dev)
-{
-       const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-       if (pm)
-               return pm_generic_thaw(dev);
-       else
-               return spi_legacy_resume(dev);
-}
-
-static int spi_pm_poweroff(struct device *dev)
-{
-       const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-       if (pm)
-               return pm_generic_poweroff(dev);
-       else
-               return spi_legacy_suspend(dev, PMSG_HIBERNATE);
-}
-
-static int spi_pm_restore(struct device *dev)
-{
-       const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-       if (pm)
-               return pm_generic_restore(dev);
-       else
-               return spi_legacy_resume(dev);
-}
-#else
-#define spi_pm_suspend NULL
-#define spi_pm_resume  NULL
-#define spi_pm_freeze  NULL
-#define spi_pm_thaw    NULL
-#define spi_pm_poweroff        NULL
-#define spi_pm_restore NULL
-#endif
-
-static const struct dev_pm_ops spi_pm = {
-       .suspend = spi_pm_suspend,
-       .resume = spi_pm_resume,
-       .freeze = spi_pm_freeze,
-       .thaw = spi_pm_thaw,
-       .poweroff = spi_pm_poweroff,
-       .restore = spi_pm_restore,
-       SET_RUNTIME_PM_OPS(
-               pm_generic_runtime_suspend,
-               pm_generic_runtime_resume,
-               NULL
-       )
-};
-
 struct bus_type spi_bus_type = {
        .name           = "spi",
        .dev_groups     = spi_dev_groups,
        .match          = spi_match_device,
        .uevent         = spi_uevent,
-       .pm             = &spi_pm,
 };
 EXPORT_SYMBOL_GPL(spi_bus_type);
 
@@ -851,6 +736,9 @@ out:
        if (msg->status == -EINPROGRESS)
                msg->status = ret;
 
+       if (msg->status && master->handle_err)
+               master->handle_err(master, msg);
+
        spi_finalize_current_message(master);
 
        return ret;
@@ -1360,7 +1248,6 @@ of_register_spi_device(struct spi_master *master, struct device_node *nc)
        spi->dev.of_node = nc;
 
        /* Register the new device */
-       request_module("%s%s", SPI_MODULE_PREFIX, spi->modalias);
        rc = spi_add_device(spi);
        if (rc) {
                dev_err(&master->dev, "spi_device register error %s\n",
@@ -1894,6 +1781,8 @@ int spi_setup(struct spi_device *spi)
        if (!spi->max_speed_hz)
                spi->max_speed_hz = spi->master->max_speed_hz;
 
+       spi_set_cs(spi, false);
+
        if (spi->master->setup)
                status = spi->master->setup(spi);
 
index 4eb7a980e67075a018a9be2dd4146927a3f38a6c..92c909eed6b504b01086e4775143128069f33261 100644 (file)
@@ -223,7 +223,7 @@ static int spidev_message(struct spidev_data *spidev,
        struct spi_transfer     *k_xfers;
        struct spi_transfer     *k_tmp;
        struct spi_ioc_transfer *u_tmp;
-       unsigned                n, total;
+       unsigned                n, total, tx_total, rx_total;
        u8                      *tx_buf, *rx_buf;
        int                     status = -EFAULT;
 
@@ -239,33 +239,52 @@ static int spidev_message(struct spidev_data *spidev,
        tx_buf = spidev->tx_buffer;
        rx_buf = spidev->rx_buffer;
        total = 0;
+       tx_total = 0;
+       rx_total = 0;
        for (n = n_xfers, k_tmp = k_xfers, u_tmp = u_xfers;
                        n;
                        n--, k_tmp++, u_tmp++) {
                k_tmp->len = u_tmp->len;
 
                total += k_tmp->len;
-               if (total > bufsiz) {
+               /* Since the function returns the total length of transfers
+                * on success, restrict the total to positive int values to
+                * avoid the return value looking like an error.  Also check
+                * each transfer length to avoid arithmetic overflow.
+                */
+               if (total > INT_MAX || k_tmp->len > INT_MAX) {
                        status = -EMSGSIZE;
                        goto done;
                }
 
                if (u_tmp->rx_buf) {
+                       /* this transfer needs space in RX bounce buffer */
+                       rx_total += k_tmp->len;
+                       if (rx_total > bufsiz) {
+                               status = -EMSGSIZE;
+                               goto done;
+                       }
                        k_tmp->rx_buf = rx_buf;
                        if (!access_ok(VERIFY_WRITE, (u8 __user *)
                                                (uintptr_t) u_tmp->rx_buf,
                                                u_tmp->len))
                                goto done;
+                       rx_buf += k_tmp->len;
                }
                if (u_tmp->tx_buf) {
+                       /* this transfer needs space in TX bounce buffer */
+                       tx_total += k_tmp->len;
+                       if (tx_total > bufsiz) {
+                               status = -EMSGSIZE;
+                               goto done;
+                       }
                        k_tmp->tx_buf = tx_buf;
                        if (copy_from_user(tx_buf, (const u8 __user *)
                                                (uintptr_t) u_tmp->tx_buf,
                                        u_tmp->len))
                                goto done;
+                       tx_buf += k_tmp->len;
                }
-               tx_buf += k_tmp->len;
-               rx_buf += k_tmp->len;
 
                k_tmp->cs_change = !!u_tmp->cs_change;
                k_tmp->tx_nbits = u_tmp->tx_nbits;
@@ -303,8 +322,8 @@ static int spidev_message(struct spidev_data *spidev,
                                status = -EFAULT;
                                goto done;
                        }
+                       rx_buf += u_tmp->len;
                }
-               rx_buf += u_tmp->len;
        }
        status = total;
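
The bounds added above are sound because every per-transfer length is itself checked against INT_MAX: once the running total has been verified to be at most INT_MAX, adding one more length that is at most INT_MAX cannot wrap an unsigned 32-bit sum, so the total > INT_MAX test always sees the true value. A small sketch of that invariant (values illustrative):

#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int lens[] = { 4096, 0x7fffffffu };	/* illustrative */
	unsigned int total = 0;
	unsigned int n;

	for (n = 0; n < 2; n++) {
		total += lens[n];
		/* total was <= INT_MAX before the add and lens[n] is checked
		 * as well, so the unsigned sum cannot have wrapped here */
		if (total > INT_MAX || lens[n] > INT_MAX) {
			printf("transfer %u: -EMSGSIZE\n", n);
			return 1;
		}
	}
	printf("total=%u\n", total);
	return 0;
}
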
 
@@ -684,6 +703,14 @@ static const struct file_operations spidev_fops = {
 
 static struct class *spidev_class;
 
+#ifdef CONFIG_OF
+static const struct of_device_id spidev_dt_ids[] = {
+       { .compatible = "rohm,dh2228fv" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, spidev_dt_ids);
+#endif
+
 /*-------------------------------------------------------------------------*/
 
 static int spidev_probe(struct spi_device *spi)
@@ -692,6 +719,17 @@ static int spidev_probe(struct spi_device *spi)
        int                     status;
        unsigned long           minor;
 
+       /*
+        * spidev should never be referenced in DT without a specific
+        * compatible string; it is a Linux implementation detail
+        * rather than a description of the hardware.
+        */
+       if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) {
+               dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n");
+               WARN_ON(spi->dev.of_node &&
+                       !of_match_device(spidev_dt_ids, &spi->dev));
+       }
+
        /* Allocate driver data */
        spidev = kzalloc(sizeof(*spidev), GFP_KERNEL);
        if (!spidev)
@@ -758,13 +796,6 @@ static int spidev_remove(struct spi_device *spi)
        return 0;
 }
 
-static const struct of_device_id spidev_dt_ids[] = {
-       { .compatible = "rohm,dh2228fv" },
-       {},
-};
-
-MODULE_DEVICE_TABLE(of, spidev_dt_ids);
-
 static struct spi_driver spidev_spi_driver = {
        .driver = {
                .name =         "spidev",
index 2accb6e47beb35c1eff10256f7a85d749fe6d590..77d64251af40451f8e897eac1813599e3ba499d0 100644 (file)
@@ -1181,7 +1181,7 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
         * traditional iSCSI block I/O.
         */
        if (iscsit_allocate_iovecs(cmd) < 0) {
-               return iscsit_add_reject_cmd(cmd,
+               return iscsit_reject_cmd(cmd,
                                ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf);
        }
        immed_data = cmd->immediate_data;
@@ -3468,6 +3468,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
                                                tpg_np_list) {
                                struct iscsi_np *np = tpg_np->tpg_np;
                                bool inaddr_any = iscsit_check_inaddr_any(np);
+                               char *fmt_str;
 
                                if (np->np_network_transport != network_transport)
                                        continue;
@@ -3495,8 +3496,12 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
                                        }
                                }
 
-                               len = sprintf(buf, "TargetAddress="
-                                       "%s:%hu,%hu",
+                               if (np->np_sockaddr.ss_family == AF_INET6)
+                                       fmt_str = "TargetAddress=[%s]:%hu,%hu";
+                               else
+                                       fmt_str = "TargetAddress=%s:%hu,%hu";
+
+                               len = sprintf(buf, fmt_str,
                                        inaddr_any ? conn->local_ip : np->np_ip,
                                        np->np_port,
                                        tpg->tpgt);
index 79b4ec3ca2db12416a692fba785a09b462e3169f..7faa6aef9a4d5429cbf1d3810ebb181f7a911beb 100644 (file)
@@ -781,8 +781,8 @@ int se_dev_set_emulate_fua_write(struct se_device *dev, int flag)
        }
        if (flag &&
            dev->transport->get_write_cache) {
-               pr_err("emulate_fua_write not supported for this device\n");
-               return -EINVAL;
+               pr_warn("emulate_fua_write not supported for this device, ignoring\n");
+               return 0;
        }
        if (dev->export_count) {
                pr_err("emulate_fua_write cannot be changed with active"
index d1ec5804c0bb94cebeb22d5038b4861393047ba0..76c515dd802b489116dd73f342520dff8326a67e 100644 (file)
@@ -25,7 +25,7 @@
  * Function to allocate regfields which are common
  * between syscfg and memory mapped based sensors
  */
-int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
+static int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
 {
        struct device *dev = sensor->dev;
        struct regmap *regmap = sensor->regmap;
index 067bfcdb91d678aefb4fb5ca461024dd0792b699..fc0c9e198710327bbd10983b895eac4d740bb258 100644 (file)
@@ -157,7 +157,7 @@ static const struct st_thermal_sensor_ops st_mmap_sensor_ops = {
 };
 
 /* Compatible device data stih416 mpe thermal sensor */
-const struct st_thermal_compat_data st_416mpe_cdata = {
+static const struct st_thermal_compat_data st_416mpe_cdata = {
        .reg_fields             = st_mmap_thermal_regfields,
        .ops                    = &st_mmap_sensor_ops,
        .calibration_val        = 14,
@@ -166,7 +166,7 @@ const struct st_thermal_compat_data st_416mpe_cdata = {
 };
 
 /* Compatible device data stih407 thermal sensor */
-const struct st_thermal_compat_data st_407_cdata = {
+static const struct st_thermal_compat_data st_407_cdata = {
        .reg_fields             = st_mmap_thermal_regfields,
        .ops                    = &st_mmap_sensor_ops,
        .calibration_val        = 16,
@@ -174,19 +174,19 @@ const struct st_thermal_compat_data st_407_cdata = {
        .crit_temp              = 120,
 };
 
-static struct of_device_id st_mmap_thermal_of_match[] = {
+static const struct of_device_id st_mmap_thermal_of_match[] = {
        { .compatible = "st,stih416-mpe-thermal", .data = &st_416mpe_cdata },
        { .compatible = "st,stih407-thermal",     .data = &st_407_cdata },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, st_mmap_thermal_of_match);
 
-int st_mmap_probe(struct platform_device *pdev)
+static int st_mmap_probe(struct platform_device *pdev)
 {
        return st_thermal_register(pdev,  st_mmap_thermal_of_match);
 }
 
-int st_mmap_remove(struct platform_device *pdev)
+static int st_mmap_remove(struct platform_device *pdev)
 {
        return st_thermal_unregister(pdev);
 }
index 26d36a242bb89d63dec3b110e1eaa96b4c4fad65..3df5b789070325db13361d14b33eb4817bfc0d8d 100644 (file)
@@ -104,7 +104,7 @@ static const struct st_thermal_sensor_ops st_syscfg_sensor_ops = {
 };
 
 /* Compatible device data for stih415 sas thermal sensor */
-const struct st_thermal_compat_data st_415sas_cdata = {
+static const struct st_thermal_compat_data st_415sas_cdata = {
        .sys_compat             = "st,stih415-front-syscfg",
        .reg_fields             = st_415sas_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -114,7 +114,7 @@ const struct st_thermal_compat_data st_415sas_cdata = {
 };
 
 /* Compatible device data for stih415 mpe thermal sensor */
-const struct st_thermal_compat_data st_415mpe_cdata = {
+static const struct st_thermal_compat_data st_415mpe_cdata = {
        .sys_compat             = "st,stih415-system-syscfg",
        .reg_fields             = st_415mpe_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -124,7 +124,7 @@ const struct st_thermal_compat_data st_415mpe_cdata = {
 };
 
 /* Compatible device data for stih416 sas thermal sensor */
-const struct st_thermal_compat_data st_416sas_cdata = {
+static const struct st_thermal_compat_data st_416sas_cdata = {
        .sys_compat             = "st,stih416-front-syscfg",
        .reg_fields             = st_416sas_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -134,7 +134,7 @@ const struct st_thermal_compat_data st_416sas_cdata = {
 };
 
 /* Compatible device data for stid127 thermal sensor */
-const struct st_thermal_compat_data st_127_cdata = {
+static const struct st_thermal_compat_data st_127_cdata = {
        .sys_compat             = "st,stid127-cpu-syscfg",
        .reg_fields             = st_127_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -143,7 +143,7 @@ const struct st_thermal_compat_data st_127_cdata = {
        .crit_temp              = 120,
 };
 
-static struct of_device_id st_syscfg_thermal_of_match[] = {
+static const struct of_device_id st_syscfg_thermal_of_match[] = {
        { .compatible = "st,stih415-sas-thermal", .data = &st_415sas_cdata },
        { .compatible = "st,stih415-mpe-thermal", .data = &st_415mpe_cdata },
        { .compatible = "st,stih416-sas-thermal", .data = &st_416sas_cdata },
@@ -152,12 +152,12 @@ static struct of_device_id st_syscfg_thermal_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, st_syscfg_thermal_of_match);
 
-int st_syscfg_probe(struct platform_device *pdev)
+static int st_syscfg_probe(struct platform_device *pdev)
 {
        return st_thermal_register(pdev, st_syscfg_thermal_of_match);
 }
 
-int st_syscfg_remove(struct platform_device *pdev)
+static int st_syscfg_remove(struct platform_device *pdev)
 {
        return st_thermal_unregister(pdev);
 }
index 174d3bcf8bd7a16a161cdf94c53e8d14efaa55a6..4108db7e10c1094622d4abca08017db3125cf80e 100644 (file)
@@ -458,8 +458,10 @@ static void update_temperature(struct thermal_zone_device *tz)
 
        ret = thermal_zone_get_temp(tz, &temp);
        if (ret) {
-               dev_warn(&tz->device, "failed to read out thermal zone %d\n",
-                        tz->id);
+               if (ret != -EAGAIN)
+                       dev_warn(&tz->device,
+                                "failed to read out thermal zone (%d)\n",
+                                ret);
                return;
        }
 
index f8e52a1854c1ab383e32383ac65a0f167e385793..a793f7023755dc15cb2b8bebe5206bc610bb428c 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -278,11 +278,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct kioctx_table *table;
-       int i;
+       int i, res = -EINVAL;
 
        spin_lock(&mm->ioctx_lock);
        rcu_read_lock();
@@ -292,13 +292,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 
                ctx = table->table[i];
                if (ctx && ctx->aio_ring_file == file) {
-                       ctx->user_id = ctx->mmap_base = vma->vm_start;
+                       if (!atomic_read(&ctx->dead)) {
+                               ctx->user_id = ctx->mmap_base = vma->vm_start;
+                               res = 0;
+                       }
                        break;
                }
        }
 
        rcu_read_unlock();
        spin_unlock(&mm->ioctx_lock);
+       return res;
 }
 
 static const struct file_operations aio_ring_fops = {
@@ -727,6 +731,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 err_cleanup:
        aio_nr_sub(ctx->max_reqs);
 err_ctx:
+       atomic_set(&ctx->dead, 1);
+       if (ctx->mmap_size)
+               vm_munmap(ctx->mmap_base, ctx->mmap_size);
        aio_free_ring(ctx);
 err:
        mutex_unlock(&ctx->ring_lock);
@@ -748,11 +755,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 {
        struct kioctx_table *table;
 
-       if (atomic_xchg(&ctx->dead, 1))
+       spin_lock(&mm->ioctx_lock);
+       if (atomic_xchg(&ctx->dead, 1)) {
+               spin_unlock(&mm->ioctx_lock);
                return -EINVAL;
+       }
 
-
-       spin_lock(&mm->ioctx_lock);
        table = rcu_dereference_raw(mm->ioctx_table);
        WARN_ON(ctx != table->table[ctx->id]);
        table->table[ctx->id] = NULL;
index 46e0d4e857c7f493f512196603d3725ca8d3dfaa..ba1790e52ff2364bd027454650ceef6a9ba227b9 100644 (file)
@@ -2394,7 +2394,6 @@ relock:
                /*
                 * for completing the rest of the request.
                 */
-               *ppos += written;
                count -= written;
                written_buffered = generic_perform_write(file, from, *ppos);
                /*
@@ -2409,7 +2408,6 @@ relock:
                        goto out_dio;
                }
 
-               iocb->ki_pos = *ppos + written_buffered;
                /* We need to ensure that the page cache pages are written to
                 * disk and invalidated to preserve the expected O_DIRECT
                 * semantics.
@@ -2418,6 +2416,7 @@ relock:
                ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
                                endbyte);
                if (ret == 0) {
+                       iocb->ki_pos = *ppos + written_buffered;
                        written += written_buffered;
                        invalidate_mapping_pages(mapping,
                                        *ppos >> PAGE_CACHE_SHIFT,
@@ -2440,10 +2439,14 @@ out_dio:
        /* buffered aio wouldn't have proper lock coverage today */
        BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
+       if (unlikely(written <= 0))
+               goto no_sync;
+
        if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
            ((file->f_flags & O_DIRECT) && !direct_io)) {
-               ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
-                                              *ppos + count - 1);
+               ret = filemap_fdatawrite_range(file->f_mapping,
+                                              iocb->ki_pos - written,
+                                              iocb->ki_pos - 1);
                if (ret < 0)
                        written = ret;
 
@@ -2454,10 +2457,12 @@ out_dio:
                }
 
                if (!ret)
-                       ret = filemap_fdatawait_range(file->f_mapping, *ppos,
-                                                     *ppos + count - 1);
+                       ret = filemap_fdatawait_range(file->f_mapping,
+                                                     iocb->ki_pos - written,
+                                                     iocb->ki_pos - 1);
        }
 
+no_sync:
        /*
         * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
         * function pointer which is called when o_direct io completes so that
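
The net effect of these hunks: iocb->ki_pos only advances once the buffered fallback data has actually been written back (after filemap_write_and_wait_range() succeeds), the sync is skipped entirely when nothing was written, and the flush range is derived from what this call wrote instead of from *ppos + count. A tiny worked example of the new range arithmetic:

#include <stdio.h>

int main(void)
{
	long long pos = 4096;			/* starting offset */
	long long written = 8192;		/* bytes this call wrote */
	long long ki_pos = pos + written;	/* 12288, set only on success */

	/* the hunk flushes exactly the bytes written by this call: */
	printf("sync range: [%lld, %lld]\n", ki_pos - written, ki_pos - 1);
	/* -> sync range: [4096, 12287] */
	return 0;
}
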
index b3f45a578344a90eee8d1c76f724636de24447b1..e5966758c093483cd6027b4e14ea678904be5978 100644 (file)
 #include <linux/workqueue.h>
 
 struct arch_timer_kvm {
-#ifdef CONFIG_KVM_ARM_TIMER
        /* Is the timer enabled */
        bool                    enabled;
 
        /* Virtual offset */
        cycle_t                 cntvoff;
-#endif
 };
 
 struct arch_timer_cpu {
-#ifdef CONFIG_KVM_ARM_TIMER
        /* Registers: control register, timer value */
        u32                             cntv_ctl;       /* Saved/restored */
        cycle_t                         cntv_cval;      /* Saved/restored */
@@ -55,10 +52,8 @@ struct arch_timer_cpu {
 
        /* Timer IRQ */
        const struct kvm_irq_level      *irq;
-#endif
 };
 
-#ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
 void kvm_timer_enable(struct kvm *kvm);
 void kvm_timer_init(struct kvm *kvm);
@@ -72,30 +67,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
-#else
-static inline int kvm_timer_hyp_init(void)
-{
-       return 0;
-};
-
-static inline void kvm_timer_enable(struct kvm *kvm) {}
-static inline void kvm_timer_init(struct kvm *kvm) {}
-static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-                                       const struct kvm_irq_level *irq) {}
-static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-       return 0;
-}
-
-static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-       return 0;
-}
-#endif
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
 
 #endif
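
With the KVM/ARM timer code now always built, the !CONFIG_KVM_ARM_TIMER stubs can go, and the header gains kvm_timer_should_fire() so callers can query timer expiry directly. A hedged caller-side sketch; the semantics (true when the virtual timer would fire) are inferred from the name, and the include path is assumed:

#include <linux/kvm_host.h>	/* struct kvm_vcpu */
#include <kvm/arm_arch_timer.h>	/* header path assumed for this file */

static bool vcpu_timer_pending(struct kvm_vcpu *vcpu)
{
	/* e.g. when deciding whether a blocked VCPU has a pending wakeup */
	return kvm_timer_should_fire(vcpu);
}
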
index 66203b268984ebedd72d5bd1b2f54440e56011bc..133ea00aa83bc8926137ca8867f28a4ab9469d27 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <kvm/iodev.h>
 
 #define VGIC_NR_IRQS_LEGACY    256
 #define VGIC_NR_SGIS           16
@@ -140,16 +141,21 @@ struct vgic_params {
 };
 
 struct vgic_vm_ops {
-       bool    (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
-                              struct kvm_exit_mmio *);
        bool    (*queue_sgi)(struct kvm_vcpu *, int irq);
        void    (*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
        int     (*init_model)(struct kvm *);
        int     (*map_resources)(struct kvm *, const struct vgic_params *);
 };
 
+struct vgic_io_device {
+       gpa_t addr;
+       int len;
+       const struct vgic_io_range *reg_ranges;
+       struct kvm_vcpu *redist_vcpu;
+       struct kvm_io_device dev;
+};
+
 struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
        spinlock_t              lock;
        bool                    in_kernel;
        bool                    ready;
@@ -197,6 +203,9 @@ struct vgic_dist {
        /* Level-triggered interrupt queued on VCPU interface */
        struct vgic_bitmap      irq_queued;
 
+       /* Interrupt was active when unqueue from VCPU interface */
+       struct vgic_bitmap      irq_active;
+
        /* Interrupt priority. Not used yet. */
        struct vgic_bytemap     irq_priority;
 
@@ -237,8 +246,12 @@ struct vgic_dist {
        /* Bitmap indicating which CPU has something pending */
        unsigned long           *irq_pending_on_cpu;
 
+       /* Bitmap indicating which CPU has active IRQs */
+       unsigned long           *irq_active_on_cpu;
+
        struct vgic_vm_ops      vm_ops;
-#endif
+       struct vgic_io_device   dist_iodev;
+       struct vgic_io_device   *redist_iodevs;
 };
 
 struct vgic_v2_cpu_if {
@@ -266,13 +279,18 @@ struct vgic_v3_cpu_if {
 };
 
 struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
        /* per IRQ to LR mapping */
        u8              *vgic_irq_lr_map;
 
-       /* Pending interrupts on this VCPU */
+       /* Pending/active/both interrupts on this VCPU */
        DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
+
+       /* Pending/active/both shared interrupts, dynamically sized */
        unsigned long   *pending_shared;
+       unsigned long   *active_shared;
+       unsigned long   *pend_act_shared;
 
        /* Bitmap of used/free list registers */
        DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -285,7 +303,6 @@ struct vgic_cpu {
                struct vgic_v2_cpu_if   vgic_v2;
                struct vgic_v3_cpu_if   vgic_v3;
        };
-#endif
 };
 
 #define LR_EMPTY       0xff
@@ -295,10 +312,7 @@ struct vgic_cpu {
 
 struct kvm;
 struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
 
-#ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_map_resources(struct kvm *kvm);
@@ -312,8 +326,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                        bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                     struct kvm_exit_mmio *mmio);
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)    (!!((k)->arch.vgic.nr_cpus))
@@ -335,84 +348,4 @@ static inline int vgic_v3_probe(struct device_node *vgic_node,
 }
 #endif
 
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
-{
-       return -ENXIO;
-}
-
-static inline int kvm_vgic_map_resources(struct kvm *kvm)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
-{
-       return 0;
-}
-
-static inline void kvm_vgic_destroy(struct kvm *kvm)
-{
-}
-
-static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
-                                     unsigned int irq_num, bool level)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                                   struct kvm_exit_mmio *mmio)
-{
-       return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-       return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
-       return true;
-}
-
-static inline bool vgic_ready(struct kvm *kvm)
-{
-       return true;
-}
-
-static inline int kvm_vgic_get_max_vcpus(void)
-{
-       return KVM_MAX_VCPUS;
-}
-#endif
-
 #endif
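
The big change here is structural: instead of a dedicated handle_mmio() exit path, each distributor/redistributor region becomes a struct vgic_io_device registered on the generic KVM I/O bus (see the new include/kvm/iodev.h below), and per-IRQ active state gets its own bitmaps alongside the pending ones. The wrapper relies on the usual embed-and-container_of pattern; a sketch of a handler body, with the dispatch logic elided:

#include <linux/kernel.h>
#include <kvm/arm_vgic.h>
#include <kvm/iodev.h>

/* Illustrative only: the generic bus hands back the embedded
 * kvm_io_device, and container_of() recovers the vgic wrapper. */
static int demo_vgic_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
			  gpa_t addr, int len, void *val)
{
	struct vgic_io_device *iodev =
		container_of(this, struct vgic_io_device, dev);
	gpa_t offset = addr - iodev->addr;	/* offset into the region */

	/* ... dispatch via iodev->reg_ranges using offset ... */
	(void)offset;
	return 0;	/* 0 = handled; nonzero passes to the next device */
}
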
diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h
new file mode 100644 (file)
index 0000000..a6d208b
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __KVM_IODEV_H__
+#define __KVM_IODEV_H__
+
+#include <linux/kvm_types.h>
+#include <linux/errno.h>
+
+struct kvm_io_device;
+struct kvm_vcpu;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+       int (*read)(struct kvm_vcpu *vcpu,
+                   struct kvm_io_device *this,
+                   gpa_t addr,
+                   int len,
+                   void *val);
+       int (*write)(struct kvm_vcpu *vcpu,
+                    struct kvm_io_device *this,
+                    gpa_t addr,
+                    int len,
+                    const void *val);
+       void (*destructor)(struct kvm_io_device *this);
+};
+
+
+struct kvm_io_device {
+       const struct kvm_io_device_ops *ops;
+};
+
+static inline void kvm_iodevice_init(struct kvm_io_device *dev,
+                                    const struct kvm_io_device_ops *ops)
+{
+       dev->ops = ops;
+}
+
+static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu,
+                                   struct kvm_io_device *dev, gpa_t addr,
+                                   int l, void *v)
+{
+       return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v)
+                               : -EOPNOTSUPP;
+}
+
+static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu,
+                                    struct kvm_io_device *dev, gpa_t addr,
+                                    int l, const void *v)
+{
+       return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v)
+                                : -EOPNOTSUPP;
+}
+
+static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
+{
+       if (dev->ops->destructor)
+               dev->ops->destructor(dev);
+}
+
+#endif /* __KVM_IODEV_H__ */
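
A minimal user of the new header, assuming hypothetical names: a read-only 4-byte register whose ops reject everything else, so the bus tries the next device. Leaving .write NULL makes kvm_iodevice_write() return -EOPNOTSUPP automatically, per the inline above:

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/types.h>
#include <kvm/iodev.h>

struct demo_dev {
	struct kvm_io_device dev;
	u32 reg;
};

static int demo_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
		     gpa_t addr, int len, void *val)
{
	struct demo_dev *d = container_of(this, struct demo_dev, dev);

	if (len != 4)
		return -EOPNOTSUPP;	/* not ours: pass it along */
	memcpy(val, &d->reg, sizeof(d->reg));
	return 0;			/* transaction handled */
}

static const struct kvm_io_device_ops demo_ops = {
	.read = demo_read,		/* .write NULL: writes unsupported */
};

static void demo_init(struct demo_dev *d)
{
	kvm_iodevice_init(&d->dev, &demo_ops);
}
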
index c294e3e25e37a50a953a4a0bb3cb1aa66ba904d9..a1b25e35ea5f9fc2978b7f62917c6b4e39c3dc75 100644 (file)
@@ -181,7 +181,9 @@ enum rq_flag_bits {
        __REQ_ELVPRIV,          /* elevator private data attached */
        __REQ_FAILED,           /* set if the request failed */
        __REQ_QUIET,            /* don't worry about errors */
-       __REQ_PREEMPT,          /* set for "ide_preempt" requests */
+       __REQ_PREEMPT,          /* set for "ide_preempt" requests and also
+                                  for requests for which the SCSI "quiesce"
+                                  state must be ignored. */
        __REQ_ALLOCED,          /* request came from our alloc pool */
        __REQ_COPY_USER,        /* contains copies of user pages */
        __REQ_FLUSH_SEQ,        /* request for flush sequence */
index 2e4cb67f6e560094aa719fe75f595dfbb562cf8e..96c280b2c263476c053bdd0c514aa16df3d04212 100644 (file)
@@ -8,64 +8,69 @@
 #ifndef _LINUX_CLOCKCHIPS_H
 #define _LINUX_CLOCKCHIPS_H
 
-/* Clock event notification values */
-enum clock_event_nofitiers {
-       CLOCK_EVT_NOTIFY_ADD,
-       CLOCK_EVT_NOTIFY_BROADCAST_ON,
-       CLOCK_EVT_NOTIFY_BROADCAST_OFF,
-       CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-       CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
-       CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
-       CLOCK_EVT_NOTIFY_SUSPEND,
-       CLOCK_EVT_NOTIFY_RESUME,
-       CLOCK_EVT_NOTIFY_CPU_DYING,
-       CLOCK_EVT_NOTIFY_CPU_DEAD,
-};
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 
-#include <linux/clocksource.h>
-#include <linux/cpumask.h>
-#include <linux/ktime.h>
-#include <linux/notifier.h>
+# include <linux/clocksource.h>
+# include <linux/cpumask.h>
+# include <linux/ktime.h>
+# include <linux/notifier.h>
 
 struct clock_event_device;
 struct module;
 
-/* Clock event mode commands */
+/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
 enum clock_event_mode {
-       CLOCK_EVT_MODE_UNUSED = 0,
+       CLOCK_EVT_MODE_UNUSED,
        CLOCK_EVT_MODE_SHUTDOWN,
        CLOCK_EVT_MODE_PERIODIC,
        CLOCK_EVT_MODE_ONESHOT,
        CLOCK_EVT_MODE_RESUME,
 };
 
+/*
+ * Possible states of a clock event device.
+ *
+ * DETACHED:   Device is not used by clockevents core. Initial state or can be
+ *             reached from SHUTDOWN.
+ * SHUTDOWN:   Device is powered-off. Can be reached from PERIODIC or ONESHOT.
+ * PERIODIC:   Device is programmed to generate events periodically. Can be
+ *             reached from DETACHED or SHUTDOWN.
+ * ONESHOT:    Device is programmed to generate event only once. Can be reached
+ *             from DETACHED or SHUTDOWN.
+ */
+enum clock_event_state {
+       CLOCK_EVT_STATE_DETACHED,
+       CLOCK_EVT_STATE_SHUTDOWN,
+       CLOCK_EVT_STATE_PERIODIC,
+       CLOCK_EVT_STATE_ONESHOT,
+};
+
 /*
  * Clock event features
  */
-#define CLOCK_EVT_FEAT_PERIODIC                0x000001
-#define CLOCK_EVT_FEAT_ONESHOT         0x000002
-#define CLOCK_EVT_FEAT_KTIME           0x000004
+# define CLOCK_EVT_FEAT_PERIODIC       0x000001
+# define CLOCK_EVT_FEAT_ONESHOT                0x000002
+# define CLOCK_EVT_FEAT_KTIME          0x000004
+
 /*
- * x86(64) specific misfeatures:
+ * x86(64) specific (mis)features:
  *
  * - Clockevent source stops in C3 State and needs broadcast support.
  * - Local APIC timer is used as a dummy device.
  */
-#define CLOCK_EVT_FEAT_C3STOP          0x000008
-#define CLOCK_EVT_FEAT_DUMMY           0x000010
+# define CLOCK_EVT_FEAT_C3STOP         0x000008
+# define CLOCK_EVT_FEAT_DUMMY          0x000010
 
 /*
  * Core shall set the interrupt affinity dynamically in broadcast mode
  */
-#define CLOCK_EVT_FEAT_DYNIRQ          0x000020
-#define CLOCK_EVT_FEAT_PERCPU          0x000040
+# define CLOCK_EVT_FEAT_DYNIRQ         0x000020
+# define CLOCK_EVT_FEAT_PERCPU         0x000040
 
 /*
  * Clockevent device is based on a hrtimer for broadcast
  */
-#define CLOCK_EVT_FEAT_HRTIMER         0x000080
+# define CLOCK_EVT_FEAT_HRTIMER                0x000080
 
 /**
  * struct clock_event_device - clock event device descriptor
@@ -78,10 +83,15 @@ enum clock_event_mode {
  * @min_delta_ns:      minimum delta value in ns
  * @mult:              nanosecond to cycles multiplier
  * @shift:             nanoseconds to cycles divisor (power of two)
- * @mode:              operating mode assigned by the management code
+ * @mode:              operating mode, relevant only to ->set_mode(), OBSOLETE
+ * @state:             current state of the device, assigned by the core code
  * @features:          features
  * @retries:           number of forced programming retries
- * @set_mode:          set mode function
+ * @set_mode:          legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
+ * @set_state_periodic:        switch state to periodic, if !set_mode
+ * @set_state_oneshot: switch state to oneshot, if !set_mode
+ * @set_state_shutdown:        switch state to shutdown, if !set_mode
+ * @tick_resume:       resume clkevt device, if !set_mode
  * @broadcast:         function to broadcast events
  * @min_delta_ticks:   minimum delta value in ticks stored for reconfiguration
  * @max_delta_ticks:   maximum delta value in ticks stored for reconfiguration
@@ -95,22 +105,31 @@ enum clock_event_mode {
  */
 struct clock_event_device {
        void                    (*event_handler)(struct clock_event_device *);
-       int                     (*set_next_event)(unsigned long evt,
-                                                 struct clock_event_device *);
-       int                     (*set_next_ktime)(ktime_t expires,
-                                                 struct clock_event_device *);
+       int                     (*set_next_event)(unsigned long evt, struct clock_event_device *);
+       int                     (*set_next_ktime)(ktime_t expires, struct clock_event_device *);
        ktime_t                 next_event;
        u64                     max_delta_ns;
        u64                     min_delta_ns;
        u32                     mult;
        u32                     shift;
        enum clock_event_mode   mode;
+       enum clock_event_state  state;
        unsigned int            features;
        unsigned long           retries;
 
+       /*
+        * State transition callback(s): Only one of the two groups should be
+        * defined:
+        * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
+        * - set_state_{shutdown|periodic|oneshot}(), tick_resume().
+        */
+       void                    (*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
+       int                     (*set_state_periodic)(struct clock_event_device *);
+       int                     (*set_state_oneshot)(struct clock_event_device *);
+       int                     (*set_state_shutdown)(struct clock_event_device *);
+       int                     (*tick_resume)(struct clock_event_device *);
+
        void                    (*broadcast)(const struct cpumask *mask);
-       void                    (*set_mode)(enum clock_event_mode mode,
-                                           struct clock_event_device *);
        void                    (*suspend)(struct clock_event_device *);
        void                    (*resume)(struct clock_event_device *);
        unsigned long           min_delta_ticks;
@@ -136,18 +155,18 @@ struct clock_event_device {
  *
  * factor = (clock_ticks << shift) / nanoseconds
  */
-static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
-                                  int shift)
+static inline unsigned long
+div_sc(unsigned long ticks, unsigned long nsec, int shift)
 {
-       uint64_t tmp = ((uint64_t)ticks) << shift;
+       u64 tmp = ((u64)ticks) << shift;
 
        do_div(tmp, nsec);
+
        return (unsigned long) tmp;
 }
 
 /* Clock event layer functions */
-extern u64 clockevent_delta2ns(unsigned long latch,
-                              struct clock_event_device *evt);
+extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu);
 
@@ -158,57 +177,42 @@ extern void clockevents_config_and_register(struct clock_event_device *dev,
 
 extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
 
-extern void clockevents_exchange_device(struct clock_event_device *old,
-                                       struct clock_event_device *new);
-extern void clockevents_set_mode(struct clock_event_device *dev,
-                                enum clock_event_mode mode);
-extern int clockevents_program_event(struct clock_event_device *dev,
-                                    ktime_t expires, bool force);
-
-extern void clockevents_handle_noop(struct clock_event_device *dev);
-
 static inline void
 clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
 {
-       return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC,
-                                     freq, minsec);
+       return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec);
 }
 
 extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+#  ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
 extern void tick_broadcast(const struct cpumask *mask);
-#else
-#define tick_broadcast NULL
-#endif
+#  else
+#   define tick_broadcast      NULL
+#  endif
 extern int tick_receive_broadcast(void);
-#endif
+# endif
 
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+# if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_setup_hrtimer_broadcast(void);
 extern int tick_check_broadcast_expired(void);
-#else
+# else
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
-#endif
+static inline void tick_setup_hrtimer_broadcast(void) { }
+# endif
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern int clockevents_notify(unsigned long reason, void *arg);
-#else
-static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
-#endif
-
-#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */
 
-static inline void clockevents_suspend(void) {}
-static inline void clockevents_resume(void) {}
+#else /* !CONFIG_GENERIC_CLOCKEVENTS: */
 
+static inline void clockevents_suspend(void) { }
+static inline void clockevents_resume(void) { }
 static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
+static inline void tick_setup_hrtimer_broadcast(void) { }
 
-#endif
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-#endif
+#endif /* _LINUX_CLOCKCHIPS_H */
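
For driver authors the practical upshot is the new callback group: instead of one multiplexed set_mode(), a device implements one callback per target state and the core tracks ->state itself. A hedged sketch of a converted driver; the hardware accesses are elided and all names are hypothetical:

#include <linux/clockchips.h>

static int demo_shutdown(struct clock_event_device *evt)
{
	/* stop the hardware timer */
	return 0;
}

static int demo_set_periodic(struct clock_event_device *evt)
{
	/* program a free-running periodic tick */
	return 0;
}

static int demo_set_oneshot(struct clock_event_device *evt)
{
	/* arm for a single event; ->set_next_event() programs the deadline */
	return 0;
}

static struct clock_event_device demo_clkevt = {
	.name			= "demo",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= demo_shutdown,
	.set_state_periodic	= demo_set_periodic,
	.set_state_oneshot	= demo_set_oneshot,
	/* no .set_mode: the two callback groups are mutually exclusive */
};
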
index 9c78d15d33e4de979b3d70d6a569064aa5529048..135509821c3994f4083bd2f0eb8f7c05409c026c 100644 (file)
@@ -56,6 +56,7 @@ struct module;
  * @shift:             cycle to nanosecond divisor (power of two)
  * @max_idle_ns:       max idle time permitted by the clocksource (nsecs)
  * @maxadj:            maximum adjustment value to mult (~11%)
+ * @max_cycles:                maximum safe cycle value which won't overflow on multiplication
  * @flags:             flags describing special properties
  * @archdata:          arch-specific data
  * @suspend:           suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
        struct arch_clocksource_data archdata;
 #endif
-
+       u64 max_cycles;
        const char *name;
        struct list_head list;
        int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 }
 
 
-extern int clocksource_register(struct clocksource*);
 extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 extern u64
-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 extern int
 __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
 extern void
-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+/*
+ * Don't call this unless you are a default clocksource
+ * (AKA: jiffies) and absolutely have to.
+ */
+static inline int __clocksource_register(struct clocksource *cs)
+{
+       return __clocksource_register_scale(cs, 1, 0);
+}
 
 static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
 {
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
        return __clocksource_register_scale(cs, 1000, khz);
 }
 
-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
 {
-       __clocksource_updatefreq_scale(cs, 1, hz);
+       __clocksource_update_freq_scale(cs, 1, hz);
 }
 
-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
 {
-       __clocksource_updatefreq_scale(cs, 1000, khz);
+       __clocksource_update_freq_scale(cs, 1000, khz);
 }
 
 
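
Bare clocksource_register() is gone; registration now always goes through a frequency-aware path so the core can compute mult/shift and the new max_cycles bound itself. A hedged driver sketch with a hypothetical counter and rate:

#include <linux/clocksource.h>
#include <linux/init.h>

/* Hypothetical free-running counter read. */
static cycle_t demo_read(struct clocksource *cs)
{
	return 0;	/* would read an MMIO counter here */
}

static struct clocksource demo_cs = {
	.name	= "demo",
	.rating	= 200,
	.read	= demo_read,
	.mask	= CLOCKSOURCE_MASK(32),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init demo_clocksource_init(void)
{
	/* register with an explicit rate; __clocksource_update_freq_hz()
	 * is the renamed hook to call later if the rate is recalibrated */
	return clocksource_register_hz(&demo_cs, 24000000);
}
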
index 1b45e4a0519b2c34033db91e37fd1f1f0b367f8c..0e41ca0e59275deb33b7c7220dffd1ff39cbb2f7 100644 (file)
@@ -192,29 +192,16 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 
 #include <uapi/linux/types.h>
 
-static __always_inline void data_access_exceeds_word_size(void)
-#ifdef __compiletime_warning
-__compiletime_warning("data access exceeds word size and won't be atomic")
-#endif
-;
-
-static __always_inline void data_access_exceeds_word_size(void)
-{
-}
-
 static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
 {
        switch (size) {
        case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
        case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
        case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
-#ifdef CONFIG_64BIT
        case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
-#endif
        default:
                barrier();
                __builtin_memcpy((void *)res, (const void *)p, size);
-               data_access_exceeds_word_size();
                barrier();
        }
 }
@@ -225,13 +212,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
        case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
        case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
-#ifdef CONFIG_64BIT
        case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
-#endif
        default:
                barrier();
                __builtin_memcpy((void *)p, (const void *)res, size);
-               data_access_exceeds_word_size();
                barrier();
        }
 }
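
Dropping the #ifdef CONFIG_64BIT means an 8-byte READ_ONCE()/WRITE_ONCE() now compiles on 32-bit too, and the compile-time "exceeds word size" nag is gone. On 32-bit the load may still be split into two words, so tearing-sensitive callers need a seqcount or similar; a sketch:

#include <linux/compiler.h>
#include <linux/types.h>

/* Single atomic load on 64-bit; possibly two word loads on 32-bit. */
static u64 sample_counter(const u64 *counter)
{
	return READ_ONCE(*counter);
}
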
index 306178d7309f193cb32443f49c888297ffa9573e..9c5e892547961544eae22a53669ba600e5ad4973 100644 (file)
@@ -77,7 +77,6 @@ struct cpuidle_device {
        unsigned int            cpu;
 
        int                     last_residency;
-       int                     state_count;
        struct cpuidle_state_usage      states_usage[CPUIDLE_STATE_MAX];
        struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
        struct cpuidle_driver_kobj *kobj_driver;
index 022e34fcbd1bf6b56cb5f0dbb250a28885797f80..52456aa566a05eded5d61eac974b237e00c26ce9 100644 (file)
@@ -14,6 +14,8 @@
 #include <asm/io.h>
 #include <asm/scatterlist.h>
 
+struct device;
+
 struct dma_pool *dma_pool_create(const char *name, struct device *dev, 
                        size_t size, size_t align, size_t allocation);
 
index cf7e431cbc730dff0ef445dc48989bf5156a09ac..af5be0368dec26c934565e634c0dc803958ed2cc 100644 (file)
@@ -942,6 +942,7 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_64BIT              5       /* Is the firmware 64-bit? */
 #define EFI_PARAVIRT           6       /* Access is via a paravirt interface */
 #define EFI_ARCH_1             7       /* First arch-specific bit */
+#define EFI_DBG                        8       /* Print additional debug info at runtime */
 
 #ifdef CONFIG_EFI
 /*
index f4131e8ead74965a73272949b3a9eae8fa08b5c7..52cc4492cb3a1bcb979124b097fefdcfbc89e390 100644 (file)
@@ -1549,7 +1549,7 @@ struct file_operations {
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
        long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
        int (*mmap) (struct file *, struct vm_area_struct *);
-       void (*mremap)(struct file *, struct vm_area_struct *);
+       int (*mremap)(struct file *, struct vm_area_struct *);
        int (*open) (struct inode *, struct file *);
        int (*flush) (struct file *, fl_owner_t id);
        int (*release) (struct inode *, struct file *);
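
This is the interface side of the fs/aio.c change above: ->mremap() can now veto relocation of a mapping instead of being forced to accept it. A hedged sketch of a driver using the new contract; the ctx bookkeeping is hypothetical:

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/mm.h>

struct demo_ctx {
	bool dead;
	unsigned long base;
};

static int demo_mremap(struct file *file, struct vm_area_struct *vma)
{
	struct demo_ctx *ctx = file->private_data;

	if (!ctx || ctx->dead)
		return -EINVAL;		/* refuse: mremap() fails cleanly */

	ctx->base = vma->vm_start;	/* track the relocated mapping */
	return 0;
}

static const struct file_operations demo_fops = {
	.mremap = demo_mremap,
};
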
index 2df8e8dd10a483d55d723b6ac4875de807ab2044..21b6d768edd7a4e0f1aee98ed9f437000fa1b996 100644 (file)
@@ -253,21 +253,41 @@ struct obs_kernel_param {
  * obs_kernel_param "array" too far apart in .init.setup.
  */
 #define __setup_param(str, unique_id, fn, early)                       \
-       static const char __setup_str_##unique_id[] __initconst \
-               __aligned(1) = str; \
-       static struct obs_kernel_param __setup_##unique_id      \
-               __used __section(.init.setup)                   \
-               __attribute__((aligned((sizeof(long)))))        \
+       static const char __setup_str_##unique_id[] __initconst         \
+               __aligned(1) = str;                                     \
+       static struct obs_kernel_param __setup_##unique_id              \
+               __used __section(.init.setup)                           \
+               __attribute__((aligned((sizeof(long)))))                \
                = { __setup_str_##unique_id, fn, early }
 
-#define __setup(str, fn)                                       \
+#define __setup(str, fn)                                               \
        __setup_param(str, fn, fn, 0)
 
-/* NOTE: fn is as per module_param, not __setup!  Emits warning if fn
- * returns non-zero. */
-#define early_param(str, fn)                                   \
+/*
+ * NOTE: fn is as per module_param, not __setup!
+ * Emits warning if fn returns non-zero.
+ */
+#define early_param(str, fn)                                           \
        __setup_param(str, fn, fn, 1)
 
+#define early_param_on_off(str_on, str_off, var, config)               \
+                                                                       \
+       int var = IS_ENABLED(config);                                   \
+                                                                       \
+       static int __init parse_##var##_on(char *arg)                   \
+       {                                                               \
+               var = 1;                                                \
+               return 0;                                               \
+       }                                                               \
+       __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1);   \
+                                                                       \
+       static int __init parse_##var##_off(char *arg)                  \
+       {                                                               \
+               var = 0;                                                \
+               return 0;                                               \
+       }                                                               \
+       __setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
+
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
 void __init parse_early_options(char *cmdline);
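
Usage sketch for the new macro, with hypothetical names: it emits an int that defaults to IS_ENABLED(config), plus a pair of early parameters that force it either way from the kernel command line:

#include <linux/init.h>

/* Defines "int demo_feature" (default: IS_ENABLED(CONFIG_DEMO_FEATURE))
 * and makes booting with "demo" or "nodemo" override that default. */
early_param_on_off("demo", "nodemo", demo_feature, CONFIG_DEMO_FEATURE);
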
diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h
deleted file mode 100644 (file)
index 10496bd..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  intel_mid_dma.h - Intel MID DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#ifndef __INTEL_MID_DMA_H__
-#define __INTEL_MID_DMA_H__
-
-#include <linux/dmaengine.h>
-
-#define DMA_PREP_CIRCULAR_LIST         (1 << 10)
-
-/*DMA mode configurations*/
-enum intel_mid_dma_mode {
-       LNW_DMA_PER_TO_MEM = 0, /*periphral to memory configuration*/
-       LNW_DMA_MEM_TO_PER,     /*memory to periphral configuration*/
-       LNW_DMA_MEM_TO_MEM,     /*mem to mem confg (testing only)*/
-};
-
-/*DMA handshaking*/
-enum intel_mid_dma_hs_mode {
-       LNW_DMA_HW_HS = 0,      /*HW Handshaking only*/
-       LNW_DMA_SW_HS = 1,      /*SW Handshaking not recommended*/
-};
-
-/*Burst size configuration*/
-enum intel_mid_dma_msize {
-       LNW_DMA_MSIZE_1 = 0x0,
-       LNW_DMA_MSIZE_4 = 0x1,
-       LNW_DMA_MSIZE_8 = 0x2,
-       LNW_DMA_MSIZE_16 = 0x3,
-       LNW_DMA_MSIZE_32 = 0x4,
-       LNW_DMA_MSIZE_64 = 0x5,
-};
-
-/**
- * struct intel_mid_dma_slave - DMA slave structure
- *
- * @dirn: DMA trf direction
- * @src_width: tx register width
- * @dst_width: rx register width
- * @hs_mode: HW/SW handshaking mode
- * @cfg_mode: DMA data transfer mode (per-per/mem-per/mem-mem)
- * @src_msize: Source DMA burst size
- * @dst_msize: Dst DMA burst size
- * @per_addr: Periphral address
- * @device_instance: DMA peripheral device instance, we can have multiple
- *             peripheral device connected to single DMAC
- */
-struct intel_mid_dma_slave {
-       enum intel_mid_dma_hs_mode      hs_mode;  /*handshaking*/
-       enum intel_mid_dma_mode         cfg_mode; /*mode configuration*/
-       unsigned int            device_instance; /*0, 1 for periphral instance*/
-       struct dma_slave_config         dma_slave;
-};
-
-#endif /*__INTEL_MID_DMA_H__*/
index bf3fe719c7ce9d3c0efa3c2449cc0d2a7e5b43ad..47b9ebd4a74fc667601d76476645fefbdb3c36f6 100644 (file)
@@ -38,16 +38,17 @@ bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
 #endif
 
-void irq_work_run(void);
 void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK
 #include <asm/irq_work.h>
 
+void irq_work_run(void);
 bool irq_work_needs_cpu(void);
 #else
 static inline bool irq_work_needs_cpu(void) { return false; }
+static inline void irq_work_run(void) { }
 #endif
 
 #endif /* _LINUX_IRQ_WORK_H */
index 98f923b6a0eaa78ee8b4ffdf57c87f146e3c03d6..f4de473f226b8ab5332dabc7083b74c80de8a664 100644 (file)
  * same as using STATIC_KEY_INIT_FALSE.
  */
 
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+# define HAVE_JUMP_LABEL
+#endif
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/bug.h>
@@ -55,7 +61,7 @@ extern bool static_key_initialized;
                                    "%s used before call to jump_label_init", \
                                    __func__)
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 
 struct static_key {
        atomic_t enabled;
@@ -66,13 +72,18 @@ struct static_key {
 #endif
 };
 
-# include <asm/jump_label.h>
-# define HAVE_JUMP_LABEL
 #else
 struct static_key {
        atomic_t enabled;
 };
-#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
+#endif /* HAVE_JUMP_LABEL */
+#endif /* __ASSEMBLY__ */
+
+#ifdef HAVE_JUMP_LABEL
+#include <asm/jump_label.h>
+#endif
+
+#ifndef __ASSEMBLY__
 
 enum jump_label_type {
        JUMP_LABEL_DISABLE = 0,
@@ -203,3 +214,5 @@ static inline bool static_key_enabled(struct static_key *key)
 }
 
 #endif /* _LINUX_JUMP_LABEL_H */
+
+#endif /* __ASSEMBLY__ */
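
The reshuffle defines HAVE_JUMP_LABEL before any C-only content and wraps the rest in #ifndef __ASSEMBLY__, so assembly files can include the header just to test for asm-goto support. C-side usage is unchanged; the classic pattern, for reference (names hypothetical):

#include <linux/jump_label.h>

static struct static_key demo_key = STATIC_KEY_INIT_FALSE;

static void hot_path(void)
{
	/* compiles to a patched nop until the key is enabled */
	if (static_key_false(&demo_key)) {
		/* rarely-enabled slow path */
	}
}

static void enable_demo(void)
{
	static_key_slow_inc(&demo_key);	/* patches the branch live */
}
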
index d12b2104d19b422a9e3357f7186d80702807fa3d..82af5d0b996e7fb29edd029379f2ab9238a1b2a2 100644 (file)
@@ -165,12 +165,12 @@ enum kvm_bus {
        KVM_NR_BUSES
 };
 
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                     int len, const void *val);
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-                           int len, const void *val, long cookie);
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
-                   void *val);
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+                           gpa_t addr, int len, const void *val, long cookie);
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+                   int len, void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                            int len, struct kvm_io_device *dev);
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -658,7 +658,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
 void *kvm_kvzalloc(unsigned long size);
-void kvm_kvfree(const void *addr);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
@@ -700,6 +699,20 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 #endif
 }
 
+#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
+/*
+ * returns true if the virtual interrupt controller is initialized and
+ * ready to accept virtual IRQ. On some architectures the virtual interrupt
+ * controller is dynamically instantiated and this is not always true.
+ */
+bool kvm_arch_intc_initialized(struct kvm *kvm);
+#else
+static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+       return true;
+}
+#endif
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
@@ -969,11 +982,16 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
-static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
        return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
 }
 
+static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+       return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
 
 #else
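
The bus accessors now take the issuing VCPU rather than the VM, which is what lets handlers like those in the new include/kvm/iodev.h above see who performed the access. A caller-side sketch (helper name hypothetical):

#include <linux/kvm_host.h>

static int demo_mmio_store(struct kvm_vcpu *vcpu, gpa_t addr,
			   const void *data, int len)
{
	return kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, len, data);
}
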
index 160448f920acd443327be8fa4a2c70cd7c736134..de722d4e9d61b9a1e30e4e80f0bb7630acec9459 100644 (file)
@@ -79,7 +79,7 @@ struct mmc_command {
 #define mmc_cmd_type(cmd)      ((cmd)->flags & MMC_CMD_MASK)
 
        unsigned int            retries;        /* max number of retries */
-       unsigned int            error;          /* command error */
+       int                     error;          /* command error */
 
 /*
  * Standard errno values are used for errors, but some have specific
@@ -108,7 +108,7 @@ struct mmc_data {
        unsigned int            timeout_clks;   /* data timeout (in clocks) */
        unsigned int            blksz;          /* data block size */
        unsigned int            blocks;         /* number of blocks */
-       unsigned int            error;          /* data error */
+       int                     error;          /* data error */
        unsigned int            flags;
 
 #define MMC_DATA_WRITE (1 << 8)
index 471fb3116dbee5afb3b9fb89325de6a51699fca3..12111993a3175ede2a43a0c061f5cbaf01b50abf 100644 (file)
@@ -44,6 +44,7 @@ struct mmc_data;
  * struct dw_mci - MMC controller state shared between all slots
  * @lock: Spinlock protecting the queue and associated data.
  * @regs: Pointer to MMIO registers.
+ * @fifo_reg: Pointer to MMIO registers for data FIFO
  * @sg: Scatterlist entry currently being processed by PIO code, if any.
  * @sg_miter: PIO mapping scatterlist iterator.
  * @cur_slot: The slot which is currently using the controller.
@@ -79,7 +80,6 @@ struct mmc_data;
  * @current_speed: Configured rate of the controller.
  * @num_slots: Number of slots available.
  * @verid: Denote Version ID.
- * @data_offset: Set the offset of DATA register according to VERID.
  * @dev: Device associated with the MMC controller.
  * @pdata: Platform data associated with the MMC controller.
  * @drv_data: Driver specific data for identified variant of the controller
@@ -132,6 +132,7 @@ struct dw_mci {
        spinlock_t              lock;
        spinlock_t              irq_lock;
        void __iomem            *regs;
+       void __iomem            *fifo_reg;
 
        struct scatterlist      *sg;
        struct sg_mapping_iter  sg_miter;
@@ -172,7 +173,6 @@ struct dw_mci {
        u32                     num_slots;
        u32                     fifoth_val;
        u16                     verid;
-       u16                     data_offset;
        struct device           *dev;
        struct dw_mci_board     *pdata;
        const struct dw_mci_drv_data    *drv_data;
@@ -202,6 +202,8 @@ struct dw_mci {
        int                     irq;
 
        int                     sdio_id0;
+
+       struct timer_list       cmd11_timer;
 };
 
 /* DMA ops for Internal/External DMAC interface */
index 0c8cbe5d155023358f8fafea9b68caca91802aa2..b5bedaec6223679bc00a25af801c1ae57ff17ad2 100644 (file)
@@ -79,12 +79,6 @@ struct mmc_ios {
 };
 
 struct mmc_host_ops {
-       /*
-        * 'enable' is called when the host is claimed and 'disable' is called
-        * when the host is released. 'enable' and 'disable' are deprecated.
-        */
-       int (*enable)(struct mmc_host *host);
-       int (*disable)(struct mmc_host *host);
        /*
         * It is optional for the host to implement pre_req and post_req in
         * order to support double buffering of requests (prepare one
diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h
deleted file mode 100644 (file)
index 8cc095a..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * include/linux/mmc/sdhci-spear.h
- *
- * SDHCI declarations specific to ST SPEAr platform
- *
- * Copyright (C) 2010 ST Microelectronics
- * Viresh Kumar <viresh.linux@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef LINUX_MMC_SDHCI_SPEAR_H
-#define LINUX_MMC_SDHCI_SPEAR_H
-
-#include <linux/platform_device.h>
-/*
- * struct sdhci_plat_data: spear sdhci platform data structure
- *
- * card_int_gpio: gpio pin used for card detection
- */
-struct sdhci_plat_data {
-       int card_int_gpio;
-};
-
-/* This function is used to set platform_data field of pdev->dev */
-static inline void
-sdhci_set_plat_data(struct platform_device *pdev, struct sdhci_plat_data *data)
-{
-       pdev->dev.platform_data = data;
-}
-
-#endif /* LINUX_MMC_SDHCI_SPEAR_H */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
deleted file mode 100644 (file)
index c3e3db1..0000000
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- *  linux/include/linux/mmc/sdhci.h - Secure Digital Host Controller Interface
- *
- *  Copyright (C) 2005-2008 Pierre Ossman, All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-#ifndef LINUX_MMC_SDHCI_H
-#define LINUX_MMC_SDHCI_H
-
-#include <linux/scatterlist.h>
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/mmc/host.h>
-
-struct sdhci_host_next {
-       unsigned int    sg_count;
-       s32             cookie;
-};
-
-struct sdhci_host {
-       /* Data set by hardware interface driver */
-       const char *hw_name;    /* Hardware bus name */
-
-       unsigned int quirks;    /* Deviations from spec. */
-
-/* Controller doesn't honor resets unless we touch the clock register */
-#define SDHCI_QUIRK_CLOCK_BEFORE_RESET                 (1<<0)
-/* Controller has bad caps bits, but really supports DMA */
-#define SDHCI_QUIRK_FORCE_DMA                          (1<<1)
-/* Controller doesn't like to be reset when there is no card inserted. */
-#define SDHCI_QUIRK_NO_CARD_NO_RESET                   (1<<2)
-/* Controller doesn't like clearing the power reg before a change */
-#define SDHCI_QUIRK_SINGLE_POWER_WRITE                 (1<<3)
-/* Controller has flaky internal state so reset it on each ios change */
-#define SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS              (1<<4)
-/* Controller has an unusable DMA engine */
-#define SDHCI_QUIRK_BROKEN_DMA                         (1<<5)
-/* Controller has an unusable ADMA engine */
-#define SDHCI_QUIRK_BROKEN_ADMA                                (1<<6)
-/* Controller can only DMA from 32-bit aligned addresses */
-#define SDHCI_QUIRK_32BIT_DMA_ADDR                     (1<<7)
-/* Controller can only DMA chunk sizes that are a multiple of 32 bits */
-#define SDHCI_QUIRK_32BIT_DMA_SIZE                     (1<<8)
-/* Controller can only ADMA chunks that are a multiple of 32 bits */
-#define SDHCI_QUIRK_32BIT_ADMA_SIZE                    (1<<9)
-/* Controller needs to be reset after each request to stay stable */
-#define SDHCI_QUIRK_RESET_AFTER_REQUEST                        (1<<10)
-/* Controller needs voltage and power writes to happen separately */
-#define SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER            (1<<11)
-/* Controller provides an incorrect timeout value for transfers */
-#define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL                 (1<<12)
-/* Controller has an issue with buffer bits for small transfers */
-#define SDHCI_QUIRK_BROKEN_SMALL_PIO                   (1<<13)
-/* Controller does not provide transfer-complete interrupt when not busy */
-#define SDHCI_QUIRK_NO_BUSY_IRQ                                (1<<14)
-/* Controller has unreliable card detection */
-#define SDHCI_QUIRK_BROKEN_CARD_DETECTION              (1<<15)
-/* Controller reports inverted write-protect state */
-#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT             (1<<16)
-/* Controller does not like fast PIO transfers */
-#define SDHCI_QUIRK_PIO_NEEDS_DELAY                    (1<<18)
-/* Controller has to be forced to use block size of 2048 bytes */
-#define SDHCI_QUIRK_FORCE_BLK_SZ_2048                  (1<<20)
-/* Controller cannot do multi-block transfers */
-#define SDHCI_QUIRK_NO_MULTIBLOCK                      (1<<21)
-/* Controller can only handle 1-bit data transfers */
-#define SDHCI_QUIRK_FORCE_1_BIT_DATA                   (1<<22)
-/* Controller needs 10ms delay between applying power and clock */
-#define SDHCI_QUIRK_DELAY_AFTER_POWER                  (1<<23)
-/* Controller uses SDCLK instead of TMCLK for data timeouts */
-#define SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK            (1<<24)
-/* Controller reports wrong base clock capability */
-#define SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN              (1<<25)
-/* Controller cannot support End Attribute in NOP ADMA descriptor */
-#define SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC              (1<<26)
-/* Controller is missing device caps. Use caps provided by host */
-#define SDHCI_QUIRK_MISSING_CAPS                       (1<<27)
-/* Controller uses Auto CMD12 command to stop the transfer */
-#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12             (1<<28)
-/* Controller doesn't have HISPD bit field in HI-SPEED SD card */
-#define SDHCI_QUIRK_NO_HISPD_BIT                       (1<<29)
-/* Controller treats ADMA descriptors with length 0000h incorrectly */
-#define SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC           (1<<30)
-/* The read-only detection via SDHCI_PRESENT_STATE register is unstable */
-#define SDHCI_QUIRK_UNSTABLE_RO_DETECT                 (1<<31)
-
-       unsigned int quirks2;   /* More deviations from spec. */
-
-#define SDHCI_QUIRK2_HOST_OFF_CARD_ON                  (1<<0)
-#define SDHCI_QUIRK2_HOST_NO_CMD23                     (1<<1)
-/* The system physically doesn't support 1.8v, even if the host does */
-#define SDHCI_QUIRK2_NO_1_8_V                          (1<<2)
-#define SDHCI_QUIRK2_PRESET_VALUE_BROKEN               (1<<3)
-#define SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON              (1<<4)
-/* Controller has a non-standard host control register */
-#define SDHCI_QUIRK2_BROKEN_HOST_CONTROL               (1<<5)
-/* Controller does not support HS200 */
-#define SDHCI_QUIRK2_BROKEN_HS200                      (1<<6)
-/* Controller does not support DDR50 */
-#define SDHCI_QUIRK2_BROKEN_DDR50                      (1<<7)
-/* Stop command (CMD12) can set Transfer Complete when not using MMC_RSP_BUSY */
-#define SDHCI_QUIRK2_STOP_WITH_TC                      (1<<8)
-/* Controller does not support 64-bit DMA */
-#define SDHCI_QUIRK2_BROKEN_64_BIT_DMA                 (1<<9)
-/* need clear transfer mode register before send cmd */
-#define SDHCI_QUIRK2_CLEAR_TRANSFERMODE_REG_BEFORE_CMD (1<<10)
-/* Capability register bit-63 indicates HS400 support */
-#define SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400              (1<<11)
-/* forced tuned clock */
-#define SDHCI_QUIRK2_TUNING_WORK_AROUND                        (1<<12)
-/* disable the block count for single block transactions */
-#define SDHCI_QUIRK2_SUPPORT_SINGLE                    (1<<13)
-
-       int irq;                /* Device IRQ */
-       void __iomem *ioaddr;   /* Mapped address */
-
-       const struct sdhci_ops *ops;    /* Low level hw interface */
-
-       /* Internal data */
-       struct mmc_host *mmc;   /* MMC structure */
-       u64 dma_mask;           /* custom DMA mask */
-
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
-       struct led_classdev led;        /* LED control */
-       char led_name[32];
-#endif
-
-       spinlock_t lock;        /* Mutex */
-
-       int flags;              /* Host attributes */
-#define SDHCI_USE_SDMA         (1<<0)  /* Host is SDMA capable */
-#define SDHCI_USE_ADMA         (1<<1)  /* Host is ADMA capable */
-#define SDHCI_REQ_USE_DMA      (1<<2)  /* Use DMA for this req. */
-#define SDHCI_DEVICE_DEAD      (1<<3)  /* Device unresponsive */
-#define SDHCI_SDR50_NEEDS_TUNING (1<<4)        /* SDR50 needs tuning */
-#define SDHCI_NEEDS_RETUNING   (1<<5)  /* Host needs retuning */
-#define SDHCI_AUTO_CMD12       (1<<6)  /* Auto CMD12 support */
-#define SDHCI_AUTO_CMD23       (1<<7)  /* Auto CMD23 support */
-#define SDHCI_PV_ENABLED       (1<<8)  /* Preset value enabled */
-#define SDHCI_SDIO_IRQ_ENABLED (1<<9)  /* SDIO irq enabled */
-#define SDHCI_SDR104_NEEDS_TUNING (1<<10)      /* SDR104/HS200 needs tuning */
-#define SDHCI_USING_RETUNING_TIMER (1<<11)     /* Host is using a retuning timer for the card */
-#define SDHCI_USE_64_BIT_DMA   (1<<12) /* Use 64-bit DMA */
-#define SDHCI_HS400_TUNING     (1<<13) /* Tuning for HS400 */
-
-       unsigned int version;   /* SDHCI spec. version */
-
-       unsigned int max_clk;   /* Max possible freq (MHz) */
-       unsigned int timeout_clk;       /* Timeout freq (KHz) */
-       unsigned int clk_mul;   /* Clock Muliplier value */
-
-       unsigned int clock;     /* Current clock (MHz) */
-       u8 pwr;                 /* Current voltage */
-
-       bool runtime_suspended; /* Host is runtime suspended */
-       bool bus_on;            /* Bus power prevents runtime suspend */
-       bool preset_enabled;    /* Preset is enabled */
-
-       struct mmc_request *mrq;        /* Current request */
-       struct mmc_command *cmd;        /* Current command */
-       struct mmc_data *data;  /* Current data request */
-       unsigned int data_early:1;      /* Data finished before cmd */
-       unsigned int busy_handle:1;     /* Handling the order of Busy-end */
-
-       struct sg_mapping_iter sg_miter;        /* SG state for PIO */
-       unsigned int blocks;    /* remaining PIO blocks */
-
-       int sg_count;           /* Mapped sg entries */
-
-       void *adma_table;       /* ADMA descriptor table */
-       void *align_buffer;     /* Bounce buffer */
-
-       size_t adma_table_sz;   /* ADMA descriptor table size */
-       size_t align_buffer_sz; /* Bounce buffer size */
-
-       dma_addr_t adma_addr;   /* Mapped ADMA descr. table */
-       dma_addr_t align_addr;  /* Mapped bounce buffer */
-
-       unsigned int desc_sz;   /* ADMA descriptor size */
-       unsigned int align_sz;  /* ADMA alignment */
-       unsigned int align_mask;        /* ADMA alignment mask */
-
-       struct tasklet_struct finish_tasklet;   /* Tasklet structures */
-
-       struct timer_list timer;        /* Timer for timeouts */
-
-       u32 caps;               /* Alternative CAPABILITY_0 */
-       u32 caps1;              /* Alternative CAPABILITY_1 */
-
-       unsigned int            ocr_avail_sdio; /* OCR bit masks */
-       unsigned int            ocr_avail_sd;
-       unsigned int            ocr_avail_mmc;
-       u32 ocr_mask;           /* available voltages */
-
-       unsigned                timing;         /* Current timing */
-
-       u32                     thread_isr;
-
-       /* cached registers */
-       u32                     ier;
-
-       wait_queue_head_t       buf_ready_int;  /* Waitqueue for Buffer Read Ready interrupt */
-       unsigned int            tuning_done;    /* Condition flag set when CMD19 succeeds */
-
-       unsigned int            tuning_count;   /* Timer count for re-tuning */
-       unsigned int            tuning_mode;    /* Re-tuning mode supported by host */
-#define SDHCI_TUNING_MODE_1    0
-       struct timer_list       tuning_timer;   /* Timer for tuning */
-
-       struct sdhci_host_next  next_data;
-       unsigned long private[0] ____cacheline_aligned;
-};
-#endif /* LINUX_MMC_SDHCI_H */
index f279d9c158cd566d8e3c0bd2777094e553f81be0..2782df47101e0cd3ee6ff199ad97f75d640b9308 100644 (file)
@@ -474,16 +474,15 @@ struct zone {
        unsigned long           wait_table_bits;
 
        ZONE_PADDING(_pad1_)
-
-       /* Write-intensive fields used from the page allocator */
-       spinlock_t              lock;
-
        /* free areas of different sizes */
        struct free_area        free_area[MAX_ORDER];
 
        /* zone flags, see below */
        unsigned long           flags;
 
+       /* Write-intensive fields used from the page allocator */
+       spinlock_t              lock;
+
        ZONE_PADDING(_pad2_)
 
        /* Write-intensive fields used by page reclaim */
index 67bbcf0785f61da1d4a832e016de1cb99dd313a9..8e981be2e2c2ee8c0472e7824300a6178b474640 100644 (file)
@@ -55,9 +55,6 @@ struct omap_hsmmc_platform_data {
        u32 caps;       /* Used for the MMC driver on 2430 and later */
        u32 pm_caps;    /* PM capabilities of the mmc */
 
-       /* switch pin can be for card detect (default) or card cover */
-       unsigned cover:1;
-
        /* use the internal clock */
        unsigned internal_clock:1;
 
@@ -73,7 +70,8 @@ struct omap_hsmmc_platform_data {
 #define HSMMC_HAS_HSPE_SUPPORT (1 << 2)
        unsigned features;
 
-       int switch_pin;                 /* gpio (card detect) */
+       int gpio_cd;                    /* gpio (card detect) */
+       int gpio_cod;                   /* gpio (cover detect) */
        int gpio_wp;                    /* gpio (write protect) */
 
        int (*set_power)(struct device *dev, int power_on, int vdd);
index dcad7ee0d7466c8e7ffd7a050e814db551c95cac..8dcf6825fa88bbe25c7936797a4322bfd0e24318 100644 (file)
@@ -77,6 +77,7 @@ struct rtc_class_ops {
        int (*read_alarm)(struct device *, struct rtc_wkalrm *);
        int (*set_alarm)(struct device *, struct rtc_wkalrm *);
        int (*proc)(struct device *, struct seq_file *);
+       int (*set_mmss64)(struct device *, time64_t secs);
        int (*set_mmss)(struct device *, unsigned long secs);
        int (*read_callback)(struct device *, int data);
        int (*alarm_irq_enable)(struct device *, unsigned int enabled);
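The new set_mmss64 callback hands the driver the time as a 64-bit second count, sidestepping the 2038 overflow of the unsigned-long set_mmss path. A minimal sketch of a driver wiring it up; the foo_* device, its register offset, and the driver-state struct are all hypothetical:

#include <linux/io.h>
#include <linux/rtc.h>
#include <linux/time64.h>

struct foo_rtc {
	void __iomem *base;
};

#define FOO_RTC_SECONDS	0x08	/* hypothetical 64-bit seconds register */

static int foo_rtc_set_mmss64(struct device *dev, time64_t secs)
{
	struct foo_rtc *rtc = dev_get_drvdata(dev);

	writeq(secs, rtc->base + FOO_RTC_SECONDS);
	return 0;
}

static const struct rtc_class_ops foo_rtc_ops = {
	.set_mmss64	= foo_rtc_set_mmss64,
};

When both callbacks are present the core can prefer set_mmss64 and keep set_mmss around only for legacy drivers.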
index a419b65770d669c3a51c88a86a145abbcd3db339..3f3308824fa41b473ac77e2927cfdec626bd8c0b 100644 (file)
@@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+       struct task_struct *task;
+       int from_cpu;
+       int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
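Subscribers use an ordinary notifier_block against the atomic chain; a rough sketch of a hypothetical consumer that logs migrations (all foo_* names are illustrative, not part of this merge):

#include <linux/notifier.h>
#include <linux/printk.h>
#include <linux/sched.h>

static int foo_migration_cb(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct task_migration_notifier *tmn = data;

	pr_debug("pid %d migrated: CPU %d -> CPU %d\n",
		 tmn->task->pid, tmn->from_cpu, tmn->to_cpu);
	return NOTIFY_OK;
}

static struct notifier_block foo_migration_nb = {
	.notifier_call	= foo_migration_cb,
};

static int __init foo_init(void)
{
	register_task_migration_notifier(&foo_migration_nb);
	return 0;
}

Note the callback fires from the scheduler's migration path on an atomic notifier chain, so it must not sleep.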
@@ -1115,15 +1123,28 @@ struct load_weight {
 };
 
 struct sched_avg {
+       u64 last_runnable_update;
+       s64 decay_count;
+       /*
+        * utilization_avg_contrib describes the amount of time that a
+        * sched_entity is running on a CPU. It is based on running_avg_sum
+        * and is scaled in the range [0..SCHED_LOAD_SCALE].
+        * load_avg_contrib describes the amount of time that a sched_entity
+        * is runnable on a rq. It is based on both runnable_avg_sum and the
+        * weight of the task.
+        */
+       unsigned long load_avg_contrib, utilization_avg_contrib;
        /*
         * These sums represent an infinite geometric series and so are bound
         * above by 1024/(1-y).  Thus we only need a u32 to store them for all
         * choices of y < 1-2^(-32)*1024.
+        * running_avg_sum reflects the time that the sched_entity is
+        * effectively running on the CPU.
+        * runnable_avg_sum represents the amount of time a sched_entity is on
+        * a runqueue which includes the running time that is monitored by
+        * running_avg_sum.
         */
-       u32 runnable_avg_sum, runnable_avg_period;
-       u64 last_runnable_update;
-       s64 decay_count;
-       unsigned long load_avg_contrib;
+       u32 runnable_avg_sum, avg_period, running_avg_sum;
 };
 
 #ifdef CONFIG_SCHEDSTATS
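The u32 bound cited in the comment is the plain geometric series: each past period contributes at most 1024, decayed by a factor y per period, so

	\sum_{i=0}^{\infty} 1024\,y^i = \frac{1024}{1-y},
	\qquad
	\frac{1024}{1-y} < 2^{32} \iff y < 1 - 2^{-32} \cdot 1024,

which is exactly the "y < 1-2^(-32)*1024" condition under which all three sums fit in a u32.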
index f5df8f687b4d097dd7855e3670a2b790cb380d5d..5f68d0a391cee8506f8e0d94cda72d8bd357b10f 100644 (file)
@@ -108,7 +108,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
        unsigned ret;
 
 repeat:
-       ret = ACCESS_ONCE(s->sequence);
+       ret = READ_ONCE(s->sequence);
        if (unlikely(ret & 1)) {
                cpu_relax();
                goto repeat;
@@ -127,7 +127,7 @@ repeat:
  */
 static inline unsigned raw_read_seqcount(const seqcount_t *s)
 {
-       unsigned ret = ACCESS_ONCE(s->sequence);
+       unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
        return ret;
 }
@@ -179,7 +179,7 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
  */
 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
-       unsigned ret = ACCESS_ONCE(s->sequence);
+       unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
        return ret & ~1;
 }
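The switch away from ACCESS_ONCE() is because its volatile cast is only guaranteed for scalar types, while READ_ONCE()/WRITE_ONCE() also handle aggregates and make the access direction explicit. For context, the retry loops above back the usual seqcount read side; a self-contained sketch of a caller, with foo_seq/foo_value as illustrative names:

#include <linux/seqlock.h>

static seqcount_t foo_seq;	/* protects foo_value */
static u64 foo_value;

static u64 foo_read(void)
{
	unsigned seq;
	u64 snapshot;

	do {
		seq = read_seqcount_begin(&foo_seq);	/* waits out in-flight writers */
		snapshot = foo_value;
	} while (read_seqcount_retry(&foo_seq, seq));	/* retry if a writer intervened */

	return snapshot;
}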
index 856d34dde79bc9d81faae4171fb0c5492829c133..d673072346f2e0415fd4706c7606d5081f87f686 100644 (file)
@@ -162,8 +162,6 @@ struct spi_transfer;
  * @remove: Unbinds this driver from the spi device
  * @shutdown: Standard shutdown callback used during system state
  *     transitions such as powerdown/halt and kexec
- * @suspend: Standard suspend callback used during system state transitions
- * @resume: Standard resume callback used during system state transitions
  * @driver: SPI device drivers should initialize the name and owner
  *     field of this structure.
  *
@@ -184,8 +182,6 @@ struct spi_driver {
        int                     (*probe)(struct spi_device *spi);
        int                     (*remove)(struct spi_device *spi);
        void                    (*shutdown)(struct spi_device *spi);
-       int                     (*suspend)(struct spi_device *spi, pm_message_t mesg);
-       int                     (*resume)(struct spi_device *spi);
        struct device_driver    driver;
 };
 
@@ -294,6 +290,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *                    transfer_one_message are mutually exclusive; when both
  *                    are set, the generic subsystem does not call your
  *                    transfer_one callback.
+ * @handle_err: the subsystem calls the driver to handle an error that occurs
+ *             in the generic implementation of transfer_one_message().
  * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *     number. Any individual value may be -ENOENT for CS lines that
@@ -448,6 +446,8 @@ struct spi_master {
        void (*set_cs)(struct spi_device *spi, bool enable);
        int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
                            struct spi_transfer *transfer);
+       void (*handle_err)(struct spi_master *master,
+                          struct spi_message *message);
 
        /* gpio chip select */
        int                     *cs_gpios;
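Two API shifts in this file: SPI drivers lose the bus-specific suspend/resume hooks in favour of standard dev_pm_ops on the embedded device_driver, and controller drivers gain handle_err for cleanup when the core transfer_one_message() path fails. A hedged sketch of the PM side, with every foo_* name hypothetical:

#include <linux/pm.h>
#include <linux/spi/spi.h>

static int foo_spi_suspend(struct device *dev)
{
	/* quiesce the hypothetical device */
	return 0;
}

static int foo_spi_resume(struct device *dev)
{
	/* restore the hypothetical device */
	return 0;
}

static SIMPLE_DEV_PM_OPS(foo_spi_pm_ops, foo_spi_suspend, foo_spi_resume);

static struct spi_driver foo_spi_driver = {
	.driver = {
		.name	= "foo-spi",
		.pm	= &foo_spi_pm_ops,
	},
	/* .probe and .remove as before */
};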
index f4aec0e75c3a268cebeecd2dcdc43fc68126a27a..076af437284d59f9b2597e9d6f448804cf56fcb3 100644 (file)
@@ -19,3 +19,12 @@ enum {
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
 #endif
+
+/**
+ * offsetofend(TYPE, MEMBER)
+ *
+ * @TYPE: The type of the structure
+ * @MEMBER: The member within the structure to get the end offset of
+ */
+#define offsetofend(TYPE, MEMBER) \
+       (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
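With offsetofend() promoted from VFIO into stddef.h, the usual argsz validation for variable-sized ioctl structures becomes available everywhere; a sketch with illustrative names:

#include <linux/errno.h>
#include <linux/stddef.h>
#include <linux/types.h>

struct foo_arg {
	__u32 argsz;	/* userspace declares how large its struct is */
	__u32 flags;
	__u64 data;
};

static int foo_check_argsz(const struct foo_arg *arg)
{
	/* Reject buffers too small to contain every field up to 'data'. */
	if (arg->argsz < offsetofend(struct foo_arg, data))
		return -EINVAL;
	return 0;
}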
index 9c085dc12ae92626e3d6ae831df435a82f188c08..f8492da57ad32e7607e320510a24ac0165dcd8cd 100644 (file)
@@ -1,7 +1,5 @@
-/*  linux/include/linux/tick.h
- *
- *  This file contains the structure definitions for tick related functions
- *
+/*
+ * Tick related global functions
  */
 #ifndef _LINUX_TICK_H
 #define _LINUX_TICK_H
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
 #include <linux/percpu.h>
-#include <linux/hrtimer.h>
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
-
-enum tick_device_mode {
-       TICKDEV_MODE_PERIODIC,
-       TICKDEV_MODE_ONESHOT,
-};
-
-struct tick_device {
-       struct clock_event_device *evtdev;
-       enum tick_device_mode mode;
-};
-
-enum tick_nohz_mode {
-       NOHZ_MODE_INACTIVE,
-       NOHZ_MODE_LOWRES,
-       NOHZ_MODE_HIGHRES,
-};
-
-/**
- * struct tick_sched - sched tick emulation and no idle tick control/stats
- * @sched_timer:       hrtimer to schedule the periodic tick in high
- *                     resolution mode
- * @last_tick:         Store the last tick expiry time when the tick
- *                     timer is modified for nohz sleeps. This is necessary
- *                     to resume the tick timer operation in the timeline
- *                     when the CPU returns from nohz sleep.
- * @tick_stopped:      Indicator that the idle tick has been stopped
- * @idle_jiffies:      jiffies at the entry to idle for idle time accounting
- * @idle_calls:                Total number of idle calls
- * @idle_sleeps:       Number of idle calls, where the sched tick was stopped
- * @idle_entrytime:    Time when the idle call was entered
- * @idle_waketime:     Time when the idle was interrupted
- * @idle_exittime:     Time when the idle state was left
- * @idle_sleeptime:    Sum of the time slept in idle with sched tick stopped
- * @iowait_sleeptime:  Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:      Duration of the current idle sleep
- * @do_timer_lst:      CPU was the last one doing do_timer before going idle
- */
-struct tick_sched {
-       struct hrtimer                  sched_timer;
-       unsigned long                   check_clocks;
-       enum tick_nohz_mode             nohz_mode;
-       ktime_t                         last_tick;
-       int                             inidle;
-       int                             tick_stopped;
-       unsigned long                   idle_jiffies;
-       unsigned long                   idle_calls;
-       unsigned long                   idle_sleeps;
-       int                             idle_active;
-       ktime_t                         idle_entrytime;
-       ktime_t                         idle_waketime;
-       ktime_t                         idle_exittime;
-       ktime_t                         idle_sleeptime;
-       ktime_t                         iowait_sleeptime;
-       ktime_t                         sleep_length;
-       unsigned long                   last_jiffies;
-       unsigned long                   next_jiffies;
-       ktime_t                         idle_expires;
-       int                             do_timer_last;
-};
-
 extern void __init tick_init(void);
-extern int tick_is_oneshot_available(void);
-extern struct tick_device *tick_get_device(int cpu);
-
 extern void tick_freeze(void);
 extern void tick_unfreeze(void);
+/* Should be core only, but ARM BL switcher requires it */
+extern void tick_suspend_local(void);
+/* Should be core only, but XEN resume magic and ARM BL switcher require it */
+extern void tick_resume_local(void);
+extern void tick_handover_do_timer(void);
+extern void tick_cleanup_dead_cpu(int cpu);
+#else /* CONFIG_GENERIC_CLOCKEVENTS */
+static inline void tick_init(void) { }
+static inline void tick_freeze(void) { }
+static inline void tick_unfreeze(void) { }
+static inline void tick_suspend_local(void) { }
+static inline void tick_resume_local(void) { }
+static inline void tick_handover_do_timer(void) { }
+static inline void tick_cleanup_dead_cpu(int cpu) { }
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-# ifdef CONFIG_HIGH_RES_TIMERS
-extern int tick_init_highres(void);
-extern int tick_program_event(ktime_t expires, int force);
-extern void tick_setup_sched_timer(void);
-# endif
-
-# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
-# else
-static inline void tick_cancel_sched_timer(int cpu) { }
-# endif
-
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern struct tick_device *tick_get_broadcast_device(void);
-extern struct cpumask *tick_get_broadcast_mask(void);
-
-#  ifdef CONFIG_TICK_ONESHOT
-extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
-#  endif
-
-# endif /* BROADCAST */
-
-# ifdef CONFIG_TICK_ONESHOT
-extern void tick_clock_notify(void);
-extern int tick_check_oneshot_change(int allow_nohz);
-extern struct tick_sched *tick_get_tick_sched(int cpu);
+#ifdef CONFIG_TICK_ONESHOT
 extern void tick_irq_enter(void);
-extern int tick_oneshot_mode_active(void);
 #  ifndef arch_needs_cpu
 #   define arch_needs_cpu() (0)
 #  endif
 # else
-static inline void tick_clock_notify(void) { }
-static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 static inline void tick_irq_enter(void) { }
-static inline int tick_oneshot_mode_active(void) { return 0; }
-# endif
+#endif
 
-#else /* CONFIG_GENERIC_CLOCKEVENTS */
-static inline void tick_init(void) { }
-static inline void tick_freeze(void) { }
-static inline void tick_unfreeze(void) { }
-static inline void tick_cancel_sched_timer(int cpu) { }
-static inline void tick_clock_notify(void) { }
-static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_irq_enter(void) { }
-static inline int tick_oneshot_mode_active(void) { return 0; }
-#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu);
+#else
+static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { }
+#endif
 
-# ifdef CONFIG_NO_HZ_COMMON
-DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+enum tick_broadcast_mode {
+       TICK_BROADCAST_OFF,
+       TICK_BROADCAST_ON,
+       TICK_BROADCAST_FORCE,
+};
+
+enum tick_broadcast_state {
+       TICK_BROADCAST_EXIT,
+       TICK_BROADCAST_ENTER,
+};
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern void tick_broadcast_control(enum tick_broadcast_mode mode);
+#else
+static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
+#endif /* BROADCAST */
+
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
+#else
+static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; }
+#endif
 
-static inline int tick_nohz_tick_stopped(void)
+static inline void tick_broadcast_enable(void)
+{
+       tick_broadcast_control(TICK_BROADCAST_ON);
+}
+static inline void tick_broadcast_disable(void)
+{
+       tick_broadcast_control(TICK_BROADCAST_OFF);
+}
+static inline void tick_broadcast_force(void)
+{
+       tick_broadcast_control(TICK_BROADCAST_FORCE);
+}
+static inline int tick_broadcast_enter(void)
 {
-       return __this_cpu_read(tick_cpu_sched.tick_stopped);
+       return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER);
+}
+static inline void tick_broadcast_exit(void)
+{
+       tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+extern int tick_nohz_tick_stopped(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-
-# else /* !CONFIG_NO_HZ_COMMON */
-static inline int tick_nohz_tick_stopped(void)
-{
-       return 0;
-}
-
+#else /* !CONFIG_NO_HZ_COMMON */
+static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
@@ -163,7 +111,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
-# endif /* !CONFIG_NO_HZ_COMMON */
+#endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
 extern bool tick_nohz_full_running;
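The enum-plus-wrapper interface replaces the old clockevents notifier calls for tick broadcast. An idle driver entering a C-state that stops the CPU-local timer would follow roughly this shape (foo_* is hypothetical):

#include <linux/errno.h>
#include <linux/tick.h>

static void foo_low_power_wait(void)
{
	/* hypothetical platform-specific low-power entry */
}

static int foo_enter_deep_idle(void)
{
	/* Local timer stops in this state: hand timekeeping to broadcast. */
	if (tick_broadcast_enter())
		return -EBUSY;	/* no broadcast device; pick a shallower state */

	foo_low_power_wait();

	tick_broadcast_exit();	/* wake path: reclaim the local tick */
	return 0;
}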
index 05af9a3348934602456ec18b9cd29df84ac5a167..fb86963859c772846dfc531fc9cc8c0825f36ac7 100644 (file)
  * @read:      Read function of @clock
  * @mask:      Bitmask for two's complement subtraction of non 64bit clocks
  * @cycle_last: @clock cycle value at last update
- * @mult:      NTP adjusted multiplier for scaled math conversion
+ * @mult:      (NTP adjusted) multiplier for scaled math conversion
  * @shift:     Shift value for scaled math conversion
  * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
- * @base_mono:  ktime_t (nanoseconds) base time for readout
+ * @base:      ktime_t (nanoseconds) base time for readout
  *
  * This struct has size 56 byte on 64 bit. Together with a seqcount it
  * occupies a single 64byte cache line.
  *
  * The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessor to clock monotonic.
+ * for fast NMI safe accessors.
  */
 struct tk_read_base {
        struct clocksource      *clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
        u32                     mult;
        u32                     shift;
        u64                     xtime_nsec;
-       ktime_t                 base_mono;
+       ktime_t                 base;
 };
 
 /**
  * struct timekeeper - Structure holding internal timekeeping values.
- * @tkr:               The readout base structure
+ * @tkr_mono:          The readout base structure for CLOCK_MONOTONIC
+ * @tkr_raw:           The readout base structure for CLOCK_MONOTONIC_RAW
  * @xtime_sec:         Current CLOCK_REALTIME time in seconds
  * @ktime_sec:         Current CLOCK_MONOTONIC time in seconds
  * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
  * @offs_boot:         Offset clock monotonic -> clock boottime
  * @offs_tai:          Offset clock monotonic -> clock tai
  * @tai_offset:                The current UTC to TAI offset in seconds
- * @base_raw:          Monotonic raw base time in ktime_t format
  * @raw_time:          Monotonic raw base time in timespec64 format
  * @cycle_interval:    Number of clock cycles in one NTP interval
  * @xtime_interval:    Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
  * used instead.
  */
 struct timekeeper {
-       struct tk_read_base     tkr;
+       struct tk_read_base     tkr_mono;
+       struct tk_read_base     tkr_raw;
        u64                     xtime_sec;
        unsigned long           ktime_sec;
        struct timespec64       wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
        ktime_t                 offs_boot;
        ktime_t                 offs_tai;
        s32                     tai_offset;
-       ktime_t                 base_raw;
        struct timespec64       raw_time;
 
        /* The following members are for timekeeping internal use */
index 3eaae47542751962579a3c6736f18917e4da7ad3..99176af216af449563e3a190b96edc04ea1a1f9e 100644 (file)
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
        return ktime_to_ns(ktime_get_boottime());
 }
 
+static inline u64 ktime_get_tai_ns(void)
+{
+       return ktime_to_ns(ktime_get_clocktai());
+}
+
 static inline u64 ktime_get_raw_ns(void)
 {
        return ktime_to_ns(ktime_get_raw());
 }
 
 extern u64 ktime_get_mono_fast_ns(void);
+extern u64 ktime_get_raw_fast_ns(void);
 
 /*
  * Timespec interfaces utilizing the ktime based ones
@@ -242,6 +248,9 @@ static inline void timekeeping_clocktai(struct timespec *ts)
 /*
  * RTC specific
  */
+extern bool timekeeping_rtc_skipsuspend(void);
+extern bool timekeeping_rtc_skipresume(void);
+
 extern void timekeeping_inject_sleeptime64(struct timespec64 *delta);
 
 /*
@@ -253,17 +262,14 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw,
 /*
  * Persistent clock related interfaces
  */
-extern bool persistent_clock_exist;
 extern int persistent_clock_is_local;
 
-static inline bool has_persistent_clock(void)
-{
-       return persistent_clock_exist;
-}
-
 extern void read_persistent_clock(struct timespec *ts);
+extern void read_persistent_clock64(struct timespec64 *ts);
 extern void read_boot_clock(struct timespec *ts);
+extern void read_boot_clock64(struct timespec64 *ts);
 extern int update_persistent_clock(struct timespec now);
+extern int update_persistent_clock64(struct timespec64 now);
 
 
 #endif
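The *64 variants carry the persistent-clock and boot-clock interfaces past the 2038 limit of their timespec cousins. Assuming the generic definitions remain weak as before, a platform would override them along these lines (the backup counter accessor is hypothetical):

#include <linux/timekeeping.h>

static time64_t foo_read_backup_seconds(void)
{
	return 0;	/* hypothetical battery-backed seconds counter */
}

void read_persistent_clock64(struct timespec64 *ts)
{
	ts->tv_sec  = foo_read_backup_seconds();
	ts->tv_nsec = 0;
}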
index 2d67b8998fd8b49d877d65b0b94a022be47d4e28..049b2f497bc79cac035faac38e4b808d9daf9af4 100644 (file)
@@ -78,19 +78,6 @@ extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 extern void vfio_unregister_iommu_driver(
                                const struct vfio_iommu_driver_ops *ops);
 
-/**
- * offsetofend(TYPE, MEMBER)
- *
- * @TYPE: The type of the structure
- * @MEMBER: The member within the structure to get the end offset of
- *
- * Simple helper macro for dealing with variable sized structures passed
- * from user space.  This allows us to easily determine if the provided
- * structure is sized to include various fields.
- */
-#define offsetofend(TYPE, MEMBER) \
-       (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
-
 /*
  * External user API
  */
index c2e570336269b8e2e3c574c607afc651d8f1a6eb..6008b0985b7b18a5e15c157a487ccfb6b640b312 100644 (file)
 #define                ISI_CFG1_FRATE_DIV_MASK         (7 << 8)
 #define ISI_CFG1_DISCR                         (1 << 11)
 #define ISI_CFG1_FULL_MODE                     (1 << 12)
+/* Definition for THMASK(ISI_V2) */
+#define                ISI_CFG1_THMASK_BEATS_4         (0 << 13)
+#define                ISI_CFG1_THMASK_BEATS_8         (1 << 13)
+#define                ISI_CFG1_THMASK_BEATS_16        (2 << 13)
 
 /* Bitfields in CFG2 */
 #define ISI_CFG2_GRAYSCALE                     (1 << 13)
diff --git a/include/trace/events/regmap.h b/include/trace/events/regmap.h
deleted file mode 100644 (file)
index 22317d2..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM regmap
-
-#if !defined(_TRACE_REGMAP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_REGMAP_H
-
-#include <linux/ktime.h>
-#include <linux/tracepoint.h>
-
-#include "../../../drivers/base/regmap/internal.h"
-
-/*
- * Log register events
- */
-DECLARE_EVENT_CLASS(regmap_reg,
-
-       TP_PROTO(struct regmap *map, unsigned int reg,
-                unsigned int val),
-
-       TP_ARGS(map, reg, val),
-
-       TP_STRUCT__entry(
-               __string(       name,           regmap_name(map)        )
-               __field(        unsigned int,   reg                     )
-               __field(        unsigned int,   val                     )
-       ),
-
-       TP_fast_assign(
-               __assign_str(name, regmap_name(map));
-               __entry->reg = reg;
-               __entry->val = val;
-       ),
-
-       TP_printk("%s reg=%x val=%x", __get_str(name),
-                 (unsigned int)__entry->reg,
-                 (unsigned int)__entry->val)
-);
-
-DEFINE_EVENT(regmap_reg, regmap_reg_write,
-
-       TP_PROTO(struct regmap *map, unsigned int reg,
-                unsigned int val),
-
-       TP_ARGS(map, reg, val)
-
-);
-
-DEFINE_EVENT(regmap_reg, regmap_reg_read,
-
-       TP_PROTO(struct regmap *map, unsigned int reg,
-                unsigned int val),
-
-       TP_ARGS(map, reg, val)
-
-);
-
-DEFINE_EVENT(regmap_reg, regmap_reg_read_cache,
-
-       TP_PROTO(struct regmap *map, unsigned int reg,
-                unsigned int val),
-
-       TP_ARGS(map, reg, val)
-
-);
-
-DECLARE_EVENT_CLASS(regmap_block,
-
-       TP_PROTO(struct regmap *map, unsigned int reg, int count),
-
-       TP_ARGS(map, reg, count),
-
-       TP_STRUCT__entry(
-               __string(       name,           regmap_name(map)        )
-               __field(        unsigned int,   reg                     )
-               __field(        int,            count                   )
-       ),
-
-       TP_fast_assign(
-               __assign_str(name, regmap_name(map));
-               __entry->reg = reg;
-               __entry->count = count;
-       ),
-
-       TP_printk("%s reg=%x count=%d", __get_str(name),
-                 (unsigned int)__entry->reg,
-                 (int)__entry->count)
-);
-
-DEFINE_EVENT(regmap_block, regmap_hw_read_start,
-
-       TP_PROTO(struct regmap *map, unsigned int reg, int count),
-
-       TP_ARGS(map, reg, count)
-);
-
-DEFINE_EVENT(regmap_block, regmap_hw_read_done,
-
-       TP_PROTO(struct regmap *map, unsigned int reg, int count),
-
-       TP_ARGS(map, reg, count)
-);
-
-DEFINE_EVENT(regmap_block, regmap_hw_write_start,
-
-       TP_PROTO(struct regmap *map, unsigned int reg, int count),
-
-       TP_ARGS(map, reg, count)
-);
-
-DEFINE_EVENT(regmap_block, regmap_hw_write_done,
-
-       TP_PROTO(struct regmap *map, unsigned int reg, int count),
-
-       TP_ARGS(map, reg, count)
-);
-
-TRACE_EVENT(regcache_sync,
-
-       TP_PROTO(struct regmap *map, const char *type,
-                const char *status),
-
-       TP_ARGS(map, type, status),
-
-       TP_STRUCT__entry(
-               __string(       name,           regmap_name(map)        )
-               __string(       status,         status                  )
-               __string(       type,           type                    )
-               __field(        int,            type                    )
-       ),
-
-       TP_fast_assign(
-               __assign_str(name, regmap_name(map));
-               __assign_str(status, status);
-               __assign_str(type, type);
-       ),
-
-       TP_printk("%s type=%s status=%s", __get_str(name),
-                 __get_str(type), __get_str(status))
-);
-
-DECLARE_EVENT_CLASS(regmap_bool,
-
-       TP_PROTO(struct regmap *map, bool flag),
-
-       TP_ARGS(map, flag),
-
-       TP_STRUCT__entry(
-               __string(       name,           regmap_name(map)        )
-               __field(        int,            flag                    )
-       ),
-
-       TP_fast_assign(
-               __assign_str(name, regmap_name(map));
-               __entry->flag = flag;
-       ),
-
-       TP_printk("%s flag=%d", __get_str(name),
-                 (int)__entry->flag)
-);
-
-DEFINE_EVENT(regmap_bool, regmap_cache_only,
-
-       TP_PROTO(struct regmap *map, bool flag),
-
-       TP_ARGS(map, flag)
-
-);
-
-DEFINE_EVENT(regmap_bool, regmap_cache_bypass,
-
-       TP_PROTO(struct regmap *map, bool flag),
-
-       TP_ARGS(map, flag)
-
-);
-
-DECLARE_EVENT_CLASS(regmap_async,
-
-       TP_PROTO(struct regmap *map),
-
-       TP_ARGS(map),
-
-       TP_STRUCT__entry(
-               __string(       name,           regmap_name(map)        )
-       ),
-
-       TP_fast_assign(
-               __assign_str(name, regmap_name(map));
-       ),
-
-       TP_printk("%s", __get_str(name))
-);
-
-DEFINE_EVENT(regmap_block, regmap_async_write_start,
-
-       TP_PROTO(struct regmap *map, unsigned int reg, int count),
-
-       TP_ARGS(map, reg, count)
-);
-
-DEFINE_EVENT(regmap_async, regmap_async_io_complete,
-
-       TP_PROTO(struct regmap *map),
-
-       TP_ARGS(map)
-
-);
-
-DEFINE_EVENT(regmap_async, regmap_async_complete_start,
-
-       TP_PROTO(struct regmap *map),
-
-       TP_ARGS(map)
-
-);
-
-DEFINE_EVENT(regmap_async, regmap_async_complete_done,
-
-       TP_PROTO(struct regmap *map),
-
-       TP_ARGS(map)
-
-);
-
-TRACE_EVENT(regcache_drop_region,
-
-       TP_PROTO(struct regmap *map, unsigned int from,
-                unsigned int to),
-
-       TP_ARGS(map, from, to),
-
-       TP_STRUCT__entry(
-               __string(       name,           regmap_name(map)        )
-               __field(        unsigned int,   from                    )
-               __field(        unsigned int,   to                      )
-       ),
-
-       TP_fast_assign(
-               __assign_str(name, regmap_name(map));
-               __entry->from = from;
-               __entry->to = to;
-       ),
-
-       TP_printk("%s %u-%u", __get_str(name), (unsigned int)__entry->from,
-                 (unsigned int)__entry->to)
-);
-
-#endif /* _TRACE_REGMAP_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>
index 805570650062e99dfbb20edbab759e1322462170..f574d7be7631e5795f26a8934391b8b6f1473f8e 100644 (file)
@@ -147,6 +147,16 @@ struct kvm_pit_config {
 
 #define KVM_PIT_SPEAKER_DUMMY     1
 
+struct kvm_s390_skeys {
+       __u64 start_gfn;
+       __u64 count;
+       __u64 skeydata_addr;
+       __u32 flags;
+       __u32 reserved[9];
+};
+#define KVM_S390_GET_SKEYS_NONE   1
+#define KVM_S390_SKEYS_MAX        1048576
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
@@ -172,6 +182,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_S390_TSCH        22
 #define KVM_EXIT_EPR              23
 #define KVM_EXIT_SYSTEM_EVENT     24
+#define KVM_EXIT_S390_STSI        25
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -309,6 +320,15 @@ struct kvm_run {
                        __u32 type;
                        __u64 flags;
                } system_event;
+               /* KVM_EXIT_S390_STSI */
+               struct {
+                       __u64 addr;
+                       __u8 ar;
+                       __u8 reserved;
+                       __u8 fc;
+                       __u8 sel1;
+                       __u16 sel2;
+               } s390_stsi;
                /* Fix the size of the union. */
                char padding[256];
        };
@@ -324,7 +344,7 @@ struct kvm_run {
        __u64 kvm_dirty_regs;
        union {
                struct kvm_sync_regs regs;
-               char padding[1024];
+               char padding[2048];
        } s;
 };
 
@@ -365,6 +385,24 @@ struct kvm_translation {
        __u8  pad[5];
 };
 
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+       /* in */
+       __u64 gaddr;            /* the guest address */
+       __u64 flags;            /* flags */
+       __u32 size;             /* amount of bytes */
+       __u32 op;               /* type of operation */
+       __u64 buf;              /* buffer in userspace */
+       __u8 ar;                /* the access register number */
+       __u8 reserved[31];      /* should be set to 0 */
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ    0
+#define KVM_S390_MEMOP_LOGICAL_WRITE   1
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
+
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
        /* in */
@@ -520,6 +558,13 @@ struct kvm_s390_irq {
        } u;
 };
 
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 flags;
+       __u32 len;
+       __u32 reserved[4];
+};
+
 /* for KVM_SET_GUEST_DEBUG */
 
 #define KVM_GUESTDBG_ENABLE            0x00000001
@@ -760,6 +805,14 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
 #define KVM_CAP_S390_USER_SIGP 106
+#define KVM_CAP_S390_VECTOR_REGISTERS 107
+#define KVM_CAP_S390_MEM_OP 108
+#define KVM_CAP_S390_USER_STSI 109
+#define KVM_CAP_S390_SKEYS 110
+#define KVM_CAP_MIPS_FPU 111
+#define KVM_CAP_MIPS_MSA 112
+#define KVM_CAP_S390_INJECT_IRQ 113
+#define KVM_CAP_S390_IRQ_STATE 114
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1135,6 +1188,16 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_VCPU_INIT        _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
 #define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST         _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
+/* Available with KVM_CAP_S390_MEM_OP */
+#define KVM_S390_MEM_OP                  _IOW(KVMIO,  0xb1, struct kvm_s390_mem_op)
+/* Available with KVM_CAP_S390_SKEYS */
+#define KVM_S390_GET_SKEYS      _IOW(KVMIO, 0xb2, struct kvm_s390_skeys)
+#define KVM_S390_SET_SKEYS      _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
+/* Available with KVM_CAP_S390_INJECT_IRQ */
+#define KVM_S390_IRQ              _IOW(KVMIO,  0xb4, struct kvm_s390_irq)
+/* Available with KVM_CAP_S390_IRQ_STATE */
+#define KVM_S390_SET_IRQ_STATE   _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
+#define KVM_S390_GET_IRQ_STATE   _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
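Userspace drives the new s390 memory operation through a vcpu ioctl; a trimmed sketch of a guest-memory read using the structure above (setup and error handling omitted):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Read 'len' bytes from guest logical address 'gaddr' into 'buf'. */
static int guest_read(int vcpu_fd, __u64 gaddr, void *buf, __u32 len)
{
	struct kvm_s390_mem_op op;

	memset(&op, 0, sizeof(op));	/* reserved[] must stay zeroed */
	op.gaddr = gaddr;
	op.size  = len;
	op.op    = KVM_S390_MEMOP_LOGICAL_READ;
	op.buf   = (__u64)(unsigned long)buf;

	return ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
}

Passing KVM_S390_MEMOP_F_CHECK_ONLY in op.flags would verify accessibility without copying any data.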
index 1972b161c61e98fbe3e3ce003744cf1d2e8c5b1c..82eea9c5af61c2922a221dfc59885b7409305cff 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
+#include <linux/tick.h>
 #include <trace/events/power.h>
 
 #include "smpboot.h"
@@ -338,6 +339,8 @@ static int __ref take_cpu_down(void *_param)
                return err;
 
        cpu_notify(CPU_DYING | param->mod, param->hcpu);
+       /* Give up timekeeping duties */
+       tick_handover_do_timer();
        /* Park the stopper thread */
        kthread_park(current);
        return 0;
@@ -411,10 +414,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        while (!idle_cpu(cpu))
                cpu_relax();
 
+       hotplug_cpu__broadcast_tick_pull(cpu);
        /* This actually kills the CPU. */
        __cpu_die(cpu);
 
        /* CPU is completely dead: tell everyone.  Too late to complain. */
+       tick_cleanup_dead_cpu(cpu);
        cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 
        check_for_tasks(cpu);
index 2a5e3830e953b72cca4aeac4ec537e276f0f355b..2579e407ff67d039106207f78a466f824e515db6 100644 (file)
@@ -900,7 +900,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
        if (!p)
                return -ESRCH;
 
-       if (!p->mm) {
+       if (unlikely(p->flags & PF_KTHREAD)) {
                put_task_struct(p);
                return -EPERM;
        }
index d1fe2ba5bac958bc85da8e8868408d8c6c809dc3..75e114bdf3f26f379c4382dce2bc5c128c06b868 100644 (file)
@@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
                 */
                return;
        }
-       ACCESS_ONCE(prev->next) = node;
+       WRITE_ONCE(prev->next, node);
 
        /* Wait until the lock holder passes the lock down. */
        arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 static inline
 void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
-       struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+       struct mcs_spinlock *next = READ_ONCE(node->next);
 
        if (likely(!next)) {
                /*
@@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
                if (likely(cmpxchg(lock, node, NULL) == node))
                        return;
                /* Wait until the next pointer is set */
-               while (!(next = ACCESS_ONCE(node->next)))
+               while (!(next = READ_ONCE(node->next)))
                        cpu_relax_lowlatency();
        }
 
index 94674e5919cba54e339addf0c7c7cf2b90f75c27..4cccea6b8934f5697fa3dfa609ae4bd10db78b6f 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
-#include "mcs_spinlock.h"
+#include <linux/osq_lock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-       if (lock->owner != owner)
-               return false;
-
-       /*
-        * Ensure we emit the owner->on_cpu, dereference _after_ checking
-        * lock->owner still matches owner, if that fails, owner might
-        * point to free()d memory, if it still matches, the rcu_read_lock()
-        * ensures the memory stays valid.
-        */
-       barrier();
-
-       return owner->on_cpu;
-}
-
 /*
  * Look out! "owner" is an entirely speculative pointer
  * access and not reliable.
  */
 static noinline
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+       bool ret = true;
+
        rcu_read_lock();
-       while (owner_running(lock, owner)) {
-               if (need_resched())
+       while (lock->owner == owner) {
+               /*
+                * Ensure we emit the owner->on_cpu, dereference _after_
+                * checking lock->owner still matches owner. If that fails,
+                * owner might point to freed memory. If it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+
+               if (!owner->on_cpu || need_resched()) {
+                       ret = false;
                        break;
+               }
 
                cpu_relax_lowlatency();
        }
        rcu_read_unlock();
 
-       /*
-        * We break out the loop above on need_resched() and when the
-        * owner changed, which is a sign for heavy contention. Return
-        * success only when lock->owner is NULL.
-        */
-       return lock->owner == NULL;
+       return ret;
 }
 
 /*
@@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
                return 0;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(lock->owner);
+       owner = READ_ONCE(lock->owner);
        if (owner)
                retval = owner->on_cpu;
        rcu_read_unlock();
@@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
                         * As such, when deadlock detection needs to be
                         * performed the optimistic spinning cannot be done.
                         */
-                       if (ACCESS_ONCE(ww->ctx))
+                       if (READ_ONCE(ww->ctx))
                                break;
                }
 
@@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
                 * If there's an owner, wait for it to either
                 * release the lock or go to sleep.
                 */
-               owner = ACCESS_ONCE(lock->owner);
+               owner = READ_ONCE(lock->owner);
                if (owner && !mutex_spin_on_owner(lock, owner))
                        break;
 
@@ -490,7 +481,7 @@ static inline int __sched
 __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 {
        struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
-       struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+       struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 
        if (!hold_ctx)
                return 0;
index c112d00341b05773934ecdb2977c0d2aca1d5c11..dc85ee23a26f79416a140241e3067a5a2ca24d0b 100644 (file)
@@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 
        prev = decode_cpu(old);
        node->prev = prev;
-       ACCESS_ONCE(prev->next) = node;
+       WRITE_ONCE(prev->next, node);
 
        /*
         * Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
         * cmpxchg in an attempt to undo our queueing.
         */
 
-       while (!ACCESS_ONCE(node->locked)) {
+       while (!READ_ONCE(node->locked)) {
                /*
                 * If we need to reschedule bail... so we can block.
                 */
@@ -148,7 +148,7 @@ unqueue:
                 * Or we race against a concurrent unqueue()'s step-B, in which
                 * case its step-C will write us a new @node->prev pointer.
                 */
-               prev = ACCESS_ONCE(node->prev);
+               prev = READ_ONCE(node->prev);
        }
 
        /*
@@ -170,8 +170,8 @@ unqueue:
         * it will wait in Step-A.
         */
 
-       ACCESS_ONCE(next->prev) = prev;
-       ACCESS_ONCE(prev->next) = next;
+       WRITE_ONCE(next->prev, prev);
+       WRITE_ONCE(prev->next, next);
 
        return false;
 }
@@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock)
        node = this_cpu_ptr(&osq_node);
        next = xchg(&node->next, NULL);
        if (next) {
-               ACCESS_ONCE(next->locked) = 1;
+               WRITE_ONCE(next->locked, 1);
                return;
        }
 
        next = osq_wait_next(lock, node, NULL);
        if (next)
-               ACCESS_ONCE(next->locked) = 1;
+               WRITE_ONCE(next->locked, 1);
 }
index 6357265a31ad1a34b881aba31abe27ab6d3921ec..b73279367087ca779072b79a784f69224929c149 100644 (file)
@@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  *
  * @task:      the task owning the mutex (owner) for which a chain walk is
  *             probably needed
- * @deadlock_detect: do we have to carry out deadlock detection?
+ * @chwalk:    do we have to carry out deadlock detection?
  * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
  *             things for a task that has just got its priority adjusted, and
  *             is waiting on a mutex)
index 2555ae15ec14c78d6c8f5030fea52daa74b5a5c9..3a504857206536f68fda513fecf63841d2979a06 100644 (file)
@@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 
                list_del(&waiter->list);
                tsk = waiter->task;
+               /*
+                * Make sure we do not wakeup the next reader before
+                * setting the nil condition to grant the next reader;
+                * otherwise we could miss the wakeup on the other
+                * side and end up sleeping again. See the pairing
+                * in rwsem_down_read_failed().
+                */
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
index 2f7cc4076f50aa0c534c22e527ab3d1f11ce9a66..3417d0172a5d2e7cd69460ed4ef96c02f6c578d0 100644 (file)
@@ -14,8 +14,9 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/sched/rt.h>
+#include <linux/osq_lock.h>
 
-#include "mcs_spinlock.h"
+#include "rwsem.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
+               /*
+                * Make sure we do not wakeup the next reader before
+                * setting the nil condition to grant the next reader;
+                * otherwise we could miss the wakeup on the other
+                * side and end up sleeping again. See the pairing
+                * in rwsem_down_read_failed().
+                */
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
@@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
                    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
                if (!list_is_singular(&sem->wait_list))
                        rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+               rwsem_set_owner(sem);
                return true;
        }
 
@@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
  */
 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
-       long old, count = ACCESS_ONCE(sem->count);
+       long old, count = READ_ONCE(sem->count);
 
        while (true) {
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;
 
                old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
-               if (old == count)
+               if (old == count) {
+                       rwsem_set_owner(sem);
                        return true;
+               }
 
                count = old;
        }
@@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
        struct task_struct *owner;
-       bool on_cpu = false;
+       bool ret = true;
 
        if (need_resched())
                return false;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(sem->owner);
-       if (owner)
-               on_cpu = owner->on_cpu;
-       rcu_read_unlock();
-
-       /*
-        * If sem->owner is not set, yet we have just recently entered the
-        * slowpath, then there is a possibility reader(s) may have the lock.
-        * To be safe, avoid spinning in these situations.
-        */
-       return on_cpu;
-}
-
-static inline bool owner_running(struct rw_semaphore *sem,
-                                struct task_struct *owner)
-{
-       if (sem->owner != owner)
-               return false;
-
-       /*
-        * Ensure we emit the owner->on_cpu, dereference _after_ checking
-        * sem->owner still matches owner, if that fails, owner might
-        * point to free()d memory, if it still matches, the rcu_read_lock()
-        * ensures the memory stays valid.
-        */
-       barrier();
+       owner = READ_ONCE(sem->owner);
+       if (!owner) {
+               long count = READ_ONCE(sem->count);
+               /*
+                * If sem->owner is not set, yet we have just recently entered the
+                * slowpath with the lock being active, then there is a possibility
+                * reader(s) may have the lock. To be safe, bail spinning in these
+                * situations.
+                */
+               if (count & RWSEM_ACTIVE_MASK)
+                       ret = false;
+               goto done;
+       }
 
-       return owner->on_cpu;
+       ret = owner->on_cpu;
+done:
+       rcu_read_unlock();
+       return ret;
 }
 
 static noinline
 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 {
+       long count;
+
        rcu_read_lock();
-       while (owner_running(sem, owner)) {
-               if (need_resched())
-                       break;
+       while (sem->owner == owner) {
+               /*
+                * Ensure we emit the owner->on_cpu, dereference _after_
+                * checking sem->owner still matches owner. If that fails,
+                * owner might point to freed memory. If it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+
+               /* abort spinning when need_resched or owner is not running */
+               if (!owner->on_cpu || need_resched()) {
+                       rcu_read_unlock();
+                       return false;
+               }
 
                cpu_relax_lowlatency();
        }
        rcu_read_unlock();
 
+       if (READ_ONCE(sem->owner))
+               return true; /* new owner, continue spinning */
+
        /*
-        * We break out the loop above on need_resched() or when the
-        * owner changed, which is a sign for heavy contention. Return
-        * success only when sem->owner is NULL.
+        * When the owner is not set, the lock could be free or
+        * held by readers. Check the counter to verify the
+        * state.
         */
-       return sem->owner == NULL;
+       count = READ_ONCE(sem->count);
+       return (count == 0 || count == RWSEM_WAITING_BIAS);
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
                goto done;
 
        while (true) {
-               owner = ACCESS_ONCE(sem->owner);
+               owner = READ_ONCE(sem->owner);
                if (owner && !rwsem_spin_on_owner(sem, owner))
                        break;
 
@@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 
        /* we're now waiting on the lock, but no longer actively locking */
        if (waiting) {
-               count = ACCESS_ONCE(sem->count);
+               count = READ_ONCE(sem->count);
 
                /*
                 * If there were already threads queued before us and there are
index e2d3bc7f03b41e1c01a7c8fc548ac162cdfa151e..205be0ce34de73e8590f2cd4cdb369526b236540 100644 (file)
@@ -9,29 +9,9 @@
 #include <linux/sched.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
-
 #include <linux/atomic.h>
 
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-       sem->owner = current;
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-       sem->owner = NULL;
-}
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-#endif
+#include "rwsem.h"
 
 /*
  * lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644 (file)
index 0000000..870ed9a
--- /dev/null
@@ -0,0 +1,20 @@
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+       sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+       sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
index 99fdf94efce80f432fc4ab203aeb2a918ad5c564..ec53f594e9c9fcf434a4ffaf48ff2405d7272b73 100644 (file)
@@ -2479,6 +2479,23 @@ static int elf_header_check(struct load_info *info)
        return 0;
 }
 
+#define COPY_CHUNK_SIZE (16*PAGE_SIZE)
+
+static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned long len)
+{
+       do {
+               unsigned long n = min(len, COPY_CHUNK_SIZE);
+
+               if (copy_from_user(dst, usrc, n) != 0)
+                       return -EFAULT;
+               cond_resched();
+               dst += n;
+               usrc += n;
+               len -= n;
+       } while (len);
+       return 0;
+}
+
 /* Sets info->hdr and info->len. */
 static int copy_module_from_user(const void __user *umod, unsigned long len,
                                  struct load_info *info)
@@ -2498,7 +2515,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
        if (!info->hdr)
                return -ENOMEM;
 
-       if (copy_from_user(info->hdr, umod, info->len) != 0) {
+       if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) {
                vfree(info->hdr);
                return -EFAULT;
        }
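For scale: with 4 KiB pages, COPY_CHUNK_SIZE is 16 × 4 KiB = 64 KiB, so copying a 64 MiB module image now passes through 64 MiB / 64 KiB = 1024 cond_resched() points where the old single copy_from_user() offered none, bounding the scheduling latency a huge init_module() can inflict.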
index c24d5a23bf939be71f75736efdbd3874da157c2d..5235dd4e1e2f68a97fa6836d98854d2e8e46724e 100644 (file)
@@ -955,25 +955,6 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
        }
 }
 
-static bool is_nosave_page(unsigned long pfn)
-{
-       struct nosave_region *region;
-
-       list_for_each_entry(region, &nosave_regions, list) {
-               if (pfn >= region->start_pfn && pfn < region->end_pfn) {
-                       pr_err("PM: %#010llx in e820 nosave region: "
-                              "[mem %#010llx-%#010llx]\n",
-                              (unsigned long long) pfn << PAGE_SHIFT,
-                              (unsigned long long) region->start_pfn << PAGE_SHIFT,
-                              ((unsigned long long) region->end_pfn << PAGE_SHIFT)
-                                       - 1);
-                       return true;
-               }
-       }
-
-       return false;
-}
-
 /**
  *     create_basic_memory_bitmaps - create bitmaps needed for marking page
  *     frames that should not be saved and free page frames.  The pointers
@@ -2042,7 +2023,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
        do {
                pfn = memory_bm_next_pfn(bm);
                if (likely(pfn != BM_END_OF_MAP)) {
-                       if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn))
+                       if (likely(pfn_valid(pfn)))
                                swsusp_set_page_free(pfn_to_page(pfn));
                        else
                                return -EFAULT;
index 62671f53202ac7d4de8037dce950c934c7a4ddbc..261af7bfcb67d55e9ec9c19f657453f3e2607803 100644 (file)
@@ -689,6 +689,23 @@ static inline bool got_nohz_idle_kick(void)
 #ifdef CONFIG_NO_HZ_FULL
 bool sched_can_stop_tick(void)
 {
+       /*
+        * FIFO realtime policy runs the highest priority task. Other runnable
+        * tasks are of a lower priority. The scheduler tick does nothing.
+        */
+       if (current->policy == SCHED_FIFO)
+               return true;
+
+       /*
+        * Round-robin realtime tasks time slice with other tasks at the same
+        * realtime priority. Is this task the only one at this priority?
+        */
+       if (current->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &current->rt;
+
+               return rt_se->run_list.prev == rt_se->run_list.next;
+       }
+
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
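The run_list test in the SCHED_RR branch above open-codes "this entity is the only one queued at its priority": for a node on a circular doubly linked list, prev and next coincide exactly when the list head is its sole neighbour. The same test factored out, with an illustrative name:

#include <linux/list.h>

/* True when @node is the only entry on its (circular) list, i.e. both
 * of its links point at the list head. The node-side analogue of
 * list_is_singular() on the head, assuming @node is queued. */
static inline bool node_is_sole_entry(const struct list_head *node)
{
	return node->prev == node->next;
}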
@@ -996,6 +1013,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
                rq_clock_skip_update(rq, true);
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+       atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1026,10 +1050,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
        trace_sched_migrate_task(p, new_cpu);
 
        if (task_cpu(p) != new_cpu) {
+               struct task_migration_notifier tmn;
+
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
                perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+
+               tmn.task = p;
+               tmn.from_cpu = task_cpu(p);
+               tmn.to_cpu = new_cpu;
+
+               atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
        }
 
        __set_task_cpu(p, new_cpu);
@@ -5320,36 +5352,13 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
-       unsigned long flags;
-       long cpu = (long)hcpu;
-       struct dl_bw *dl_b;
-
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
-               set_cpu_active(cpu, false);
-
-               /* explicitly allow suspend */
-               if (!(action & CPU_TASKS_FROZEN)) {
-                       bool overflow;
-                       int cpus;
-
-                       rcu_read_lock_sched();
-                       dl_b = dl_bw_of(cpu);
-
-                       raw_spin_lock_irqsave(&dl_b->lock, flags);
-                       cpus = dl_bw_cpus(cpu);
-                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
-                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-                       rcu_read_unlock_sched();
-
-                       if (overflow)
-                               return notifier_from_errno(-EBUSY);
-               }
+               set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
        }
-
-       return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5430,17 +5439,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
 
-               /*
-                * Even though we initialize ->capacity to something semi-sane,
-                * we leave capacity_orig unset. This allows us to detect if
-                * domain iteration is still funny without causing /0 traps.
-                */
-               if (!group->sgc->capacity_orig) {
-                       printk(KERN_CONT "\n");
-                       printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-                       break;
-               }
-
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@ -5924,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-               sg->sgc->capacity_orig = sg->sgc->capacity;
 
                /*
                 * Make sure the first group of this domain contains the
@@ -6235,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
         */
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
+               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -7000,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                 */
 
        case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
@@ -7012,8 +7009,30 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
-       switch (action) {
+       unsigned long flags;
+       long cpu = (long)hcpu;
+       struct dl_bw *dl_b;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
+               /* explicitly allow suspend */
+               if (!(action & CPU_TASKS_FROZEN)) {
+                       bool overflow;
+                       int cpus;
+
+                       rcu_read_lock_sched();
+                       dl_b = dl_bw_of(cpu);
+
+                       raw_spin_lock_irqsave(&dl_b->lock, flags);
+                       cpus = dl_bw_cpus(cpu);
+                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
+                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+                       rcu_read_unlock_sched();
+
+                       if (overflow)
+                               return notifier_from_errno(-EBUSY);
+               }
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
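
For reference, a minimal userspace sketch (not part of this diff) of the admission test that __dl_overflow(dl_b, cpus, 0, 0) reduces to in the CPU_DOWN_PREPARE path above; the 972/1024 (~95%) bandwidth encoding is illustrative only:

#include <stdio.h>

/* Refuse the unplug when the deadline bandwidth already granted no
 * longer fits on the CPUs that would remain. */
static int dl_down_would_overflow(unsigned long long total_bw,
                                  unsigned long long per_cpu_bw,
                                  int remaining_cpus)
{
        /* per_cpu_bw == -1ULL encodes "admission control disabled" */
        return per_cpu_bw != -1ULL &&
               total_bw > per_cpu_bw * remaining_cpus;
}

int main(void)
{
        /* three CPUs' worth of 95% bandwidth granted, two CPUs left */
        printf("%d\n", dl_down_would_overflow(3 * 972, 972, 2)); /* 1 */
        return 0;
}
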
@@ -7158,8 +7177,8 @@ void __init sched_init(void)
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
-               init_rt_rq(&rq->rt, rq);
-               init_dl_rq(&rq->dl, rq);
+               init_rt_rq(&rq->rt);
+               init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7199,7 +7218,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+               rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -7798,7 +7817,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@ -7899,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
                if (ret)
                        goto undo;
 
-               ret = sched_rt_global_constraints();
+               ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
 
-               ret = sched_dl_global_constraints();
+               ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;
 
index 3fa8fa6d940300c1fbae721503aad2666f72b4e5..5e95145088fd37b3d07ccac66c3cd58f7effe10a 100644 (file)
@@ -69,7 +69,7 @@ void init_dl_bw(struct dl_bw *dl_b)
        dl_b->total_bw = 0;
 }
 
-void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
+void init_dl_rq(struct dl_rq *dl_rq)
 {
        dl_rq->rb_root = RB_ROOT;
 
@@ -218,6 +218,52 @@ static inline void set_post_schedule(struct rq *rq)
        rq->post_schedule = has_pushable_dl_tasks(rq);
 }
 
+static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
+
+static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
+{
+       struct rq *later_rq = NULL;
+       bool fallback = false;
+
+       later_rq = find_lock_later_rq(p, rq);
+
+       if (!later_rq) {
+               int cpu;
+
+               /*
+                * If we cannot preempt any rq, fall back to pick any
+                * online cpu.
+                */
+               fallback = true;
+               cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
+               if (cpu >= nr_cpu_ids) {
+                       /*
+                        * Failed to find any suitable cpu.
+                        * The task will never come back!
+                        */
+                       BUG_ON(dl_bandwidth_enabled());
+
+                       /*
+                        * If admission control is disabled we
+                        * try a little harder to let the task
+                        * run.
+                        */
+                       cpu = cpumask_any(cpu_active_mask);
+               }
+               later_rq = cpu_rq(cpu);
+               double_lock_balance(rq, later_rq);
+       }
+
+       deactivate_task(rq, p, 0);
+       set_task_cpu(p, later_rq->cpu);
+       activate_task(later_rq, p, ENQUEUE_REPLENISH);
+
+       if (!fallback)
+               resched_curr(later_rq);
+
+       double_unlock_balance(rq, later_rq);
+}
+
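
A minimal sketch (not kernel code) of the fallback choice made above, with plain bitmasks standing in for struct cpumask; NR_CPU_IDS and the helper name are illustrative:

#define NR_CPU_IDS 8

/* Prefer a CPU that is both active and allowed for the task; if that
 * intersection is empty (only possible with admission control off),
 * settle for any active CPU at all. */
static int pick_fallback_cpu(unsigned long active, unsigned long allowed)
{
        unsigned long pick = (active & allowed) ? (active & allowed) : active;
        int cpu;

        for (cpu = 0; cpu < NR_CPU_IDS; cpu++)
                if (pick & (1UL << cpu))
                        return cpu;
        return NR_CPU_IDS;              /* no active CPU at all */
}

int main(void)
{
        /* CPUs 0-3 active; task allowed only on offline CPUs 4-5 */
        return pick_fallback_cpu(0x0f, 0x30);   /* falls back to CPU 0 */
}
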
 #else
 
 static inline
@@ -514,7 +560,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
        unsigned long flags;
        struct rq *rq;
 
-       rq = task_rq_lock(current, &flags);
+       rq = task_rq_lock(p, &flags);
 
        /*
         * We need to take care of several possible races here:
@@ -536,6 +582,17 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
        sched_clock_tick();
        update_rq_clock(rq);
 
+#ifdef CONFIG_SMP
+       /*
+        * If we find that the rq the task was on is no longer
+        * available, we need to select a new rq.
+        */
+       if (unlikely(!rq->online)) {
+               dl_task_offline_migration(rq, p);
+               goto unlock;
+       }
+#endif
+
        /*
         * If the throttle happened during sched-out; like:
         *
@@ -569,7 +626,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
                push_dl_task(rq);
 #endif
 unlock:
-       task_rq_unlock(rq, current, &flags);
+       task_rq_unlock(rq, p, &flags);
 
        return HRTIMER_NORESTART;
 }
@@ -914,6 +971,12 @@ static void yield_task_dl(struct rq *rq)
        }
        update_rq_clock(rq);
        update_curr_dl(rq);
+       /*
+        * Tell update_rq_clock() that we've just updated,
+        * so we don't do a microscopic update in schedule()
+        * and double the fastpath cost.
+        */
+       rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1659,14 +1722,6 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
        int check_resched = 1;
 
-       /*
-        * If p is throttled, don't consider the possibility
-        * of preempting rq->curr, the check will be done right
-        * after its runtime will get replenished.
-        */
-       if (unlikely(p->dl.dl_throttled))
-               return;
-
        if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
                if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
index 8baaf858d25c49921eaa3d9a83235b9f0d2b8c6c..a245c1fc6f0a610f17e2d13635306d681e2ef821 100644 (file)
@@ -71,7 +71,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
        if (!se) {
                struct sched_avg *avg = &cpu_rq(cpu)->avg;
                P(avg->runnable_avg_sum);
-               P(avg->runnable_avg_period);
+               P(avg->avg_period);
                return;
        }
 
@@ -94,8 +94,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
        P(se->load.weight);
 #ifdef CONFIG_SMP
        P(se->avg.runnable_avg_sum);
-       P(se->avg.runnable_avg_period);
+       P(se->avg.running_avg_sum);
+       P(se->avg.avg_period);
        P(se->avg.load_avg_contrib);
+       P(se->avg.utilization_avg_contrib);
        P(se->avg.decay_count);
 #endif
 #undef PN
@@ -214,6 +216,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
                        cfs_rq->runnable_load_avg);
        SEQ_printf(m, "  .%-30s: %ld\n", "blocked_load_avg",
                        cfs_rq->blocked_load_avg);
+       SEQ_printf(m, "  .%-30s: %ld\n", "utilization_load_avg",
+                       cfs_rq->utilization_load_avg);
 #ifdef CONFIG_FAIR_GROUP_SCHED
        SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_contrib",
                        cfs_rq->tg_load_contrib);
@@ -636,8 +640,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        P(se.load.weight);
 #ifdef CONFIG_SMP
        P(se.avg.runnable_avg_sum);
-       P(se.avg.runnable_avg_period);
+       P(se.avg.running_avg_sum);
+       P(se.avg.avg_period);
        P(se.avg.load_avg_contrib);
+       P(se.avg.utilization_avg_contrib);
        P(se.avg.decay_count);
 #endif
        P(policy);
index bcfe32088b3768363c2f37502a953b61a361f7ff..ffeaa4105e48a36105ecaea8967082e1e7a7af98 100644 (file)
@@ -670,6 +670,7 @@ static int select_idle_sibling(struct task_struct *p, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
 
 static inline void __update_task_entity_contrib(struct sched_entity *se);
+static inline void __update_task_entity_utilization(struct sched_entity *se);
 
 /* Give a new task initial runnable values to weight its load heavily in its infant time */
 void init_task_runnable_average(struct task_struct *p)
@@ -677,9 +678,10 @@ void init_task_runnable_average(struct task_struct *p)
        u32 slice;
 
        slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
-       p->se.avg.runnable_avg_sum = slice;
-       p->se.avg.runnable_avg_period = slice;
+       p->se.avg.runnable_avg_sum = p->se.avg.running_avg_sum = slice;
+       p->se.avg.avg_period = slice;
        __update_task_entity_contrib(&p->se);
+       __update_task_entity_utilization(&p->se);
 }
 #else
 void init_task_runnable_average(struct task_struct *p)
@@ -1196,9 +1198,11 @@ static void task_numa_assign(struct task_numa_env *env,
 static bool load_too_imbalanced(long src_load, long dst_load,
                                struct task_numa_env *env)
 {
-       long imb, old_imb;
-       long orig_src_load, orig_dst_load;
        long src_capacity, dst_capacity;
+       long orig_src_load;
+       long load_a, load_b;
+       long moved_load;
+       long imb;
 
        /*
         * The load is corrected for the CPU capacity available on each node.
@@ -1211,30 +1215,39 @@ static bool load_too_imbalanced(long src_load, long dst_load,
        dst_capacity = env->dst_stats.compute_capacity;
 
        /* We care about the slope of the imbalance, not the direction. */
-       if (dst_load < src_load)
-               swap(dst_load, src_load);
+       load_a = dst_load;
+       load_b = src_load;
+       if (load_a < load_b)
+               swap(load_a, load_b);
 
        /* Is the difference below the threshold? */
-       imb = dst_load * src_capacity * 100 -
-             src_load * dst_capacity * env->imbalance_pct;
+       imb = load_a * src_capacity * 100 -
+               load_b * dst_capacity * env->imbalance_pct;
        if (imb <= 0)
                return false;
 
        /*
         * The imbalance is above the allowed threshold.
-        * Compare it with the old imbalance.
+        * Allow a move that brings us closer to a balanced situation,
+        * without moving things past the point of balance.
         */
        orig_src_load = env->src_stats.load;
-       orig_dst_load = env->dst_stats.load;
 
-       if (orig_dst_load < orig_src_load)
-               swap(orig_dst_load, orig_src_load);
-
-       old_imb = orig_dst_load * src_capacity * 100 -
-                 orig_src_load * dst_capacity * env->imbalance_pct;
+       /*
+        * In a task swap, there will be one load moving from src to dst,
+        * and another moving back. This is the net sum of both moves.
+        * A simple task move will always have a positive value.
+        * Allow the move if it brings the system closer to a balanced
+        * situation, without crossing over the balance point.
+        */
+       moved_load = orig_src_load - src_load;
 
-       /* Would this change make things worse? */
-       return (imb > old_imb);
+       if (moved_load > 0)
+               /* Moving src -> dst. Did we overshoot balance? */
+               return src_load * dst_capacity < dst_load * src_capacity;
+       else
+               /* Moving dst -> src. Did we overshoot balance? */
+               return dst_load * src_capacity < src_load * dst_capacity;
 }
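
A worked example of the overshoot test, assuming equal src and dst capacities so the cross-multiplication reduces to a direct load comparison (all numbers illustrative):

#include <stdio.h>

int main(void)
{
        long src_load = 450, dst_load = 550;    /* loads after the move */
        long moved_load = 600 - src_load;       /* orig_src_load was 600 */

        if (moved_load > 0)                     /* move went src -> dst */
                printf("overshoot, reject: %d\n", src_load < dst_load);
        return 0;                               /* prints 1: rejected */
}

Moving 150 units from a 600/400 split produces a 450/550 split: the move crossed the balance point (src ended lighter than dst), so it is rejected even though the absolute difference shrank from 200 to 100.
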
 
 /*
@@ -1675,7 +1688,7 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
                *period = now - p->last_task_numa_placement;
        } else {
                delta = p->se.avg.runnable_avg_sum;
-               *period = p->se.avg.runnable_avg_period;
+               *period = p->se.avg.avg_period;
        }
 
        p->last_sum_exec_runtime = runtime;
@@ -1765,6 +1778,8 @@ static int preferred_group_nid(struct task_struct *p, int nid)
                        }
                }
                /* Next round, evaluate the nodes within max_group. */
+               if (!max_faults)
+                       break;
                nodes = max_group;
        }
        return nid;
@@ -2165,8 +2180,10 @@ void task_numa_work(struct callback_head *work)
                vma = mm->mmap;
        }
        for (; vma; vma = vma->vm_next) {
-               if (!vma_migratable(vma) || !vma_policy_mof(vma))
+               if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
+                       is_vm_hugetlb_page(vma)) {
                        continue;
+               }
 
                /*
                 * Shared library pages mapped by multiple processes are not
@@ -2501,13 +2518,15 @@ static u32 __compute_runnable_contrib(u64 n)
  *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
-static __always_inline int __update_entity_runnable_avg(u64 now,
+static __always_inline int __update_entity_runnable_avg(u64 now, int cpu,
                                                        struct sched_avg *sa,
-                                                       int runnable)
+                                                       int runnable,
+                                                       int running)
 {
        u64 delta, periods;
        u32 runnable_contrib;
        int delta_w, decayed = 0;
+       unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu);
 
        delta = now - sa->last_runnable_update;
        /*
@@ -2529,7 +2548,7 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
        sa->last_runnable_update = now;
 
        /* delta_w is the amount already accumulated against our next period */
-       delta_w = sa->runnable_avg_period % 1024;
+       delta_w = sa->avg_period % 1024;
        if (delta + delta_w >= 1024) {
                /* period roll-over */
                decayed = 1;
@@ -2542,7 +2561,10 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
                delta_w = 1024 - delta_w;
                if (runnable)
                        sa->runnable_avg_sum += delta_w;
-               sa->runnable_avg_period += delta_w;
+               if (running)
+                       sa->running_avg_sum += delta_w * scale_freq
+                               >> SCHED_CAPACITY_SHIFT;
+               sa->avg_period += delta_w;
 
                delta -= delta_w;
 
@@ -2552,20 +2574,28 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 
                sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
                                                  periods + 1);
-               sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
+               sa->running_avg_sum = decay_load(sa->running_avg_sum,
+                                                 periods + 1);
+               sa->avg_period = decay_load(sa->avg_period,
                                                     periods + 1);
 
                /* Efficiently calculate \sum (1..n_period) 1024*y^i */
                runnable_contrib = __compute_runnable_contrib(periods);
                if (runnable)
                        sa->runnable_avg_sum += runnable_contrib;
-               sa->runnable_avg_period += runnable_contrib;
+               if (running)
+                       sa->running_avg_sum += runnable_contrib * scale_freq
+                               >> SCHED_CAPACITY_SHIFT;
+               sa->avg_period += runnable_contrib;
        }
 
        /* Remainder of delta accrued against u_0` */
        if (runnable)
                sa->runnable_avg_sum += delta;
-       sa->runnable_avg_period += delta;
+       if (running)
+               sa->running_avg_sum += delta * scale_freq
+                       >> SCHED_CAPACITY_SHIFT;
+       sa->avg_period += delta;
 
        return decayed;
 }
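
The update above follows the geometric series described earlier in this function's comment block: every full 1024us period multiplies the accumulated sums by y, where y is chosen so that y^32 = 1/2. A floating-point sketch of the convergence (the kernel uses fixed-point lookup tables instead; compile with -lm):

#include <math.h>
#include <stdio.h>

int main(void)
{
        double y = pow(0.5, 1.0 / 32.0);        /* y^32 == 1/2 */
        double sum = 0.0;
        int i;

        /* an entity runnable for 200 consecutive full periods */
        for (i = 0; i < 200; i++)
                sum = sum * y + 1024.0;

        /* converges toward 1024 / (1 - y) ~ 47.8k, which the kernel's
         * fixed-point tables encode as LOAD_AVG_MAX */
        printf("runnable_avg_sum ~= %.0f\n", sum);
        return 0;
}
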
@@ -2582,6 +2612,8 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
                return 0;
 
        se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+       se->avg.utilization_avg_contrib =
+               decay_load(se->avg.utilization_avg_contrib, decays);
 
        return decays;
 }
@@ -2617,7 +2649,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
 
        /* The fraction of a cpu used by this cfs_rq */
        contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
-                         sa->runnable_avg_period + 1);
+                         sa->avg_period + 1);
        contrib -= cfs_rq->tg_runnable_contrib;
 
        if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
@@ -2670,7 +2702,8 @@ static inline void __update_group_entity_contrib(struct sched_entity *se)
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
-       __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable);
+       __update_entity_runnable_avg(rq_clock_task(rq), cpu_of(rq), &rq->avg,
+                       runnable, runnable);
        __update_tg_runnable_avg(&rq->avg, &rq->cfs);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -2688,7 +2721,7 @@ static inline void __update_task_entity_contrib(struct sched_entity *se)
 
        /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
        contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);
-       contrib /= (se->avg.runnable_avg_period + 1);
+       contrib /= (se->avg.avg_period + 1);
        se->avg.load_avg_contrib = scale_load(contrib);
 }
 
@@ -2707,6 +2740,30 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
        return se->avg.load_avg_contrib - old_contrib;
 }
 
+
+static inline void __update_task_entity_utilization(struct sched_entity *se)
+{
+       u32 contrib;
+
+       /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
+       contrib = se->avg.running_avg_sum * scale_load_down(SCHED_LOAD_SCALE);
+       contrib /= (se->avg.avg_period + 1);
+       se->avg.utilization_avg_contrib = scale_load(contrib);
+}
+
+static long __update_entity_utilization_avg_contrib(struct sched_entity *se)
+{
+       long old_contrib = se->avg.utilization_avg_contrib;
+
+       if (entity_is_task(se))
+               __update_task_entity_utilization(se);
+       else
+               se->avg.utilization_avg_contrib =
+                                       group_cfs_rq(se)->utilization_load_avg;
+
+       return se->avg.utilization_avg_contrib - old_contrib;
+}
+
 static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
                                                 long load_contrib)
 {
@@ -2723,7 +2780,8 @@ static inline void update_entity_load_avg(struct sched_entity *se,
                                          int update_cfs_rq)
 {
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
-       long contrib_delta;
+       long contrib_delta, utilization_delta;
+       int cpu = cpu_of(rq_of(cfs_rq));
        u64 now;
 
        /*
@@ -2735,18 +2793,22 @@ static inline void update_entity_load_avg(struct sched_entity *se,
        else
                now = cfs_rq_clock_task(group_cfs_rq(se));
 
-       if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
+       if (!__update_entity_runnable_avg(now, cpu, &se->avg, se->on_rq,
+                                       cfs_rq->curr == se))
                return;
 
        contrib_delta = __update_entity_load_avg_contrib(se);
+       utilization_delta = __update_entity_utilization_avg_contrib(se);
 
        if (!update_cfs_rq)
                return;
 
-       if (se->on_rq)
+       if (se->on_rq) {
                cfs_rq->runnable_load_avg += contrib_delta;
-       else
+               cfs_rq->utilization_load_avg += utilization_delta;
+       } else {
                subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+       }
 }
 
 /*
@@ -2821,6 +2883,7 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
        }
 
        cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+       cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib;
        /* we force update consideration on load-balancer moves */
        update_cfs_rq_blocked_load(cfs_rq, !wakeup);
 }
@@ -2839,6 +2902,7 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
        update_cfs_rq_blocked_load(cfs_rq, !sleep);
 
        cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+       cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib;
        if (sleep) {
                cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
                se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
@@ -3176,6 +3240,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                 */
                update_stats_wait_end(cfs_rq, se);
                __dequeue_entity(cfs_rq, se);
+               update_entity_load_avg(se, 1);
        }
 
        update_stats_curr_start(cfs_rq, se);
@@ -4302,6 +4367,11 @@ static unsigned long capacity_of(int cpu)
        return cpu_rq(cpu)->cpu_capacity;
 }
 
+static unsigned long capacity_orig_of(int cpu)
+{
+       return cpu_rq(cpu)->cpu_capacity_orig;
+}
+
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -4715,6 +4785,33 @@ next:
 done:
        return target;
 }
+/*
+ * get_cpu_usage returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the usage with the capacity of the CPU that is available for CFS
+ * tasks (i.e. cpu_capacity).
+ * cfs.utilization_load_avg is the sum of running time of runnable tasks on a
+ * CPU. It represents the amount of utilization of a CPU in the range
+ * [0..SCHED_LOAD_SCALE].  The usage of a CPU can't be higher than the full
+ * capacity of the CPU because it's about the running time on this CPU.
+ * Nevertheless, cfs.utilization_load_avg can be higher than SCHED_LOAD_SCALE
+ * because of unfortunate rounding in avg_period and running_load_avg or just
+ * after migrating tasks until the average stabilizes with the new running
+ * time. So we need to check that the usage stays within the range
+ * [0..cpu_capacity_orig] and cap it if necessary.
+ * Without capping the usage, a group could be seen as overloaded (CPU0 usage
+ * at 121% + CPU1 usage at 80%) whereas CPU1 has 20% of available capacity
+ */
+static int get_cpu_usage(int cpu)
+{
+       unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
+       unsigned long capacity = capacity_orig_of(cpu);
+
+       if (usage >= SCHED_LOAD_SCALE)
+               return capacity;
+
+       return (usage * capacity) >> SCHED_LOAD_SHIFT;
+}
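
A standalone sketch of that scaling, assuming the common configuration where SCHED_LOAD_SCALE == SCHED_CAPACITY_SCALE == 1024; names are illustrative:

#include <stdio.h>

#define LOAD_SHIFT 10
#define LOAD_SCALE (1UL << LOAD_SHIFT)

static unsigned long sketch_cpu_usage(unsigned long util,
                                      unsigned long capacity_orig)
{
        if (util >= LOAD_SCALE)         /* rounding/migration overshoot */
                return capacity_orig;
        return (util * capacity_orig) >> LOAD_SHIFT;
}

int main(void)
{
        printf("%lu\n", sketch_cpu_usage(512, 1024));   /* 512: half busy */
        printf("%lu\n", sketch_cpu_usage(1239, 1024));  /* 1024: capped */
        return 0;
}
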
 
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
@@ -5841,12 +5938,12 @@ struct sg_lb_stats {
        unsigned long sum_weighted_load; /* Weighted load of group's tasks */
        unsigned long load_per_task;
        unsigned long group_capacity;
+       unsigned long group_usage; /* Total usage of the group */
        unsigned int sum_nr_running; /* Nr tasks running in the group */
-       unsigned int group_capacity_factor;
        unsigned int idle_cpus;
        unsigned int group_weight;
        enum group_type group_type;
-       int group_has_free_capacity;
+       int group_no_capacity;
 #ifdef CONFIG_NUMA_BALANCING
        unsigned int nr_numa_running;
        unsigned int nr_preferred_running;
@@ -5917,16 +6014,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
        return load_idx;
 }
 
-static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
-{
-       return SCHED_CAPACITY_SCALE;
-}
-
-unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
-{
-       return default_scale_capacity(sd, cpu);
-}
-
 static unsigned long default_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
        if ((sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
@@ -5943,7 +6030,7 @@ unsigned long __weak arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 static unsigned long scale_rt_capacity(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
-       u64 total, available, age_stamp, avg;
+       u64 total, used, age_stamp, avg;
        s64 delta;
 
        /*
@@ -5959,19 +6046,12 @@ static unsigned long scale_rt_capacity(int cpu)
 
        total = sched_avg_period() + delta;
 
-       if (unlikely(total < avg)) {
-               /* Ensures that capacity won't end up being negative */
-               available = 0;
-       } else {
-               available = total - avg;
-       }
-
-       if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
-               total = SCHED_CAPACITY_SCALE;
+       used = div_u64(avg, total);
 
-       total >>= SCHED_CAPACITY_SHIFT;
+       if (likely(used < SCHED_CAPACITY_SCALE))
+               return SCHED_CAPACITY_SCALE - used;
 
-       return div_u64(available, total);
+       return 1;
 }
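
This works because rt_avg is now accumulated pre-scaled by arch_scale_freq_capacity() (see the sched.h hunk further down), so avg/total directly yields the 0..1024 fraction of capacity consumed by RT and IRQ activity. A minimal sketch with illustrative numbers:

#include <stdio.h>

static unsigned long long sketch_scale_rt_capacity(unsigned long long avg,
                                                   unsigned long long total)
{
        unsigned long long used = avg / total;  /* already in 0..1024 */

        return used < 1024 ? 1024 - used : 1;   /* never report 0 */
}

int main(void)
{
        /* RT/IRQ work ran 25% of the period at full frequency, so avg
         * accumulated 0.25 * total * 1024 */
        unsigned long long total = 1000000;

        printf("%llu\n", sketch_scale_rt_capacity(total / 4 * 1024, total));
        return 0;                               /* prints 768 */
}
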
 
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
@@ -5986,14 +6066,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 
        capacity >>= SCHED_CAPACITY_SHIFT;
 
-       sdg->sgc->capacity_orig = capacity;
-
-       if (sched_feat(ARCH_CAPACITY))
-               capacity *= arch_scale_freq_capacity(sd, cpu);
-       else
-               capacity *= default_scale_capacity(sd, cpu);
-
-       capacity >>= SCHED_CAPACITY_SHIFT;
+       cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
        capacity *= scale_rt_capacity(cpu);
        capacity >>= SCHED_CAPACITY_SHIFT;
@@ -6009,7 +6082,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
-       unsigned long capacity, capacity_orig;
+       unsigned long capacity;
        unsigned long interval;
 
        interval = msecs_to_jiffies(sd->balance_interval);
@@ -6021,7 +6094,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                return;
        }
 
-       capacity_orig = capacity = 0;
+       capacity = 0;
 
        if (child->flags & SD_OVERLAP) {
                /*
@@ -6041,19 +6114,15 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                         * Use capacity_of(), which is set irrespective of domains
                         * in update_cpu_capacity().
                         *
-                        * This avoids capacity/capacity_orig from being 0 and
+                        * This avoids capacity being 0 and
                         * causing divide-by-zero issues on boot.
-                        *
-                        * Runtime updates will correct capacity_orig.
                         */
                        if (unlikely(!rq->sd)) {
-                               capacity_orig += capacity_of(cpu);
                                capacity += capacity_of(cpu);
                                continue;
                        }
 
                        sgc = rq->sd->groups->sgc;
-                       capacity_orig += sgc->capacity_orig;
                        capacity += sgc->capacity;
                }
        } else  {
@@ -6064,39 +6133,24 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 
                group = child->groups;
                do {
-                       capacity_orig += group->sgc->capacity_orig;
                        capacity += group->sgc->capacity;
                        group = group->next;
                } while (group != child->groups);
        }
 
-       sdg->sgc->capacity_orig = capacity_orig;
        sdg->sgc->capacity = capacity;
 }
 
 /*
- * Try and fix up capacity for tiny siblings, this is needed when
- * things like SD_ASYM_PACKING need f_b_g to select another sibling
- * which on its own isn't powerful enough.
- *
- * See update_sd_pick_busiest() and check_asym_packing().
+ * Check whether the capacity of the rq has been noticeably reduced by side
+ * activity. The imbalance_pct is used for the threshold.
+ * Return true if the capacity is reduced.
  */
 static inline int
-fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 {
-       /*
-        * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
-        */
-       if (!(sd->flags & SD_SHARE_CPUCAPACITY))
-               return 0;
-
-       /*
-        * If ~90% of the cpu_capacity is still there, we're good.
-        */
-       if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
-               return 1;
-
-       return 0;
+       return ((rq->cpu_capacity * sd->imbalance_pct) <
+                               (rq->cpu_capacity_orig * 100));
 }
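
In plain numbers, taking imbalance_pct = 125 purely for illustration: the test fires once cpu_capacity drops below 100/125 = 80% of cpu_capacity_orig. A CPU at 800 of an original 1024 gives 800 * 125 = 100000 < 1024 * 100 = 102400, so it is flagged as noticeably reduced.
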
 
 /*
@@ -6134,37 +6188,56 @@ static inline int sg_imbalanced(struct sched_group *group)
 }
 
 /*
- * Compute the group capacity factor.
- *
- * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
- * first dividing out the smt factor and computing the actual number of cores
- * and limit unit capacity with that.
+ * group_has_capacity returns true if the group has spare capacity that could
+ * be used by some tasks.
+ * We consider that a group has spare capacity if the number of tasks is
+ * smaller than the number of CPUs or if the usage is lower than the available
+ * capacity for CFS tasks.
+ * For the latter, we use a threshold to stabilize the state, to take into
+ * account the variance of the tasks' load and to return true if the available
+ * capacity is meaningful for the load balancer.
+ * As an example, an available capacity of 1% can appear but it doesn't bring
+ * any benefit to the load balancer.
  */
-static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
+static inline bool
+group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-       unsigned int capacity_factor, smt, cpus;
-       unsigned int capacity, capacity_orig;
+       if (sgs->sum_nr_running < sgs->group_weight)
+               return true;
 
-       capacity = group->sgc->capacity;
-       capacity_orig = group->sgc->capacity_orig;
-       cpus = group->group_weight;
+       if ((sgs->group_capacity * 100) >
+                       (sgs->group_usage * env->sd->imbalance_pct))
+               return true;
+
+       return false;
+}
 
-       /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
-       smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
-       capacity_factor = cpus / smt; /* cores */
+/*
+ *  group_is_overloaded returns true if the group has more tasks than it can
+ *  handle.
+ *  group_is_overloaded is not equal to !group_has_capacity because a group
+ *  with exactly the right number of tasks has no spare capacity left but is
+ *  not overloaded, so both group_has_capacity and group_is_overloaded return
+ *  false.
+ */
+static inline bool
+group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
+{
+       if (sgs->sum_nr_running <= sgs->group_weight)
+               return false;
 
-       capacity_factor = min_t(unsigned,
-               capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
-       if (!capacity_factor)
-               capacity_factor = fix_small_capacity(env->sd, group);
+       if ((sgs->group_capacity * 100) <
+                       (sgs->group_usage * env->sd->imbalance_pct))
+               return true;
 
-       return capacity_factor;
+       return false;
 }
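
A worked example of the two predicates, taking imbalance_pct = 125 purely for illustration, for a 2-CPU group with group_capacity = 2048:

  - 3 tasks, group_usage = 1900: 2048 * 100 < 1900 * 125, so
    group_is_overloaded() is true and the group will be classified
    group_overloaded.
  - 2 tasks, group_usage = 1200: 2048 * 100 > 1200 * 125, so
    group_has_capacity() is true and group_is_overloaded() is false.
  - 2 tasks, group_usage = 1900: neither predicate holds; the group is
    exactly full but not overloaded, which is why the two helpers are
    not simple negations of each other.
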
 
-static enum group_type
-group_classify(struct sched_group *group, struct sg_lb_stats *sgs)
+static enum group_type group_classify(struct lb_env *env,
+               struct sched_group *group,
+               struct sg_lb_stats *sgs)
 {
-       if (sgs->sum_nr_running > sgs->group_capacity_factor)
+       if (sgs->group_no_capacity)
                return group_overloaded;
 
        if (sg_imbalanced(group))
@@ -6202,6 +6275,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                        load = source_load(i, load_idx);
 
                sgs->group_load += load;
+               sgs->group_usage += get_cpu_usage(i);
                sgs->sum_nr_running += rq->cfs.h_nr_running;
 
                if (rq->nr_running > 1)
@@ -6224,11 +6298,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
        sgs->group_weight = group->group_weight;
-       sgs->group_capacity_factor = sg_capacity_factor(env, group);
-       sgs->group_type = group_classify(group, sgs);
 
-       if (sgs->group_capacity_factor > sgs->sum_nr_running)
-               sgs->group_has_free_capacity = 1;
+       sgs->group_no_capacity = group_is_overloaded(env, sgs);
+       sgs->group_type = group_classify(env, group, sgs);
 }
 
 /**
@@ -6350,18 +6422,19 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
                /*
                 * In case the child domain prefers tasks go to siblings
-                * first, lower the sg capacity factor to one so that we'll try
+                * first, lower the sg capacity so that we'll try
                 * and move all the excess tasks away. We lower the capacity
                 * of a group only if the local group has the capacity to fit
-                * these excess tasks, i.e. nr_running < group_capacity_factor. The
-                * extra check prevents the case where you always pull from the
-                * heaviest group when it is already under-utilized (possible
-                * with a large weight task outweighs the tasks on the system).
+                * these excess tasks. The extra check prevents the case where
+                * you always pull from the heaviest group when it is already
+                * under-utilized (possible when a large weight task outweighs
+                * the tasks on the system).
                 */
                if (prefer_sibling && sds->local &&
-                   sds->local_stat.group_has_free_capacity) {
-                       sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
-                       sgs->group_type = group_classify(sg, sgs);
+                   group_has_capacity(env, &sds->local_stat) &&
+                   (sgs->sum_nr_running > 1)) {
+                       sgs->group_no_capacity = 1;
+                       sgs->group_type = group_overloaded;
                }
 
                if (update_sd_pick_busiest(env, sds, sg, sgs)) {
@@ -6541,11 +6614,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
         */
        if (busiest->group_type == group_overloaded &&
            local->group_type   == group_overloaded) {
-               load_above_capacity =
-                       (busiest->sum_nr_running - busiest->group_capacity_factor);
-
-               load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
-               load_above_capacity /= busiest->group_capacity;
+               load_above_capacity = busiest->sum_nr_running *
+                                       SCHED_LOAD_SCALE;
+               if (load_above_capacity > busiest->group_capacity)
+                       load_above_capacity -= busiest->group_capacity;
+               else
+                       load_above_capacity = ~0UL;
        }
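
Illustrative numbers: a busiest group with sum_nr_running = 3 and group_capacity = 2048 (two full CPUs) gives load_above_capacity = 3 * 1024 - 2048 = 1024, one nominal task's worth of excess load; when the product does not exceed group_capacity, the value saturates to ~0UL so it effectively drops out of the imbalance calculation that follows.
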
 
        /*
@@ -6608,6 +6682,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
        local = &sds.local_stat;
        busiest = &sds.busiest_stat;
 
+       /* ASYM feature bypasses nice load balance check */
        if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
            check_asym_packing(env, &sds))
                return sds.busiest;
@@ -6628,8 +6703,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
                goto force_balance;
 
        /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-       if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
-           !busiest->group_has_free_capacity)
+       if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) &&
+           busiest->group_no_capacity)
                goto force_balance;
 
        /*
@@ -6688,7 +6763,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
        int i;
 
        for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
-               unsigned long capacity, capacity_factor, wl;
+               unsigned long capacity, wl;
                enum fbq_type rt;
 
                rq = cpu_rq(i);
@@ -6717,9 +6792,6 @@ static struct rq *find_busiest_queue(struct lb_env *env,
                        continue;
 
                capacity = capacity_of(i);
-               capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
-               if (!capacity_factor)
-                       capacity_factor = fix_small_capacity(env->sd, group);
 
                wl = weighted_cpuload(i);
 
@@ -6727,7 +6799,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
                 * When comparing with imbalance, use weighted_cpuload()
                 * which is not scaled with the cpu capacity.
                 */
-               if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
+
+               if (rq->nr_running == 1 && wl > env->imbalance &&
+                   !check_cpu_capacity(rq, env->sd))
                        continue;
 
                /*
@@ -6775,6 +6849,19 @@ static int need_active_balance(struct lb_env *env)
                        return 1;
        }
 
+       /*
+        * The dst_cpu is idle and the src_cpu has only 1 CFS task.
+        * It's worth migrating the task if the src_cpu's capacity is reduced
+        * because of other sched_class tasks or IRQs, while more capacity
+        * remains available on dst_cpu.
+        */
+       if ((env->idle != CPU_NOT_IDLE) &&
+           (env->src_rq->cfs.h_nr_running == 1)) {
+               if ((check_cpu_capacity(env->src_rq, sd)) &&
+                   (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
+                       return 1;
+       }
+
        return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -6874,6 +6961,9 @@ redo:
 
        schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
+       env.src_cpu = busiest->cpu;
+       env.src_rq = busiest;
+
        ld_moved = 0;
        if (busiest->nr_running > 1) {
                /*
@@ -6883,8 +6973,6 @@ redo:
                 * correctly treated as an imbalance.
                 */
                env.flags |= LBF_ALL_PINNED;
-               env.src_cpu   = busiest->cpu;
-               env.src_rq    = busiest;
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
@@ -7584,22 +7672,25 @@ end:
 
 /*
  * Current heuristic for kicking the idle load balancer in the presence
- * of an idle cpu is the system.
+ * of an idle cpu in the system.
  *   - This rq has more than one task.
- *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's capacity.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At the parent of the LLC scheduler domain level, this cpu's scheduler
+ *     group has multiple busy cpus.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
-static inline int nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq)
 {
        unsigned long now = jiffies;
        struct sched_domain *sd;
        struct sched_group_capacity *sgc;
        int nr_busy, cpu = rq->cpu;
+       bool kick = false;
 
        if (unlikely(rq->idle_balance))
-               return 0;
+               return false;
 
        /*
        * We may be recently in ticked or tickless idle mode. At the first
@@ -7613,38 +7704,46 @@ static inline int nohz_kick_needed(struct rq *rq)
         * balancing.
         */
        if (likely(!atomic_read(&nohz.nr_cpus)))
-               return 0;
+               return false;
 
        if (time_before(now, nohz.next_balance))
-               return 0;
+               return false;
 
        if (rq->nr_running >= 2)
-               goto need_kick;
+               return true;
 
        rcu_read_lock();
        sd = rcu_dereference(per_cpu(sd_busy, cpu));
-
        if (sd) {
                sgc = sd->groups->sgc;
                nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
-               if (nr_busy > 1)
-                       goto need_kick_unlock;
+               if (nr_busy > 1) {
+                       kick = true;
+                       goto unlock;
+               }
+
        }
 
-       sd = rcu_dereference(per_cpu(sd_asym, cpu));
+       sd = rcu_dereference(rq->sd);
+       if (sd) {
+               if ((rq->cfs.h_nr_running >= 1) &&
+                               check_cpu_capacity(rq, sd)) {
+                       kick = true;
+                       goto unlock;
+               }
+       }
 
+       sd = rcu_dereference(per_cpu(sd_asym, cpu));
        if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
-                                 sched_domain_span(sd)) < cpu))
-               goto need_kick_unlock;
-
-       rcu_read_unlock();
-       return 0;
+                                 sched_domain_span(sd)) < cpu)) {
+               kick = true;
+               goto unlock;
+       }
 
-need_kick_unlock:
+unlock:
        rcu_read_unlock();
-need_kick:
-       return 1;
+       return kick;
 }
 #else
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
@@ -7660,14 +7759,16 @@ static void run_rebalance_domains(struct softirq_action *h)
        enum cpu_idle_type idle = this_rq->idle_balance ?
                                                CPU_IDLE : CPU_NOT_IDLE;
 
-       rebalance_domains(this_rq, idle);
-
        /*
         * If this cpu has a pending nohz_balance_kick, then do the
         * balancing on behalf of the other idle cpus whose ticks are
-        * stopped.
+        * stopped. Do nohz_idle_balance *before* rebalance_domains to
+        * give the idle cpus a chance to load balance. Else we may
+        * load balance only within the local sched_domain hierarchy
+        * and abort nohz_idle_balance altogether if we pull some load.
         */
        nohz_idle_balance(this_rq, idle);
+       rebalance_domains(this_rq, idle);
 }
 
 /*
index 90284d117fe65ffc7ee1de7127995a750c84df92..91e33cd485f6577050672432c02354393887774e 100644 (file)
@@ -56,6 +56,19 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * When many CPUs lower their priorities at the same time while a
+ * single CPU has an RT task that can migrate and is waiting to run,
+ * they would all try to take that CPU's rq lock, possibly creating
+ * large contention. Instead, sending an IPI to that CPU and letting
+ * it push the RT task to where it should go can be the better
+ * scenario.
+ */
+SCHED_FEAT(RT_PUSH_IPI, true)
+#endif
+
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
index 80014a17834214fcad51add08b2b171463e84128..4d207d2abcbd9d3d613c1035d5c436fcd08b3a14 100644 (file)
@@ -158,8 +158,7 @@ static void cpuidle_idle_call(void)
         * is used from another cpu as a broadcast timer, this call may
         * fail if it is not available
         */
-       if (broadcast &&
-           clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
+       if (broadcast && tick_broadcast_enter())
                goto use_default;
 
        /* Take note of the planned idle state. */
@@ -176,7 +175,7 @@ static void cpuidle_idle_call(void)
        idle_set_state(this_rq(), NULL);
 
        if (broadcast)
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+               tick_broadcast_exit();
 
        /*
         * Give the governor an opportunity to reflect on the outcome
index f4d4b077eba0a67a5c55e6a04dee8f6ce78f322c..575da76a3874a8c1b2ddd0f518e5ecea7a805262 100644 (file)
@@ -6,6 +6,7 @@
 #include "sched.h"
 
 #include <linux/slab.h>
+#include <linux/irq_work.h>
 
 int sched_rr_timeslice = RR_TIMESLICE;
 
@@ -59,7 +60,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
        raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
-void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+#ifdef CONFIG_SMP
+static void push_irq_work_func(struct irq_work *work);
+#endif
+
+void init_rt_rq(struct rt_rq *rt_rq)
 {
        struct rt_prio_array *array;
        int i;
@@ -78,7 +83,14 @@ void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
        plist_head_init(&rt_rq->pushable_tasks);
+
+#ifdef HAVE_RT_PUSH_IPI
+       rt_rq->push_flags = 0;
+       rt_rq->push_cpu = nr_cpu_ids;
+       raw_spin_lock_init(&rt_rq->push_lock);
+       init_irq_work(&rt_rq->push_work, push_irq_work_func);
 #endif
+#endif /* CONFIG_SMP */
        /* We start in dequeued state, because no RT tasks are queued */
        rt_rq->rt_queued = 0;
 
@@ -193,7 +205,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
                if (!rt_se)
                        goto err_free_rq;
 
-               init_rt_rq(rt_rq, cpu_rq(i));
+               init_rt_rq(rt_rq);
                rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
                init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
        }
@@ -1778,6 +1790,164 @@ static void push_rt_tasks(struct rq *rq)
                ;
 }
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * The search for the next cpu always starts at rq->cpu and ends
+ * when we reach rq->cpu again. It will never return rq->cpu.
+ * This returns the next cpu to check, or nr_cpu_ids if the loop
+ * is complete.
+ *
+ * rq->rt.push_cpu holds the last cpu returned by this function,
+ * or if this is the first instance, it must hold rq->cpu.
+ */
+static int rto_next_cpu(struct rq *rq)
+{
+       int prev_cpu = rq->rt.push_cpu;
+       int cpu;
+
+       cpu = cpumask_next(prev_cpu, rq->rd->rto_mask);
+
+       /*
+        * If the previous cpu is less than the rq's CPU, then it already
+        * passed the end of the mask, and has started from the beginning.
+        * We end if the next CPU is greater than or equal to the rq's CPU.
+        */
+       if (prev_cpu < rq->cpu) {
+               if (cpu >= rq->cpu)
+                       return nr_cpu_ids;
+
+       } else if (cpu >= nr_cpu_ids) {
+               /*
+                * We passed the end of the mask, start at the beginning.
+                * If the result is greater than or equal to the rq's CPU, then
+                * the loop is finished.
+                */
+               cpu = cpumask_first(rq->rd->rto_mask);
+               if (cpu >= rq->cpu)
+                       return nr_cpu_ids;
+       }
+       rq->rt.push_cpu = cpu;
+
+       /* Return cpu to let the caller know if the loop is finished or not */
+       return cpu;
+}
+
+static int find_next_push_cpu(struct rq *rq)
+{
+       struct rq *next_rq;
+       int cpu;
+
+       while (1) {
+               cpu = rto_next_cpu(rq);
+               if (cpu >= nr_cpu_ids)
+                       break;
+               next_rq = cpu_rq(cpu);
+
+               /* Make sure the next rq can push to this rq */
+               if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr)
+                       break;
+       }
+
+       return cpu;
+}
+
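
A standalone sketch (not kernel code) of the circular scan rto_next_cpu() and find_next_push_cpu() implement, with a plain bitmask standing in for rd->rto_mask and the priority check left out:

#include <stdio.h>

#define NR_CPUS 8

static int next_bit(unsigned long mask, int prev)
{
        int cpu;

        for (cpu = prev + 1; cpu < NR_CPUS; cpu++)
                if (mask & (1UL << cpu))
                        return cpu;
        return NR_CPUS;
}

int main(void)
{
        unsigned long rto_mask = 0xb2;  /* CPUs 1, 4, 5, 7 overloaded */
        int start = 4;                  /* this rq's CPU */
        int prev = start, wrapped = 0;

        for (;;) {
                int cpu = next_bit(rto_mask, prev);

                if (cpu >= NR_CPUS) {
                        if (wrapped)
                                break;
                        wrapped = 1;    /* wrap to the beginning once */
                        cpu = next_bit(rto_mask, -1);
                }
                if (wrapped && cpu >= start)
                        break;          /* full circle, never revisit */
                printf("IPI candidate: CPU %d\n", cpu); /* 5, 7, 1 */
                prev = cpu;
        }
        return 0;
}
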
+#define RT_PUSH_IPI_EXECUTING          1
+#define RT_PUSH_IPI_RESTART            2
+
+static void tell_cpu_to_push(struct rq *rq)
+{
+       int cpu;
+
+       if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
+               raw_spin_lock(&rq->rt.push_lock);
+               /* Make sure it's still executing */
+               if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
+                       /*
+                        * Tell the IPI to restart the loop as things have
+                        * changed since it started.
+                        */
+                       rq->rt.push_flags |= RT_PUSH_IPI_RESTART;
+                       raw_spin_unlock(&rq->rt.push_lock);
+                       return;
+               }
+               raw_spin_unlock(&rq->rt.push_lock);
+       }
+
+       /* When here, there's no IPI going around */
+
+       rq->rt.push_cpu = rq->cpu;
+       cpu = find_next_push_cpu(rq);
+       if (cpu >= nr_cpu_ids)
+               return;
+
+       rq->rt.push_flags = RT_PUSH_IPI_EXECUTING;
+
+       irq_work_queue_on(&rq->rt.push_work, cpu);
+}
+
+/* Called from hardirq context */
+static void try_to_push_tasks(void *arg)
+{
+       struct rt_rq *rt_rq = arg;
+       struct rq *rq, *src_rq;
+       int this_cpu;
+       int cpu;
+
+       this_cpu = rt_rq->push_cpu;
+
+       /* Paranoid check */
+       BUG_ON(this_cpu != smp_processor_id());
+
+       rq = cpu_rq(this_cpu);
+       src_rq = rq_of_rt_rq(rt_rq);
+
+again:
+       if (has_pushable_tasks(rq)) {
+               raw_spin_lock(&rq->lock);
+               push_rt_task(rq);
+               raw_spin_unlock(&rq->lock);
+       }
+
+       /* Pass the IPI to the next rt overloaded queue */
+       raw_spin_lock(&rt_rq->push_lock);
+       /*
+        * If the source queue changed since the IPI went out,
+        * we need to restart the search from that CPU again.
+        */
+       if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) {
+               rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
+               rt_rq->push_cpu = src_rq->cpu;
+       }
+
+       cpu = find_next_push_cpu(src_rq);
+
+       if (cpu >= nr_cpu_ids)
+               rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
+       raw_spin_unlock(&rt_rq->push_lock);
+
+       if (cpu >= nr_cpu_ids)
+               return;
+
+       /*
+        * It is possible that a restart caused this CPU to be
+        * chosen again. Don't bother with an IPI, just see if we
+        * have more to push.
+        */
+       if (unlikely(cpu == rq->cpu))
+               goto again;
+
+       /* Try the next RT overloaded CPU */
+       irq_work_queue_on(&rt_rq->push_work, cpu);
+}
+
+static void push_irq_work_func(struct irq_work *work)
+{
+       struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work);
+
+       try_to_push_tasks(rt_rq);
+}
+#endif /* HAVE_RT_PUSH_IPI */
+
 static int pull_rt_task(struct rq *this_rq)
 {
        int this_cpu = this_rq->cpu, ret = 0, cpu;
@@ -1793,6 +1963,13 @@ static int pull_rt_task(struct rq *this_rq)
         */
        smp_rmb();
 
+#ifdef HAVE_RT_PUSH_IPI
+       if (sched_feat(RT_PUSH_IPI)) {
+               tell_cpu_to_push(this_rq);
+               return 0;
+       }
+#endif
+
        for_each_cpu(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;
index dc0f435a27794657258623ac8a7f53f7326ff7ac..e0e1299939588ac47f08b13b45f1a6e2e9cf4d7f 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
+#include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
 
@@ -362,8 +363,14 @@ struct cfs_rq {
         * Under CFS, load is tracked on a per-entity basis and aggregated up.
         * This allows for the description of both thread and group usage (in
         * the FAIR_GROUP_SCHED case).
+        * runnable_load_avg is the sum of the load_avg_contrib of the
+        * sched_entities on the rq.
+        * blocked_load_avg is similar to runnable_load_avg, except that it is
+        * the sum for the blocked sched_entities on the rq.
+        * utilization_load_avg is the sum of the average running time of the
+        * sched_entities on the rq.
         */
-       unsigned long runnable_load_avg, blocked_load_avg;
+       unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg;
        atomic64_t decay_counter;
        u64 last_decay;
        atomic_long_t removed_load;
@@ -418,6 +425,11 @@ static inline int rt_bandwidth_enabled(void)
        return sysctl_sched_rt_runtime >= 0;
 }
 
+/* RT IPI pull logic requires IRQ_WORK */
+#ifdef CONFIG_IRQ_WORK
+# define HAVE_RT_PUSH_IPI
+#endif
+
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
        struct rt_prio_array active;
@@ -435,7 +447,13 @@ struct rt_rq {
        unsigned long rt_nr_total;
        int overloaded;
        struct plist_head pushable_tasks;
+#ifdef HAVE_RT_PUSH_IPI
+       int push_flags;
+       int push_cpu;
+       struct irq_work push_work;
+       raw_spinlock_t push_lock;
 #endif
+#endif /* CONFIG_SMP */
        int rt_queued;
 
        int rt_throttled;
@@ -597,6 +615,7 @@ struct rq {
        struct sched_domain *sd;
 
        unsigned long cpu_capacity;
+       unsigned long cpu_capacity_orig;
 
        unsigned char idle_balance;
        /* For active balancing */
@@ -807,7 +826,7 @@ struct sched_group_capacity {
         * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
         * for a single CPU.
         */
-       unsigned int capacity, capacity_orig;
+       unsigned int capacity;
        unsigned long next_update;
        int imbalance; /* XXX unrelated to capacity but shared group state */
        /*
@@ -1368,9 +1387,18 @@ static inline int hrtick_enabled(struct rq *rq)
 
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+       return SCHED_CAPACITY_SCALE;
+}
+#endif
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
-       rq->rt_avg += rt_delta;
+       rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
        sched_avg_update(rq);
 }
 #else
@@ -1643,8 +1671,8 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
 extern void print_dl_stats(struct seq_file *m, int cpu);
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
-extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq);
+extern void init_rt_rq(struct rt_rq *rt_rq);
+extern void init_dl_rq(struct dl_rq *dl_rq);
 
 extern void cfs_bandwidth_usage_inc(void);
 extern void cfs_bandwidth_usage_dec(void);
index d626dc98e8df952eff1df8ced84b2557475fe6c6..579ce1b929afde343a29fc77e4e7c4997ea18852 100644 (file)
@@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET
 config GENERIC_CLOCKEVENTS
        bool
 
-# Migration helper. Builds, but does not invoke
-config GENERIC_CLOCKEVENTS_BUILD
-       bool
-       default y
-       depends on GENERIC_CLOCKEVENTS
-
 # Architecture can handle broadcast in a driver-agnostic way
 config ARCH_HAS_TICK_BROADCAST
        bool
index c09c07817d7a7c854a1b88b12b894f60c547cda9..01f0312419b3cb44d8fa455d8cfaa2ad14d5ef0d 100644 (file)
@@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
 
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)                += clockevents.o
-obj-$(CONFIG_GENERIC_CLOCKEVENTS)              += tick-common.o
+obj-$(CONFIG_GENERIC_CLOCKEVENTS)              += clockevents.o tick-common.o
 ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
  obj-y                                         += tick-broadcast.o
  obj-$(CONFIG_TICK_ONESHOT)                    += tick-broadcast-hrtimer.o
 endif
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)              += sched_clock.o
-obj-$(CONFIG_TICK_ONESHOT)                     += tick-oneshot.o
-obj-$(CONFIG_TICK_ONESHOT)                     += tick-sched.o
+obj-$(CONFIG_TICK_ONESHOT)                     += tick-oneshot.o tick-sched.o
 obj-$(CONFIG_TIMER_STATS)                      += timer_stats.o
 obj-$(CONFIG_DEBUG_FS)                         += timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)                      += test_udelay.o
index 55449909f11475372135ac61b33e65114eb151ba..25d942d1da27095e6366d720a0f8de58009cb5f3 100644 (file)
@@ -94,25 +94,76 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
 }
 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
+static int __clockevents_set_state(struct clock_event_device *dev,
+                                  enum clock_event_state state)
+{
+       /* Transition with legacy set_mode() callback */
+       if (dev->set_mode) {
+               /* Legacy callback doesn't support new modes */
+               if (state > CLOCK_EVT_STATE_ONESHOT)
+                       return -ENOSYS;
+               /*
+                * 'clock_event_state' and 'clock_event_mode' have a 1-to-1
+                * mapping up to *_ONESHOT, so a simple cast works.
+                */
+               dev->set_mode((enum clock_event_mode)state, dev);
+               dev->mode = (enum clock_event_mode)state;
+               return 0;
+       }
+
+       if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+               return 0;
+
+       /* Transition with new state-specific callbacks */
+       switch (state) {
+       case CLOCK_EVT_STATE_DETACHED:
+               /*
+                * This is an internal state, which is guaranteed to go from
+                * SHUTDOWN to DETACHED. No driver interaction required.
+                */
+               return 0;
+
+       case CLOCK_EVT_STATE_SHUTDOWN:
+               return dev->set_state_shutdown(dev);
+
+       case CLOCK_EVT_STATE_PERIODIC:
+               /* Core internal bug */
+               if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
+                       return -ENOSYS;
+               return dev->set_state_periodic(dev);
+
+       case CLOCK_EVT_STATE_ONESHOT:
+               /* Core internal bug */
+               if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+                       return -ENOSYS;
+               return dev->set_state_oneshot(dev);
+
+       default:
+               return -ENOSYS;
+       }
+}
+
 /**
- * clockevents_set_mode - set the operating mode of a clock event device
+ * clockevents_set_state - set the operating state of a clock event device
  * @dev:       device to modify
- * @mode:      new mode
+ * @state:     new state
  *
  * Must be called with interrupts disabled !
  */
-void clockevents_set_mode(struct clock_event_device *dev,
-                                enum clock_event_mode mode)
+void clockevents_set_state(struct clock_event_device *dev,
+                          enum clock_event_state state)
 {
-       if (dev->mode != mode) {
-               dev->set_mode(mode, dev);
-               dev->mode = mode;
+       if (dev->state != state) {
+               if (__clockevents_set_state(dev, state))
+                       return;
+
+               dev->state = state;
 
                /*
                 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
                 * on it, so fix it up and emit a warning:
                 */
-               if (mode == CLOCK_EVT_MODE_ONESHOT) {
+               if (state == CLOCK_EVT_STATE_ONESHOT) {
                        if (unlikely(!dev->mult)) {
                                dev->mult = 1;
                                WARN_ON(1);
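For driver authors, the upshot of the new interface is that a clock_event_device supplies one callback per target state instead of the multiplexed set_mode(). A hypothetical timer driver (all foo_* names invented for illustration) that would pass the feature checks in __clockevents_set_state() above:

    static int foo_timer_shutdown(struct clock_event_device *evt)
    {
            writel_relaxed(0, foo_base + FOO_CTRL);         /* stop counting */
            return 0;
    }

    static int foo_timer_set_periodic(struct clock_event_device *evt)
    {
            writel_relaxed(foo_reload, foo_base + FOO_LOAD);
            writel_relaxed(FOO_EN | FOO_PERIODIC, foo_base + FOO_CTRL);
            return 0;
    }

    static int foo_timer_set_oneshot(struct clock_event_device *evt)
    {
            writel_relaxed(FOO_EN, foo_base + FOO_CTRL);
            return 0;
    }

    static struct clock_event_device foo_clockevent = {
            .name                   = "foo-timer",
            .features               = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
            .set_state_shutdown     = foo_timer_shutdown,
            .set_state_periodic     = foo_timer_set_periodic,
            .set_state_oneshot      = foo_timer_set_oneshot,
            /* no legacy .set_mode: mixing both would trigger the WARN_ON
             * in clockevents_sanity_check() further down in this file */
    };
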
@@ -127,10 +178,28 @@ void clockevents_set_mode(struct clock_event_device *dev,
  */
 void clockevents_shutdown(struct clock_event_device *dev)
 {
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
        dev->next_event.tv64 = KTIME_MAX;
 }
 
+/**
+ * clockevents_tick_resume -   Resume the tick device before using it again
+ * @dev:                       device to resume
+ */
+int clockevents_tick_resume(struct clock_event_device *dev)
+{
+       int ret = 0;
+
+       if (dev->set_mode) {
+               dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
+               dev->mode = CLOCK_EVT_MODE_RESUME;
+       } else if (dev->tick_resume) {
+               ret = dev->tick_resume(dev);
+       }
+
+       return ret;
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
 
 /* Limit min_delta to a jiffie */
@@ -183,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
                delta = dev->min_delta_ns;
                dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-               if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+               if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                        return 0;
 
                dev->retries++;
@@ -220,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
        delta = dev->min_delta_ns;
        dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-       if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+       if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                return 0;
 
        dev->retries++;
@@ -252,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 
        dev->next_event = expires;
 
-       if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+       if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                return 0;
 
        /* Shortcut for clockevent devices that can deal with ktime. */
@@ -297,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced)
        struct clock_event_device *dev, *newdev = NULL;
 
        list_for_each_entry(dev, &clockevent_devices, list) {
-               if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+               if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED)
                        continue;
 
                if (!tick_check_replacement(newdev, dev))
@@ -323,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced)
 static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
 {
        /* Fast track. Device is unused */
-       if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+       if (ced->state == CLOCK_EVT_STATE_DETACHED) {
                list_del_init(&ced->list);
                return 0;
        }
@@ -373,6 +442,37 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
 }
 EXPORT_SYMBOL_GPL(clockevents_unbind);
 
+/* Sanity check of state transition callbacks */
+static int clockevents_sanity_check(struct clock_event_device *dev)
+{
+       /* Legacy set_mode() callback */
+       if (dev->set_mode) {
+               /* We shouldn't be supporting new modes now */
+               WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
+                       dev->set_state_shutdown || dev->tick_resume);
+
+               BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+               return 0;
+       }
+
+       if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+               return 0;
+
+       /* New state-specific callbacks */
+       if (!dev->set_state_shutdown)
+               return -EINVAL;
+
+       if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+           !dev->set_state_periodic)
+               return -EINVAL;
+
+       if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+           !dev->set_state_oneshot)
+               return -EINVAL;
+
+       return 0;
+}
+
 /**
  * clockevents_register_device - register a clock event device
  * @dev:       device to register
@@ -381,7 +481,11 @@ void clockevents_register_device(struct clock_event_device *dev)
 {
        unsigned long flags;
 
-       BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+       BUG_ON(clockevents_sanity_check(dev));
+
+       /* Initialize state to DETACHED */
+       dev->state = CLOCK_EVT_STATE_DETACHED;
+
        if (!dev->cpumask) {
                WARN_ON(num_possible_cpus() > 1);
                dev->cpumask = cpumask_of(smp_processor_id());
@@ -445,11 +549,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
 {
        clockevents_config(dev, freq);
 
-       if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
+       if (dev->state == CLOCK_EVT_STATE_ONESHOT)
                return clockevents_program_event(dev, dev->next_event, false);
 
-       if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
-               dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
+               return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
 
        return 0;
 }
@@ -491,30 +595,27 @@ void clockevents_handle_noop(struct clock_event_device *dev)
  * @old:       device to release (can be NULL)
  * @new:       device to request (can be NULL)
  *
- * Called from the notifier chain. clockevents_lock is held already
+ * Called from various tick functions with clockevents_lock held and
+ * interrupts disabled.
  */
 void clockevents_exchange_device(struct clock_event_device *old,
                                 struct clock_event_device *new)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);
        /*
         * Caller releases a clock event device. We queue it into the
         * released list and do a notify add later.
         */
        if (old) {
                module_put(old->owner);
-               clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
+               clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED);
                list_del(&old->list);
                list_add(&old->list, &clockevents_released);
        }
 
        if (new) {
-               BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
+               BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED);
                clockevents_shutdown(new);
        }
-       local_irq_restore(flags);
 }
 
 /**
@@ -541,74 +642,40 @@ void clockevents_resume(void)
                        dev->resume(dev);
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_HOTPLUG_CPU
 /**
- * clockevents_notify - notification about relevant events
- * Returns 0 on success, any other value on error
+ * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
  */
-int clockevents_notify(unsigned long reason, void *arg)
+void tick_cleanup_dead_cpu(int cpu)
 {
        struct clock_event_device *dev, *tmp;
        unsigned long flags;
-       int cpu, ret = 0;
 
        raw_spin_lock_irqsave(&clockevents_lock, flags);
 
-       switch (reason) {
-       case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-       case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-       case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-               tick_broadcast_on_off(reason, arg);
-               break;
-
-       case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
-       case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
-               ret = tick_broadcast_oneshot_control(reason);
-               break;
-
-       case CLOCK_EVT_NOTIFY_CPU_DYING:
-               tick_handover_do_timer(arg);
-               break;
-
-       case CLOCK_EVT_NOTIFY_SUSPEND:
-               tick_suspend();
-               tick_suspend_broadcast();
-               break;
-
-       case CLOCK_EVT_NOTIFY_RESUME:
-               tick_resume();
-               break;
-
-       case CLOCK_EVT_NOTIFY_CPU_DEAD:
-               tick_shutdown_broadcast_oneshot(arg);
-               tick_shutdown_broadcast(arg);
-               tick_shutdown(arg);
-               /*
-                * Unregister the clock event devices which were
-                * released from the users in the notify chain.
-                */
-               list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+       tick_shutdown_broadcast_oneshot(cpu);
+       tick_shutdown_broadcast(cpu);
+       tick_shutdown(cpu);
+       /*
+        * Unregister the clock event devices which were
+        * released from the users in the notify chain.
+        */
+       list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+               list_del(&dev->list);
+       /*
+        * Now check whether the CPU has left unused per cpu devices
+        */
+       list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+               if (cpumask_test_cpu(cpu, dev->cpumask) &&
+                   cpumask_weight(dev->cpumask) == 1 &&
+                   !tick_is_broadcast_device(dev)) {
+                       BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED);
                        list_del(&dev->list);
-               /*
-                * Now check whether the CPU has left unused per cpu devices
-                */
-               cpu = *((int *)arg);
-               list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
-                       if (cpumask_test_cpu(cpu, dev->cpumask) &&
-                           cpumask_weight(dev->cpumask) == 1 &&
-                           !tick_is_broadcast_device(dev)) {
-                               BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
-                               list_del(&dev->list);
-                       }
                }
-               break;
-       default:
-               break;
        }
        raw_spin_unlock_irqrestore(&clockevents_lock, flags);
-       return ret;
 }
-EXPORT_SYMBOL_GPL(clockevents_notify);
+#endif
 
 #ifdef CONFIG_SYSFS
 struct bus_type clockevents_subsys = {
@@ -727,5 +794,3 @@ static int __init clockevents_init_sysfs(void)
 }
 device_initcall(clockevents_init_sysfs);
 #endif /* SYSFS */
-
-#endif /* GENERIC_CLOCK_EVENTS */
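With clockevents_notify() gone, the CPU hotplug core is expected to call the split-out helpers directly rather than multiplexing through a reason code. A hedged sketch of the caller side (simplified; the real call sites land in the hotplug code elsewhere in this series):

    /* On the dying CPU, with interrupts disabled: */
    tick_handover_do_timer();       /* hand off the do_timer duty */

    /* Later, on a surviving CPU, once the victim is fully dead: */
    tick_cleanup_dead_cpu(cpu);     /* reclaim its tick/clockevent devices */
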
index 4892352f0e4989c561c5d16ba3b27c063082a8f6..15facb1b9c606c7a5fa5ea3500ea7dd4bf523477 100644 (file)
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
                schedule_work(&watchdog_work);
 }
 
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-       printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-              cs->name, delta);
-       __clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs:                clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
        struct clocksource *cs;
-       cycle_t csnow, wdnow, delta;
+       cycle_t csnow, wdnow, cslast, wdlast, delta;
        int64_t wd_nsec, cs_nsec;
        int next_cpu, reset_pending;
 
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
 
                delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
                cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+               wdlast = cs->wd_last; /* save these in case we print them */
+               cslast = cs->cs_last;
                cs->cs_last = csnow;
                cs->wd_last = wdnow;
 
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
 
                /* Check the deviation from the watchdog clocksource. */
                if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
-                       clocksource_unstable(cs, cs_nsec - wd_nsec);
+                       pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+                       pr_warn("       '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+                               watchdog->name, wdnow, wdlast, watchdog->mask);
+                       pr_warn("       '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+                               cs->name, csnow, cslast, cs->mask);
+                       __clocksource_unstable(cs);
                        continue;
                }
 
@@ -469,26 +469,25 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * @shift:     cycle to nanosecond divisor (power of two)
  * @maxadj:    maximum adjustment value to mult (~11%)
  * @mask:      bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc:   maximum cycle value before potential overflow (does not include
+ *             any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, in other words, we
+ * return half the number of nanoseconds the hardware counter can technically
+ * cover. This is done so that we can potentially detect problems caused by
+ * delayed timers or bad hardware, which might result in time intervals that
+ * are larger than what the math used can handle without overflows.
  */
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
        u64 max_nsecs, max_cycles;
 
        /*
         * Calculate the maximum number of cycles that we can pass to the
-        * cyc2ns function without overflowing a 64-bit signed result. The
-        * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-        * which is equivalent to the below.
-        * max_cycles < (2^63)/(mult + maxadj)
-        * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-        * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-        * max_cycles < 2^(63 - log2(mult + maxadj))
-        * max_cycles < 1 << (63 - log2(mult + maxadj))
-        * Please note that we add 1 to the result of the log2 to account for
-        * any rounding errors, ensure the above inequality is satisfied and
-        * no overflow will occur.
+        * cyc2ns() function without overflowing a 64-bit result.
         */
-       max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+       max_cycles = ULLONG_MAX;
+       do_div(max_cycles, mult+maxadj);
 
        /*
         * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +498,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
        max_cycles = min(max_cycles, mask);
        max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
 
+       /* return the max_cycles value as well if requested */
+       if (max_cyc)
+               *max_cyc = max_cycles;
+
+       /* Return 50% of the actual maximum, so we can detect bad values */
+       max_nsecs >>= 1;
+
        return max_nsecs;
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:         Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs:         Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-       u64 max_nsecs;
-
-       max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-                                         cs->mask);
-       /*
-        * To ensure that the clocksource does not wrap whilst we are idle,
-        * limit the time the clocksource can be deferred by 12.5%. Please
-        * note a margin of 12.5% is used because this can be computed with
-        * a shift, versus say 10% which would require division.
-        */
-       return max_nsecs - (max_nsecs >> 3);
+       cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+                                               cs->maxadj, cs->mask,
+                                               &cs->max_cycles);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
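A worked example of the new margin, using hypothetical round numbers (a 1 GHz clocksource with a 32-bit counter, mult = 1 << 22 and shift = 22 so one cycle equals one nanosecond, maxadj at roughly 11% of mult):

    max_cycles = ULLONG_MAX / (mult + maxadj)             ~= 3.9e12 cycles
    max_cycles = min(max_cycles, mask)                     = 0xffffffff (~4.3e9)
    max_nsecs  = cyc2ns(max_cycles, mult - maxadj, shift) ~= 3.8e9 ns
    return     = max_nsecs >> 1                           ~= 1.9 seconds

With the 50% margin, a timer delayed beyond ~1.9s on this clock can be detected while the conversion is still ~1.9s away from actually overflowing.
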
@@ -648,7 +646,7 @@ static void clocksource_enqueue(struct clocksource *cs)
 }
 
 /**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used to update clocksource with new freq
  * @cs:                clocksource to be registered
  * @scale:     Scale factor multiplied against freq to get clocksource hz
  * @freq:      clocksource frequency (cycles per second) divided by scale
@@ -656,48 +654,64 @@ static void clocksource_enqueue(struct clocksource *cs)
  * This should only be called from the clocksource->enable() method.
  *
  * This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
  */
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
        u64 sec;
+
        /*
-        * Calc the maximum number of seconds which we can run before
-        * wrapping around. For clocksources which have a mask > 32bit
-        * we need to limit the max sleep time to have a good
-        * conversion precision. 10 minutes is still a reasonable
-        * amount. That results in a shift value of 24 for a
-        * clocksource with mask >= 40bit and f >= 4GHz. That maps to
-        * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
-        * margin as we do in clocksource_max_deferment()
+        * Default clocksources are *special* and self-define their mult/shift.
+        * But, you're not special, so you should specify a freq value.
         */
-       sec = (cs->mask - (cs->mask >> 3));
-       do_div(sec, freq);
-       do_div(sec, scale);
-       if (!sec)
-               sec = 1;
-       else if (sec > 600 && cs->mask > UINT_MAX)
-               sec = 600;
-
-       clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-                              NSEC_PER_SEC / scale, sec * scale);
-
+       if (freq) {
+               /*
+                * Calc the maximum number of seconds which we can run before
+                * wrapping around. For clocksources which have a mask > 32-bit
+                * we need to limit the max sleep time to have a good
+                * conversion precision. 10 minutes is still a reasonable
+                * amount. That results in a shift value of 24 for a
+                * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+                * ~ 0.06ppm granularity for NTP.
+                */
+               sec = cs->mask;
+               do_div(sec, freq);
+               do_div(sec, scale);
+               if (!sec)
+                       sec = 1;
+               else if (sec > 600 && cs->mask > UINT_MAX)
+                       sec = 600;
+
+               clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+                                      NSEC_PER_SEC / scale, sec * scale);
+       }
        /*
-        * for clocksources that have large mults, to avoid overflow.
-        * Since mult may be adjusted by ntp, add an safety extra margin
-        *
+        * Ensure clocksources that have large 'mult' values don't overflow
+        * when adjusted.
         */
        cs->maxadj = clocksource_max_adjustment(cs);
-       while ((cs->mult + cs->maxadj < cs->mult)
-               || (cs->mult - cs->maxadj > cs->mult)) {
+       while (freq && ((cs->mult + cs->maxadj < cs->mult)
+               || (cs->mult - cs->maxadj > cs->mult))) {
                cs->mult >>= 1;
                cs->shift--;
                cs->maxadj = clocksource_max_adjustment(cs);
        }
 
-       cs->max_idle_ns = clocksource_max_deferment(cs);
+       /*
+        * Only warn for *special* clocksources that self-define
+        * their mult/shift values and don't specify a freq.
+        */
+       WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+               "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+               cs->name);
+
+       clocksource_update_max_deferment(cs);
+
+       pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+                       cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +728,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 
        /* Initialize mult/shift and max_idle_ns */
-       __clocksource_updatefreq_scale(cs, scale, freq);
+       __clocksource_update_freq_scale(cs, scale, freq);
 
        /* Add clocksource to the clocksource list */
        mutex_lock(&clocksource_mutex);
@@ -726,33 +740,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 }
 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs:                clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
-       /* calculate max adjustment for given mult/shift */
-       cs->maxadj = clocksource_max_adjustment(cs);
-       WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
-               "Clocksource %s might overflow on 11%% adjustment\n",
-               cs->name);
-
-       /* calculate max idle time permitted for this clocksource */
-       cs->max_idle_ns = clocksource_max_deferment(cs);
-
-       mutex_lock(&clocksource_mutex);
-       clocksource_enqueue(cs);
-       clocksource_enqueue_watchdog(cs);
-       clocksource_select();
-       mutex_unlock(&clocksource_mutex);
-       return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
 static void __clocksource_change_rating(struct clocksource *cs, int rating)
 {
        list_del(&cs->list);
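Out-of-tree users of the removed clocksource_register() have to migrate to the scale-aware entry points, which derive mult/shift from the input frequency instead of trusting driver-supplied values. A minimal conversion sketch (the driver, its foo_clocksource and the 24 MHz figure are invented; clocksource_register_hz() is the existing wrapper around __clocksource_register_scale()):

    static int __init foo_clocksource_init(void)
    {
            /* before (removed): set foo_clocksource.mult/.shift by hand,
             * then clocksource_register(&foo_clocksource);
             * after: pass the counter frequency, the core derives them:
             */
            return clocksource_register_hz(&foo_clocksource, 24000000);
    }
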
index bee0c1f780911a97a4598b81089c9bc4d807d037..76d4bd962b19b3bab345460676954ef6f7c14568 100644 (file)
@@ -54,7 +54,7 @@
 
 #include <trace/events/timer.h>
 
-#include "timekeeping.h"
+#include "tick-internal.h"
 
 /*
  * The timer bases:
@@ -1707,17 +1707,10 @@ static int hrtimer_cpu_notify(struct notifier_block *self,
                break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DYING:
-       case CPU_DYING_FROZEN:
-               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
-               break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-       {
-               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
                migrate_hrtimers(scpu);
                break;
-       }
 #endif
 
        default:
index a6a5bf53e86d25575f90518399407a4fb65a85ed..347fecf86a3fb2242e0a88b63975424f0293dde7 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include "tick-internal.h"
+#include "timekeeping.h"
 
 /* The Jiffies based clocksource is the lowest common
  * denominator clock source which should function on
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
        .mask           = 0xffffffff, /*32bits*/
        .mult           = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
        .shift          = JIFFIES_SHIFT,
+       .max_cycles     = 10,
 };
 
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
 
 static int __init init_jiffies_clocksource(void)
 {
-       return clocksource_register(&clocksource_jiffies);
+       return __clocksource_register(&clocksource_jiffies);
 }
 
 core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
 
        refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
 
-       clocksource_register(&refined_jiffies);
+       __clocksource_register(&refined_jiffies);
        return 0;
 }
index 0f60b08a4f073e9246ced1dc3b5de5f50efd7cf4..7a681003001c0ee75631e2c5c56e528ec0ea98df 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/rtc.h>
 
-#include "tick-internal.h"
 #include "ntp_internal.h"
 
 /*
@@ -459,6 +458,16 @@ out:
        return leap;
 }
 
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+int __weak update_persistent_clock64(struct timespec64 now64)
+{
+       struct timespec now;
+
+       now = timespec64_to_timespec(now64);
+       return update_persistent_clock(now);
+}
+#endif
+
 #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_cmos_clock(struct work_struct *work);
 
@@ -494,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work)
                if (persistent_clock_is_local)
                        adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-               fail = update_persistent_clock(timespec64_to_timespec(adjust));
+               fail = update_persistent_clock64(adjust);
 #endif
+
 #ifdef CONFIG_RTC_SYSTOHC
                if (fail == -ENODEV)
                        fail = rtc_set_ntp_time(adjust);
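The __weak wrapper above keeps existing 32-bit update_persistent_clock() implementations working unchanged; an architecture that is already y2038-clean can provide the 64-bit variant directly instead. An illustrative override (example_rtc_set() is invented):

    /* A strong definition overrides the __weak fallback in ntp.c */
    int update_persistent_clock64(struct timespec64 now)
    {
            /* now.tv_sec is time64_t, so no 2038 truncation here */
            return example_rtc_set(now.tv_sec);
    }
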
index 01d2d15aa66233dc62db43f8e988a0f5519a729b..a26036d37a3895f163a20abdde5c6361d0110cf1 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ *                hardware time counters to full 64-bit ns values.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-       ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:          sched_clock() value at last update
+ * @epoch_cyc:         Clock cycle value at last update.
+ * @sched_clock_mask:  Bitmask for two's complement subtraction of non-64-bit
+ *                     clocks.
+ * @read_sched_clock:  Current clock source (or dummy source when suspended).
+ * @mult:              Multiplier for scaled math conversion.
+ * @shift:             Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
        u64 epoch_ns;
        u64 epoch_cyc;
-       seqcount_t seq;
-       unsigned long rate;
+       u64 sched_clock_mask;
+       u64 (*read_sched_clock)(void);
        u32 mult;
        u32 shift;
-       bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:               Sequence counter for protecting updates. The lowest
+ *                     bit is the index for @read_data.
+ * @read_data:         Data required to read from sched_clock.
+ * @wrap_kt:           Duration for which clock can run before wrapping.
+ * @rate:              Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+       seqcount_t              seq;
+       struct clock_read_data  read_data[2];
+       ktime_t                 wrap_kt;
+       unsigned long           rate;
+
+       u64 (*actual_read_sched_clock)(void);
 };
 
 static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-       .mult   = NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
        /*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
        return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+       .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+                         .read_sched_clock = jiffy_sched_clock_read, },
+       .actual_read_sched_clock = jiffy_sched_clock_read,
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 
 unsigned long long notrace sched_clock(void)
 {
-       u64 epoch_ns;
-       u64 epoch_cyc;
-       u64 cyc;
+       u64 cyc, res;
        unsigned long seq;
-
-       if (cd.suspended)
-               return cd.epoch_ns;
+       struct clock_read_data *rd;
 
        do {
-               seq = raw_read_seqcount_begin(&cd.seq);
-               epoch_cyc = cd.epoch_cyc;
-               epoch_ns = cd.epoch_ns;
+               seq = raw_read_seqcount(&cd.seq);
+               rd = cd.read_data + (seq & 1);
+
+               cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+                     rd->sched_clock_mask;
+               res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
        } while (read_seqcount_retry(&cd.seq, seq));
 
-       cyc = read_sched_clock();
-       cyc = (cyc - epoch_cyc) & sched_clock_mask;
-       return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+       return res;
+}
+
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+       /* update the backup (odd) copy with the new data */
+       cd.read_data[1] = *rd;
+
+       /* steer readers towards the odd copy */
+       raw_write_seqcount_latch(&cd.seq);
+
+	/* now it's safe for us to update the normal (even) copy */
+       cd.read_data[0] = *rd;
+
+       /* switch readers back to the even copy */
+       raw_write_seqcount_latch(&cd.seq);
 }
 
 /*
- * Atomically update the sched_clock epoch.
+ * Atomically update the sched_clock() epoch.
  */
-static void notrace update_sched_clock(void)
+static void update_sched_clock(void)
 {
-       unsigned long flags;
        u64 cyc;
        u64 ns;
+       struct clock_read_data rd;
+
+       rd = cd.read_data[0];
+
+       cyc = cd.actual_read_sched_clock();
+       ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+       rd.epoch_ns = ns;
+       rd.epoch_cyc = cyc;
 
-       cyc = read_sched_clock();
-       ns = cd.epoch_ns +
-               cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-                         cd.mult, cd.shift);
-
-       raw_local_irq_save(flags);
-       raw_write_seqcount_begin(&cd.seq);
-       cd.epoch_ns = ns;
-       cd.epoch_cyc = cyc;
-       raw_write_seqcount_end(&cd.seq);
-       raw_local_irq_restore(flags);
+       update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 {
        update_sched_clock();
        hrtimer_forward_now(hrt, cd.wrap_kt);
+
        return HRTIMER_RESTART;
 }
 
-void __init sched_clock_register(u64 (*read)(void), int bits,
-                                unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 {
        u64 res, wrap, new_mask, new_epoch, cyc, ns;
        u32 new_mult, new_shift;
-       ktime_t new_wrap_kt;
        unsigned long r;
        char r_unit;
+       struct clock_read_data rd;
 
        if (cd.rate > rate)
                return;
 
        WARN_ON(!irqs_disabled());
 
-       /* calculate the mult/shift to convert counter ticks to ns. */
+       /* Calculate the mult/shift to convert counter ticks to ns. */
        clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
 
        new_mask = CLOCKSOURCE_MASK(bits);
+       cd.rate = rate;
+
+       /* Calculate how many nanosecs until we risk wrapping */
+       wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+       cd.wrap_kt = ns_to_ktime(wrap);
 
-       /* calculate how many ns until we wrap */
-       wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-       new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+       rd = cd.read_data[0];
 
-       /* update epoch for new counter and update epoch_ns from old counter*/
+	/* Update epoch for new counter and update 'epoch_ns' from old counter */
        new_epoch = read();
-       cyc = read_sched_clock();
-       ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-                         cd.mult, cd.shift);
+       cyc = cd.actual_read_sched_clock();
+       ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+       cd.actual_read_sched_clock = read;
 
-       raw_write_seqcount_begin(&cd.seq);
-       read_sched_clock = read;
-       sched_clock_mask = new_mask;
-       cd.rate = rate;
-       cd.wrap_kt = new_wrap_kt;
-       cd.mult = new_mult;
-       cd.shift = new_shift;
-       cd.epoch_cyc = new_epoch;
-       cd.epoch_ns = ns;
-       raw_write_seqcount_end(&cd.seq);
+       rd.read_sched_clock     = read;
+       rd.sched_clock_mask     = new_mask;
+       rd.mult                 = new_mult;
+       rd.shift                = new_shift;
+       rd.epoch_cyc            = new_epoch;
+       rd.epoch_ns             = ns;
+
+       update_clock_read_data(&rd);
 
        r = rate;
        if (r >= 4000000) {
                r /= 1000000;
                r_unit = 'M';
-       } else if (r >= 1000) {
-               r /= 1000;
-               r_unit = 'k';
-       } else
-               r_unit = ' ';
-
-       /* calculate the ns resolution of this counter */
+       } else {
+               if (r >= 1000) {
+                       r /= 1000;
+                       r_unit = 'k';
+               } else {
+                       r_unit = ' ';
+               }
+       }
+
+       /* Calculate the ns resolution of this counter */
        res = cyc_to_ns(1ULL, new_mult, new_shift);
 
        pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
                bits, r, r_unit, res, wrap);
 
-       /* Enable IRQ time accounting if we have a fast enough sched_clock */
+       /* Enable IRQ time accounting if we have a fast enough sched_clock() */
        if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
                enable_sched_clock_irqtime();
 
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 void __init sched_clock_postinit(void)
 {
        /*
-        * If no sched_clock function has been provided at that point,
+        * If no sched_clock() function has been provided at that point,
 	 * make it the final one.
         */
-       if (read_sched_clock == jiffy_sched_clock_read)
+       if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
                sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
        update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
        hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+       unsigned long seq = raw_read_seqcount(&cd.seq);
+
+       return cd.read_data[seq & 1].epoch_cyc;
+}
+
 static int sched_clock_suspend(void)
 {
+       struct clock_read_data *rd = &cd.read_data[0];
+
        update_sched_clock();
        hrtimer_cancel(&sched_clock_timer);
-       cd.suspended = true;
+       rd->read_sched_clock = suspended_sched_clock_read;
+
        return 0;
 }
 
 static void sched_clock_resume(void)
 {
-       cd.epoch_cyc = read_sched_clock();
+       struct clock_read_data *rd = &cd.read_data[0];
+
+       rd->epoch_cyc = cd.actual_read_sched_clock();
        hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-       cd.suspended = false;
+       rd->read_sched_clock = cd.actual_read_sched_clock;
 }
 
 static struct syscore_ops sched_clock_ops = {
-       .suspend = sched_clock_suspend,
-       .resume = sched_clock_resume,
+       .suspend        = sched_clock_suspend,
+       .resume         = sched_clock_resume,
 };
 
 static int __init sched_clock_syscore_init(void)
 {
        register_syscore_ops(&sched_clock_ops);
+
        return 0;
 }
 device_initcall(sched_clock_syscore_init);
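For completeness, the registration side is unchanged by this refactor; a platform still hands over a read function, counter width and rate. A sketch for a hypothetical 32-bit, 24 MHz MMIO counter (all example_* names invented):

    static void __iomem *example_counter_base;

    static u64 notrace example_counter_read(void)
    {
            return readl_relaxed(example_counter_base);
    }

    static void __init example_sched_clock_init(void)
    {
            /* 32 bits at 24 MHz wrap after ~179s; cd.wrap_kt makes the
             * poll timer refresh the epoch well before that. */
            sched_clock_register(example_counter_read, 32, 24000000);
    }
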
index 066f0ec05e487396315356df0ea04c8563ffa390..7e8ca4f448a88c5ad5708106bbd889e22715b3ad 100644 (file)
@@ -33,12 +33,14 @@ static cpumask_var_t tick_broadcast_mask;
 static cpumask_var_t tick_broadcast_on;
 static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
-static int tick_broadcast_force;
+static int tick_broadcast_forced;
 
 #ifdef CONFIG_TICK_ONESHOT
 static void tick_broadcast_clear_oneshot(int cpu);
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
+static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
 
 /*
@@ -303,7 +305,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
        /*
         * The device is in periodic mode. No reprogramming necessary:
         */
-       if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
+       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
                goto unlock;
 
        /*
@@ -324,49 +326,54 @@ unlock:
        raw_spin_unlock(&tick_broadcast_lock);
 }
 
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_control - Enable/disable or force broadcast mode
+ * @mode:      The selected broadcast mode
+ *
+ * Called when the system enters a state where affected tick devices
+ * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-static void tick_do_broadcast_on_off(unsigned long *reason)
+void tick_broadcast_control(enum tick_broadcast_mode mode)
 {
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
-       unsigned long flags;
        int cpu, bc_stopped;
 
-       raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
-       cpu = smp_processor_id();
-       td = &per_cpu(tick_cpu_device, cpu);
+       td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;
-       bc = tick_broadcast_device.evtdev;
 
        /*
         * Is the device not affected by the powerstate ?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
-               goto out;
+               return;
 
        if (!tick_device_is_functional(dev))
-               goto out;
+               return;
 
+       raw_spin_lock(&tick_broadcast_lock);
+       cpu = smp_processor_id();
+       bc = tick_broadcast_device.evtdev;
        bc_stopped = cpumask_empty(tick_broadcast_mask);
 
-       switch (*reason) {
-       case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-       case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+       switch (mode) {
+       case TICK_BROADCAST_FORCE:
+               tick_broadcast_forced = 1;
+       case TICK_BROADCAST_ON:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
-               if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
-                       tick_broadcast_force = 1;
                break;
-       case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-               if (tick_broadcast_force)
+
+       case TICK_BROADCAST_OFF:
+               if (tick_broadcast_forced)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (!tick_device_is_functional(dev))
@@ -388,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
                else
                        tick_broadcast_setup_oneshot(bc);
        }
-out:
-       raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-}
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop.
- */
-void tick_broadcast_on_off(unsigned long reason, int *oncpu)
-{
-       if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
-               printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
-                      "offline CPU #%d\n", *oncpu);
-       else
-               tick_do_broadcast_on_off(&reason);
+       raw_spin_unlock(&tick_broadcast_lock);
 }
+EXPORT_SYMBOL_GPL(tick_broadcast_control);
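Call sites that previously issued clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON/OFF/FORCE, &cpu) convert to direct calls. A sketch for a cpuidle driver whose local timer stops in deep idle (illustrative; as the kerneldoc above says, interrupts must be disabled, and in-tree users typically go through small wrapper helpers):

    /* local tick device dies in the deep C-state: arm the broadcast */
    tick_broadcast_control(TICK_BROADCAST_ON);

    /* ... and disarm it again when the state is torn down */
    tick_broadcast_control(TICK_BROADCAST_OFF);
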
 
 /*
  * Set the periodic handler depending on broadcast on/off
@@ -416,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
                dev->event_handler = tick_handle_periodic_broadcast;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Remove a CPU from broadcasting
  */
-void tick_shutdown_broadcast(unsigned int *cpup)
+void tick_shutdown_broadcast(unsigned int cpu)
 {
        struct clock_event_device *bc;
        unsigned long flags;
-       unsigned int cpu = *cpup;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -438,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 void tick_suspend_broadcast(void)
 {
@@ -453,38 +448,48 @@ void tick_suspend_broadcast(void)
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
-int tick_resume_broadcast(void)
+/*
+ * This is called from tick_resume_local() on a resuming CPU. That's
+ * called from the core resume function, tick_unfreeze() and the magic XEN
+ * resume hackery.
+ *
+ * In none of these cases the broadcast device mode can change and the
+ * bit of the resuming CPU in the broadcast mask is safe as well.
+ */
+bool tick_resume_check_broadcast(void)
+{
+       if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
+               return false;
+       else
+               return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
+}
+
+void tick_resume_broadcast(void)
 {
        struct clock_event_device *bc;
        unsigned long flags;
-       int broadcast = 0;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
        bc = tick_broadcast_device.evtdev;
 
        if (bc) {
-               clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
+               clockevents_tick_resume(bc);
 
                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
-                       broadcast = cpumask_test_cpu(smp_processor_id(),
-                                                    tick_broadcast_mask);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
-                               broadcast = tick_resume_broadcast_oneshot(bc);
+                               tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-
-       return broadcast;
 }
 
-
 #ifdef CONFIG_TICK_ONESHOT
 
 static cpumask_var_t tick_broadcast_oneshot_mask;
@@ -532,8 +537,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
 {
        int ret;
 
-       if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
-               clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+       if (bc->state != CLOCK_EVT_STATE_ONESHOT)
+               clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 
        ret = clockevents_program_event(bc, expires, force);
        if (!ret)
@@ -541,10 +546,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
        return ret;
 }
 
-int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 {
-       clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-       return 0;
+       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 }
 
 /*
@@ -562,8 +566,8 @@ void tick_check_oneshot_broadcast_this_cpu(void)
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
-                       clockevents_set_mode(td->evtdev,
-                                            CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(td->evtdev,
+                                             CLOCK_EVT_STATE_ONESHOT);
                }
        }
 }
@@ -666,31 +670,26 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-static void broadcast_move_bc(int deadcpu)
-{
-       struct clock_event_device *bc = tick_broadcast_device.evtdev;
-
-       if (!bc || !broadcast_needs_cpu(bc, deadcpu))
-               return;
-       /* This moves the broadcast assignment to this cpu */
-       clockevents_program_event(bc, bc->next_event, 1);
-}
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
+ * @state:     The target state (enter/exit)
+ *
+ * The system enters/leaves a state, where affected devices might stop
  * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-int tick_broadcast_oneshot_control(unsigned long reason)
+int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
-       unsigned long flags;
-       ktime_t now;
        int cpu, ret = 0;
+       ktime_t now;
 
        /*
         * Periodic mode does not care about the enter/exit of power
@@ -703,17 +702,17 @@ int tick_broadcast_oneshot_control(unsigned long reason)
 	 * We are called with preemption disabled from the depth of the
         * idle code, so we can't be moved away.
         */
-       cpu = smp_processor_id();
-       td = &per_cpu(tick_cpu_device, cpu);
+       td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;
 
        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;
 
+       raw_spin_lock(&tick_broadcast_lock);
        bc = tick_broadcast_device.evtdev;
+       cpu = smp_processor_id();
 
-       raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-       if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
+       if (state == TICK_BROADCAST_ENTER) {
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                        broadcast_shutdown_local(bc, dev);
@@ -741,7 +740,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
                        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
-                       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
@@ -805,9 +804,10 @@ int tick_broadcast_oneshot_control(unsigned long reason)
                }
        }
 out:
-       raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+       raw_spin_unlock(&tick_broadcast_lock);
        return ret;
 }
+EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
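The enter/exit pairing from an idle loop then looks like this (illustrative; enter_deep_cstate() is invented, and the -EBUSY handling follows the kerneldoc above):

    if (tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER)) {
            /* -EBUSY: this CPU currently drives the broadcast device,
             * so stay in a state that keeps the local tick alive */
            cpu_relax();
    } else {
            enter_deep_cstate();
            tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
    }
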
 
 /*
  * Reset the one shot broadcast for a cpu
@@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 
        /* Set it up only once ! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
-               int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
+               int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
 
                bc->event_handler = tick_handle_oneshot_broadcast;
 
@@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
                           tick_broadcast_oneshot_mask, tmpmask);
 
                if (was_periodic && !cpumask_empty(tmpmask)) {
-                       clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
@@ -894,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void)
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+{
+       struct clock_event_device *bc;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+       bc = tick_broadcast_device.evtdev;
+
+       if (bc && broadcast_needs_cpu(bc, deadcpu)) {
+               /* This moves the broadcast assignment to this CPU: */
+               clockevents_program_event(bc, bc->next_event, 1);
+       }
+       raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
 
 /*
  * Remove a dead CPU from broadcasting
  */
-void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 {
        unsigned long flags;
-       unsigned int cpu = *cpup;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -913,10 +927,9 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
 
-       broadcast_move_bc(cpu);
-
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 /*
  * Check, whether the broadcast device is in one shot mode
index f7c515595b42b2bf9794a8f3f4ee1f9a2c17df89..3ae6afa1eb98e71cc82272cd0a79a25101eff429 100644 (file)
@@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
 
        tick_periodic(cpu);
 
-       if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
+       if (dev->state != CLOCK_EVT_STATE_ONESHOT)
                return;
        for (;;) {
                /*
@@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 
        if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
            !tick_broadcast_oneshot_active()) {
-               clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
+               clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
        } else {
                unsigned long seq;
                ktime_t next;
@@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
                        next = tick_next_period;
                } while (read_seqretry(&jiffies_lock, seq));
 
-               clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+               clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
 
                for (;;) {
                        if (!clockevents_program_event(dev, next, false))
@@ -332,14 +332,16 @@ out_bc:
        tick_install_broadcast_device(newdev);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Transfer the do_timer job away from a dying cpu.
  *
- * Called with interrupts disabled.
+ * Called with interrupts disabled. No locking required. If
+ * tick_do_timer_cpu is owned by this cpu, nothing can change it.
  */
-void tick_handover_do_timer(int *cpup)
+void tick_handover_do_timer(void)
 {
-       if (*cpup == tick_do_timer_cpu) {
+       if (tick_do_timer_cpu == smp_processor_id()) {
                int cpu = cpumask_first(cpu_online_mask);
 
                tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
@@ -354,9 +356,9 @@ void tick_handover_do_timer(int *cpup)
  * access the hardware device itself.
  * We just set the mode and remove it from the lists.
  */
-void tick_shutdown(unsigned int *cpup)
+void tick_shutdown(unsigned int cpu)
 {
-       struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
+       struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
        struct clock_event_device *dev = td->evtdev;
 
        td->mode = TICKDEV_MODE_PERIODIC;
@@ -365,27 +367,42 @@ void tick_shutdown(unsigned int *cpup)
                 * Prevent that the clock events layer tries to call
                 * the set mode function!
                 */
+               dev->state = CLOCK_EVT_STATE_DETACHED;
                dev->mode = CLOCK_EVT_MODE_UNUSED;
                clockevents_exchange_device(dev, NULL);
                dev->event_handler = clockevents_handle_noop;
                td->evtdev = NULL;
        }
 }
+#endif
 
-void tick_suspend(void)
+/**
+ * tick_suspend_local - Suspend the local tick device
+ *
+ * Called from the local cpu for freeze with interrupts disabled.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_suspend_local(void)
 {
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 
        clockevents_shutdown(td->evtdev);
 }
 
-void tick_resume(void)
+/**
+ * tick_resume_local - Resume the local tick device
+ *
+ * Called from the local CPU for unfreeze or XEN resume magic.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_resume_local(void)
 {
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
-       int broadcast = tick_resume_broadcast();
-
-       clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
+       bool broadcast = tick_resume_check_broadcast();
 
+       clockevents_tick_resume(td->evtdev);
        if (!broadcast) {
                if (td->mode == TICKDEV_MODE_PERIODIC)
                        tick_setup_periodic(td->evtdev, 0);
@@ -394,6 +411,35 @@ void tick_resume(void)
        }
 }
 
+/**
+ * tick_suspend - Suspend the tick and the broadcast device
+ *
+ * Called from syscore_suspend() via timekeeping_suspend() with only one
+ * CPU online and interrupts disabled or from tick_unfreeze() under
+ * tick_freeze_lock.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_suspend(void)
+{
+       tick_suspend_local();
+       tick_suspend_broadcast();
+}
+
+/**
+ * tick_resume - Resume the tick and the broadcast device
+ *
+ * Called from syscore_resume() via timekeeping_resume() with only one
+ * CPU online and interrupts disabled.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_resume(void)
+{
+       tick_resume_broadcast();
+       tick_resume_local();
+}
+
 static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
 static unsigned int tick_freeze_depth;
 
@@ -411,12 +457,10 @@ void tick_freeze(void)
        raw_spin_lock(&tick_freeze_lock);
 
        tick_freeze_depth++;
-       if (tick_freeze_depth == num_online_cpus()) {
+       if (tick_freeze_depth == num_online_cpus())
                timekeeping_suspend();
-       } else {
-               tick_suspend();
-               tick_suspend_broadcast();
-       }
+       else
+               tick_suspend_local();
 
        raw_spin_unlock(&tick_freeze_lock);
 }
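
The freeze path above is a simple depth count under a raw spinlock: each CPU entering tick_freeze() bumps the depth, only the last arrival (depth == number of online CPUs) suspends timekeeping, and symmetrically the first CPU out of tick_unfreeze() resumes it. A minimal userspace model of that pattern, with a pthread mutex and a fixed CPU count standing in for the kernel primitives (both stand-ins, not kernel API):

#include <pthread.h>
#include <stdio.h>

#define NUM_CPUS 4	/* stand-in for num_online_cpus() */

static pthread_mutex_t freeze_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int freeze_depth;

static void cpu_freeze(int cpu)
{
	pthread_mutex_lock(&freeze_lock);
	if (++freeze_depth == NUM_CPUS)
		printf("cpu%d: last one in, suspend timekeeping\n", cpu);
	else
		printf("cpu%d: suspend the local tick only\n", cpu);
	pthread_mutex_unlock(&freeze_lock);
}

static void cpu_unfreeze(int cpu)
{
	pthread_mutex_lock(&freeze_lock);
	if (freeze_depth == NUM_CPUS)
		printf("cpu%d: first one out, resume timekeeping\n", cpu);
	else
		printf("cpu%d: resume the local tick only\n", cpu);
	freeze_depth--;
	pthread_mutex_unlock(&freeze_lock);
}

int main(void)
{
	for (int cpu = 0; cpu < NUM_CPUS; cpu++)
		cpu_freeze(cpu);
	for (int cpu = NUM_CPUS - 1; cpu >= 0; cpu--)
		cpu_unfreeze(cpu);
	return 0;
}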
@@ -437,7 +481,7 @@ void tick_unfreeze(void)
        if (tick_freeze_depth == num_online_cpus())
                timekeeping_resume();
        else
-               tick_resume();
+               tick_resume_local();
 
        tick_freeze_depth--;
 
index 366aeb4f2c6696ee6239e501ea4904f3812cd44c..b64fdd8054c56b042784fdce988ebad64f2ea803 100644 (file)
@@ -5,15 +5,12 @@
 #include <linux/tick.h>
 
 #include "timekeeping.h"
+#include "tick-sched.h"
 
-extern seqlock_t jiffies_lock;
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 
-#define CS_NAME_LEN    32
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
-
-#define TICK_DO_TIMER_NONE     -1
-#define TICK_DO_TIMER_BOOT     -2
+# define TICK_DO_TIMER_NONE    -1
+# define TICK_DO_TIMER_BOOT    -2
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 extern ktime_t tick_next_period;
@@ -23,21 +20,72 @@ extern int tick_do_timer_cpu __read_mostly;
 extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 extern void tick_check_new_device(struct clock_event_device *dev);
-extern void tick_handover_do_timer(int *cpup);
-extern void tick_shutdown(unsigned int *cpup);
+extern void tick_shutdown(unsigned int cpu);
 extern void tick_suspend(void);
 extern void tick_resume(void);
 extern bool tick_check_replacement(struct clock_event_device *curdev,
                                   struct clock_event_device *newdev);
 extern void tick_install_replacement(struct clock_event_device *dev);
+extern int tick_is_oneshot_available(void);
+extern struct tick_device *tick_get_device(int cpu);
 
-extern void clockevents_shutdown(struct clock_event_device *dev);
+extern int clockevents_tick_resume(struct clock_event_device *dev);
+/* Check, if the device is functional or a dummy for broadcast */
+static inline int tick_device_is_functional(struct clock_event_device *dev)
+{
+       return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
+}
 
+extern void clockevents_shutdown(struct clock_event_device *dev);
+extern void clockevents_exchange_device(struct clock_event_device *old,
+                                       struct clock_event_device *new);
+extern void clockevents_set_state(struct clock_event_device *dev,
+                                enum clock_event_state state);
+extern int clockevents_program_event(struct clock_event_device *dev,
+                                    ktime_t expires, bool force);
+extern void clockevents_handle_noop(struct clock_event_device *dev);
+extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
 extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 
-/*
- * NO_HZ / high resolution timer shared code
- */
+/* Broadcasting support */
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
+extern void tick_install_broadcast_device(struct clock_event_device *dev);
+extern int tick_is_broadcast_device(struct clock_event_device *dev);
+extern void tick_shutdown_broadcast(unsigned int cpu);
+extern void tick_suspend_broadcast(void);
+extern void tick_resume_broadcast(void);
+extern bool tick_resume_check_broadcast(void);
+extern void tick_broadcast_init(void);
+extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
+extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
+extern struct tick_device *tick_get_broadcast_device(void);
+extern struct cpumask *tick_get_broadcast_mask(void);
+# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
+static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
+static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
+static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
+static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
+static inline void tick_shutdown_broadcast(unsigned int cpu) { }
+static inline void tick_suspend_broadcast(void) { }
+static inline void tick_resume_broadcast(void) { }
+static inline bool tick_resume_check_broadcast(void) { return false; }
+static inline void tick_broadcast_init(void) { }
+static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; }
+
+/* Set the periodic handler in non broadcast mode */
+static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
+{
+       dev->event_handler = tick_handle_periodic;
+}
+# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
+
+#else /* !GENERIC_CLOCKEVENTS: */
+static inline void tick_suspend(void) { }
+static inline void tick_resume(void) { }
+#endif /* !GENERIC_CLOCKEVENTS */
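
The reshuffled header keeps to the usual kernel idiom for optional features: real prototypes under the CONFIG_* guard, empty static inline stubs otherwise, so call sites never carry #ifdefs and the stubs compile away. A toy illustration of the idiom (FEATURE_FOO and foo_suspend() are invented names for this sketch):

#include <stdio.h>

/* FEATURE_FOO and foo_suspend() are invented for this sketch. */
#ifdef FEATURE_FOO
extern void foo_suspend(void);
#else
static inline void foo_suspend(void) { }	/* compiles to nothing */
#endif

int main(void)
{
	/* The call site stays free of #ifdefs either way. */
	foo_suspend();
	printf("suspend path done\n");
	return 0;
}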
+
+/* Oneshot related functions */
 #ifdef CONFIG_TICK_ONESHOT
 extern void tick_setup_oneshot(struct clock_event_device *newdev,
                               void (*handler)(struct clock_event_device *),
@@ -46,58 +94,42 @@ extern int tick_program_event(ktime_t expires, int force);
 extern void tick_oneshot_notify(void);
 extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
 extern void tick_resume_oneshot(void);
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+static inline bool tick_oneshot_possible(void) { return true; }
+extern int tick_oneshot_mode_active(void);
+extern void tick_clock_notify(void);
+extern int tick_check_oneshot_change(int allow_nohz);
+extern int tick_init_highres(void);
+#else /* !CONFIG_TICK_ONESHOT: */
+static inline
+void tick_setup_oneshot(struct clock_event_device *newdev,
+                       void (*handler)(struct clock_event_device *),
+                       ktime_t nextevt) { BUG(); }
+static inline void tick_resume_oneshot(void) { BUG(); }
+static inline int tick_program_event(ktime_t expires, int force) { return 0; }
+static inline void tick_oneshot_notify(void) { }
+static inline bool tick_oneshot_possible(void) { return false; }
+static inline int tick_oneshot_mode_active(void) { return 0; }
+static inline void tick_clock_notify(void) { }
+static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+#endif /* !CONFIG_TICK_ONESHOT */
+
+/* Functions related to oneshot broadcasting */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
-extern int tick_broadcast_oneshot_control(unsigned long reason);
 extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
-extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
 extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
-# else /* BROADCAST */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
-{
-       BUG();
-}
-static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
+extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
+#else /* !(BROADCAST && ONESHOT): */
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
-static inline bool tick_broadcast_oneshot_available(void) { return true; }
-# endif /* !BROADCAST */
-
-#else /* !ONESHOT */
-static inline
-void tick_setup_oneshot(struct clock_event_device *newdev,
-                       void (*handler)(struct clock_event_device *),
-                       ktime_t nextevt)
-{
-       BUG();
-}
-static inline void tick_resume_oneshot(void)
-{
-       BUG();
-}
-static inline int tick_program_event(ktime_t expires, int force)
-{
-       return 0;
-}
-static inline void tick_oneshot_notify(void) { }
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
-{
-       BUG();
-}
-static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
-static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
-{
-       return 0;
-}
-static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline bool tick_broadcast_oneshot_available(void) { return false; }
-#endif /* !TICK_ONESHOT */
+static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
+#endif /* !(BROADCAST && ONESHOT) */
 
 /* NO_HZ_FULL internal */
 #ifdef CONFIG_NO_HZ_FULL
@@ -105,68 +137,3 @@ extern void tick_nohz_init(void);
 # else
 static inline void tick_nohz_init(void) { }
 #endif
-
-/*
- * Broadcasting support
- */
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern void tick_install_broadcast_device(struct clock_event_device *dev);
-extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
-extern void tick_shutdown_broadcast(unsigned int *cpup);
-extern void tick_suspend_broadcast(void);
-extern int tick_resume_broadcast(void);
-extern void tick_broadcast_init(void);
-extern void
-tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
-int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
-
-#else /* !BROADCAST */
-
-static inline void tick_install_broadcast_device(struct clock_event_device *dev)
-{
-}
-
-static inline int tick_is_broadcast_device(struct clock_event_device *dev)
-{
-       return 0;
-}
-static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
-                                            int cpu)
-{
-       return 0;
-}
-static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
-static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
-static inline void tick_suspend_broadcast(void) { }
-static inline int tick_resume_broadcast(void) { return 0; }
-static inline void tick_broadcast_init(void) { }
-static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
-                                            u32 freq) { return -ENODEV; }
-
-/*
- * Set the periodic handler in non broadcast mode
- */
-static inline void tick_set_periodic_handler(struct clock_event_device *dev,
-                                            int broadcast)
-{
-       dev->event_handler = tick_handle_periodic;
-}
-#endif /* !BROADCAST */
-
-/*
- * Check, if the device is functional or a dummy for broadcast
- */
-static inline int tick_device_is_functional(struct clock_event_device *dev)
-{
-       return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
-}
-
-int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
-
-#endif
-
-extern void do_timer(unsigned long ticks);
-extern void update_wall_time(void);
index 7ce740e78e1b506b155c07e3ac50a9a96e6b262d..67a64b1670bfdb984c7d9edec34f7eadd04800ec 100644 (file)
@@ -38,7 +38,7 @@ void tick_resume_oneshot(void)
 {
        struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
        clockevents_program_event(dev, ktime_get(), true);
 }
 
@@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
                        ktime_t next_event)
 {
        newdev->event_handler = handler;
-       clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT);
        clockevents_program_event(newdev, next_event, true);
 }
 
@@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
 
        td->mode = TICKDEV_MODE_ONESHOT;
        dev->event_handler = handler;
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
        tick_broadcast_switch_to_oneshot();
        return 0;
 }
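
All the clockevents_set_mode(dev, CLOCK_EVT_MODE_*) calls in these hunks become clockevents_set_state(dev, CLOCK_EVT_STATE_*): the driver-managed ->mode is being superseded by a core-managed ->state, with ->mode kept only transitionally, as the CLOCK_EVT_STATE_DETACHED/CLOCK_EVT_MODE_UNUSED pairing earlier shows. A toy model of the new call, using only the state names that appear in these hunks; the transition handling here is illustrative, not the kernel's:

#include <stdio.h>

/* Only the state names visible in these hunks; the kernel enum has more. */
enum clock_event_state {
	CLOCK_EVT_STATE_DETACHED,
	CLOCK_EVT_STATE_PERIODIC,
	CLOCK_EVT_STATE_ONESHOT,
};

struct clock_event_device {
	enum clock_event_state state;
	const char *name;
};

/* Toy stand-in: the real core function also invokes driver callbacks. */
static void clockevents_set_state(struct clock_event_device *dev,
				  enum clock_event_state state)
{
	if (dev->state != state) {	/* skip redundant transitions */
		dev->state = state;
		printf("%s: state -> %d\n", dev->name, state);
	}
}

int main(void)
{
	struct clock_event_device dev = { CLOCK_EVT_STATE_DETACHED, "toy" };

	clockevents_set_state(&dev, CLOCK_EVT_STATE_PERIODIC);
	clockevents_set_state(&dev, CLOCK_EVT_STATE_ONESHOT);
	return 0;
}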
index a4c4edac45281b5c25080efd61e19cc10e9d8636..914259128145e2394e65bd36f18aaf9a81f78843 100644 (file)
@@ -34,7 +34,7 @@
 /*
  * Per cpu nohz control structure
  */
-DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
  * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str)
 
 __setup("nohz=", setup_tick_nohz);
 
+int tick_nohz_tick_stopped(void)
+{
+       return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
 /**
  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
  *
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
new file mode 100644 (file)
index 0000000..28b5da3
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef _TICK_SCHED_H
+#define _TICK_SCHED_H
+
+#include <linux/hrtimer.h>
+
+enum tick_device_mode {
+       TICKDEV_MODE_PERIODIC,
+       TICKDEV_MODE_ONESHOT,
+};
+
+struct tick_device {
+       struct clock_event_device *evtdev;
+       enum tick_device_mode mode;
+};
+
+enum tick_nohz_mode {
+       NOHZ_MODE_INACTIVE,
+       NOHZ_MODE_LOWRES,
+       NOHZ_MODE_HIGHRES,
+};
+
+/**
+ * struct tick_sched - sched tick emulation and no idle tick control/stats
+ * @sched_timer:       hrtimer to schedule the periodic tick in high
+ *                     resolution mode
+ * @last_tick:         Store the last tick expiry time when the tick
+ *                     timer is modified for nohz sleeps. This is necessary
+ *                     to resume the tick timer operation in the timeline
+ *                     when the CPU returns from nohz sleep.
+ * @tick_stopped:      Indicator that the idle tick has been stopped
+ * @idle_jiffies:      jiffies at the entry to idle for idle time accounting
+ * @idle_calls:                Total number of idle calls
+ * @idle_sleeps:       Number of idle calls, where the sched tick was stopped
+ * @idle_entrytime:    Time when the idle call was entered
+ * @idle_waketime:     Time when the idle was interrupted
+ * @idle_exittime:     Time when the idle state was left
+ * @idle_sleeptime:    Sum of the time slept in idle with sched tick stopped
+ * @iowait_sleeptime:  Sum of the time slept in idle with sched tick stopped, with IO outstanding
+ * @sleep_length:      Duration of the current idle sleep
+ * @do_timer_last:     CPU was the last one doing do_timer before going idle
+ */
+struct tick_sched {
+       struct hrtimer                  sched_timer;
+       unsigned long                   check_clocks;
+       enum tick_nohz_mode             nohz_mode;
+       ktime_t                         last_tick;
+       int                             inidle;
+       int                             tick_stopped;
+       unsigned long                   idle_jiffies;
+       unsigned long                   idle_calls;
+       unsigned long                   idle_sleeps;
+       int                             idle_active;
+       ktime_t                         idle_entrytime;
+       ktime_t                         idle_waketime;
+       ktime_t                         idle_exittime;
+       ktime_t                         idle_sleeptime;
+       ktime_t                         iowait_sleeptime;
+       ktime_t                         sleep_length;
+       unsigned long                   last_jiffies;
+       unsigned long                   next_jiffies;
+       ktime_t                         idle_expires;
+       int                             do_timer_last;
+};
+
+extern struct tick_sched *tick_get_tick_sched(int cpu);
+
+extern void tick_setup_sched_timer(void);
+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
+extern void tick_cancel_sched_timer(int cpu);
+#else
+static inline void tick_cancel_sched_timer(int cpu) { }
+#endif
+
+#endif
index 91db94136c1062571ba0d0f1bfd1ed687770af3e..946acb72179facb1c173e54592b3c1c3637f8abd 100644 (file)
@@ -59,17 +59,15 @@ struct tk_fast {
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned;
+static struct tk_fast tk_fast_raw  ____cacheline_aligned;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-/* Flag for if there is a persistent clock on this platform */
-bool __read_mostly persistent_clock_exist = false;
-
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
-       while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
-               tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
+       while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
+               tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
                tk->xtime_sec++;
        }
 }
@@ -79,20 +77,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
        struct timespec64 ts;
 
        ts.tv_sec = tk->xtime_sec;
-       ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+       ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
        return ts;
 }
 
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec = ts->tv_sec;
-       tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
 }
 
 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec += ts->tv_sec;
-       tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
        tk_normalize_xtime(tk);
 }
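
tk_set_xtime()/tk_xtime() above show the representation used throughout this file: xtime_nsec stores nanoseconds pre-shifted left by the clocksource shift, a fixed-point form that preserves sub-nanosecond precision during accumulation, which is why every conversion appears as << shift or >> shift. A two-line illustration with an assumed shift value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t shift = 8;	/* hypothetical clocksource shift */
	/* Stored pre-shifted, as tk_set_xtime() does above. */
	uint64_t xtime_nsec = 123456789ULL << shift;

	/* Readers shift back down, as tk_xtime() does above. */
	printf("%llu ns\n", (unsigned long long)(xtime_nsec >> shift));
	return 0;
}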
 
@@ -118,6 +116,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
        tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
+/*
+ * These simple flag variables are managed
+ * without locks, which is racy, but ok since
+ * we don't really care about being super
+ * precise about how many events were seen,
+ * just that a problem was observed.
+ */
+static int timekeeping_underflow_seen;
+static int timekeeping_overflow_seen;
+
+/* last_warning is only modified under the timekeeping lock */
+static long timekeeping_last_warning;
+
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+
+       cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
+       const char *name = tk->tkr_mono.clock->name;
+
+       if (offset > max_cycles) {
+               printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+                               offset, name, max_cycles);
+               printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+       } else {
+               if (offset > (max_cycles >> 1)) {
+                       printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
+                                       offset, name, max_cycles >> 1);
+                       printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+               }
+       }
+
+       if (timekeeping_underflow_seen) {
+               if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+                       printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+                       printk_deferred("         Your kernel is probably still fine.\n");
+                       timekeeping_last_warning = jiffies;
+               }
+               timekeeping_underflow_seen = 0;
+       }
+
+       if (timekeeping_overflow_seen) {
+               if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+                       printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+                       printk_deferred("         Your kernel is probably still fine.\n");
+                       timekeeping_last_warning = jiffies;
+               }
+               timekeeping_overflow_seen = 0;
+       }
+}
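
timekeeping_check_update() above rate-limits its underflow/overflow reports to one per WARNING_FREQ (HZ*300, i.e. five minutes) by comparing jiffies against a last-warning stamp. The same pattern in plain C, with time(NULL) seconds standing in for jiffies (an assumption of this sketch):

#include <stdio.h>
#include <time.h>

#define WARN_INTERVAL 300	/* seconds; models WARNING_FREQ = HZ*300 */

static time_t last_warning;

static void rate_limited_warn(const char *msg)
{
	time_t now = time(NULL);

	/* Emit at most one warning per interval; later events are dropped. */
	if (now - last_warning > WARN_INTERVAL) {
		fprintf(stderr, "WARNING: %s\n", msg);
		last_warning = now;
	}
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		rate_limited_warn("clocksource misbehaving");	/* prints once */
	return 0;
}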
+
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+       cycle_t now, last, mask, max, delta;
+       unsigned int seq;
+
+       /*
+        * Since we're called holding a seqlock, the data may shift
+        * under us while we're doing the calculation. This can cause
+        * false positives, since we'd note a problem but throw the
+        * results away. So nest another seqlock here to atomically
+        * grab the points we are checking with.
+        * grab the values we are checking.
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               now = tkr->read(tkr->clock);
+               last = tkr->cycle_last;
+               mask = tkr->mask;
+               max = tkr->clock->max_cycles;
+       } while (read_seqcount_retry(&tk_core.seq, seq));
+
+       delta = clocksource_delta(now, last, mask);
+
+       /*
+        * Try to catch underflows by checking if we are seeing small
+        * mask-relative negative values.
+        */
+       if (unlikely((~delta & mask) < (mask >> 3))) {
+               timekeeping_underflow_seen = 1;
+               delta = 0;
+       }
+
+       /* Cap delta value to the max_cycles values to avoid mult overflows */
+       if (unlikely(delta > max)) {
+               timekeeping_overflow_seen = 1;
+               delta = tkr->clock->max_cycles;
+       }
+
+       return delta;
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+}
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+       cycle_t cycle_now, delta;
+
+       /* read clocksource */
+       cycle_now = tkr->read(tkr->clock);
+
+       /* calculate the delta since the last update_wall_time */
+       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+
+       return delta;
+}
+#endif
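
The heuristic in the debug timekeeping_get_delta() above deserves a worked example. clocksource_delta() is a masked subtraction, so if the clocksource momentarily reads *behind* cycle_last, the delta wraps to nearly the full mask and its complement (~delta & mask) becomes tiny; that is what (~delta & mask) < (mask >> 3) detects. A self-contained demonstration with a 32-bit mask:

#include <stdint.h>
#include <stdio.h>

/* Models clocksource_delta(): masked counter subtraction. */
static uint64_t clocksource_delta(uint64_t now, uint64_t last, uint64_t mask)
{
	return (now - last) & mask;
}

int main(void)
{
	uint64_t mask = 0xffffffffULL;		/* 32-bit clocksource */
	uint64_t last = 1000, now = 990;	/* clock read 10 cycles *behind* */
	uint64_t delta = clocksource_delta(now, last, mask);

	/* delta wraps to 0xfffffff6: ~delta & mask is tiny (9), well under
	 * mask >> 3, so the debug code flags an underflow and uses delta = 0. */
	printf("delta=%#llx ~delta&mask=%llu threshold=%llu\n",
	       (unsigned long long)delta,
	       (unsigned long long)(~delta & mask),
	       (unsigned long long)(mask >> 3));
	return 0;
}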
+
 /**
  * tk_setup_internals - Set up internals to use clocksource clock.
  *
@@ -135,11 +244,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        u64 tmp, ntpinterval;
        struct clocksource *old_clock;
 
-       old_clock = tk->tkr.clock;
-       tk->tkr.clock = clock;
-       tk->tkr.read = clock->read;
-       tk->tkr.mask = clock->mask;
-       tk->tkr.cycle_last = tk->tkr.read(clock);
+       old_clock = tk->tkr_mono.clock;
+       tk->tkr_mono.clock = clock;
+       tk->tkr_mono.read = clock->read;
+       tk->tkr_mono.mask = clock->mask;
+       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+
+       tk->tkr_raw.clock = clock;
+       tk->tkr_raw.read = clock->read;
+       tk->tkr_raw.mask = clock->mask;
+       tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +277,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        if (old_clock) {
                int shift_change = clock->shift - old_clock->shift;
                if (shift_change < 0)
-                       tk->tkr.xtime_nsec >>= -shift_change;
+                       tk->tkr_mono.xtime_nsec >>= -shift_change;
                else
-                       tk->tkr.xtime_nsec <<= shift_change;
+                       tk->tkr_mono.xtime_nsec <<= shift_change;
        }
-       tk->tkr.shift = clock->shift;
+       tk->tkr_raw.xtime_nsec = 0;
+
+       tk->tkr_mono.shift = clock->shift;
+       tk->tkr_raw.shift = clock->shift;
 
        tk->ntp_error = 0;
        tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +295,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         * active clocksource. These value will be adjusted via NTP
         * to counteract clock drifting.
         */
-       tk->tkr.mult = clock->mult;
+       tk->tkr_mono.mult = clock->mult;
+       tk->tkr_raw.mult = clock->mult;
        tk->ntp_err_mult = 0;
 }
 
@@ -193,14 +311,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
 
 static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
-       cycle_t cycle_now, delta;
+       cycle_t delta;
        s64 nsec;
 
-       /* read clocksource: */
-       cycle_now = tkr->read(tkr->clock);
-
-       /* calculate the delta since the last update_wall_time: */
-       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+       delta = timekeeping_get_delta(tkr);
 
        nsec = delta * tkr->mult + tkr->xtime_nsec;
        nsec >>= tkr->shift;
@@ -209,25 +323,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
        return nsec + arch_gettimeoffset();
 }
 
-static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
-{
-       struct clocksource *clock = tk->tkr.clock;
-       cycle_t cycle_now, delta;
-       s64 nsec;
-
-       /* read clocksource: */
-       cycle_now = tk->tkr.read(clock);
-
-       /* calculate the delta since the last update_wall_time: */
-       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-
-       /* convert delta to nanoseconds. */
-       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
-
-       /* If arch requires, add in get_arch_timeoffset() */
-       return nsec + arch_gettimeoffset();
-}
-
 /**
  * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
  * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +362,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
  * slightly wrong timestamp (a few nanoseconds). See
  * @ktime_get_mono_fast_ns.
  */
-static void update_fast_timekeeper(struct tk_read_base *tkr)
+static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
 {
-       struct tk_read_base *base = tk_fast_mono.base;
+       struct tk_read_base *base = tkf->base;
 
        /* Force readers off to base[1] */
-       raw_write_seqcount_latch(&tk_fast_mono.seq);
+       raw_write_seqcount_latch(&tkf->seq);
 
        /* Update base[0] */
        memcpy(base, tkr, sizeof(*base));
 
        /* Force readers back to base[0] */
-       raw_write_seqcount_latch(&tk_fast_mono.seq);
+       raw_write_seqcount_latch(&tkf->seq);
 
        /* Update base[1] */
        memcpy(base + 1, base, sizeof(*base));
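
update_fast_timekeeper() is the writer half of a seqcount latch: the sequence counter's low bit steers readers to whichever of the two base copies the writer is not currently touching, which is what makes the fast/NMI readers below lock-free. A single-threaded model of the indexing follows; the kernel's raw_write_seqcount_latch() adds the memory barriers that make this safe under concurrency, and they are omitted here:

#include <stdio.h>
#include <string.h>

struct base { long mult, shift; };

static unsigned int seq;
static struct base bases[2];

/* Writer: mirrors update_fast_timekeeper() above. */
static void latch_update(const struct base *new)
{
	seq++;				/* odd: readers use bases[1] */
	memcpy(&bases[0], new, sizeof(*new));
	seq++;				/* even: readers use bases[0] */
	memcpy(&bases[1], new, sizeof(*new));
}

/* Reader: retries only if the writer moved underneath it. */
static struct base latch_read(void)
{
	struct base snap;
	unsigned int s;

	do {
		s = seq;
		snap = bases[s & 1];
	} while (s != seq);
	return snap;
}

int main(void)
{
	struct base b = { 4096, 12 };

	latch_update(&b);
	printf("mult=%ld shift=%ld\n", latch_read().mult, latch_read().shift);
	return 0;
}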
@@ -316,22 +411,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
  * of the following timestamps. Callers need to be aware of that and
  * deal with it.
  */
-u64 notrace ktime_get_mono_fast_ns(void)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
        struct tk_read_base *tkr;
        unsigned int seq;
        u64 now;
 
        do {
-               seq = raw_read_seqcount(&tk_fast_mono.seq);
-               tkr = tk_fast_mono.base + (seq & 0x01);
-               now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+               seq = raw_read_seqcount(&tkf->seq);
+               tkr = tkf->base + (seq & 0x01);
+               now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+       } while (read_seqcount_retry(&tkf->seq, seq));
 
-       } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
        return now;
 }
+
+u64 ktime_get_mono_fast_ns(void)
+{
+       return __ktime_get_fast_ns(&tk_fast_mono);
+}
 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
 
+u64 ktime_get_raw_fast_ns(void)
+{
+       return __ktime_get_fast_ns(&tk_fast_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
@@ -353,12 +459,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
 static void halt_fast_timekeeper(struct timekeeper *tk)
 {
        static struct tk_read_base tkr_dummy;
-       struct tk_read_base *tkr = &tk->tkr;
+       struct tk_read_base *tkr = &tk->tkr_mono;
 
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
        cycles_at_suspend = tkr->read(tkr->clock);
        tkr_dummy.read = dummy_clock_read;
-       update_fast_timekeeper(&tkr_dummy);
+       update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
+
+       tkr = &tk->tkr_raw;
+       memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
+       tkr_dummy.read = dummy_clock_read;
+       update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +480,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
 
        xt = timespec64_to_timespec(tk_xtime(tk));
        wm = timespec64_to_timespec(tk->wall_to_monotonic);
-       update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
-                           tk->tkr.cycle_last);
+       update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
+                           tk->tkr_mono.cycle_last);
 }
 
 static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +498,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
        * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
        * users are removed, this can be killed.
        */
-       remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
-       tk->tkr.xtime_nsec -= remainder;
-       tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
+       remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
+       tk->tkr_mono.xtime_nsec -= remainder;
+       tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
        tk->ntp_error += remainder << tk->ntp_error_shift;
-       tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
+       tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
 }
 #else
 #define old_vsyscall_fixup(tk)
@@ -456,17 +567,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
         */
        seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
        nsec = (u32) tk->wall_to_monotonic.tv_nsec;
-       tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+       tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 
        /* Update the monotonic raw base */
-       tk->base_raw = timespec64_to_ktime(tk->raw_time);
+       tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
 
        /*
         * The sum of the nanoseconds portions of xtime and
         * wall_to_monotonic can be greater/equal one second. Take
         * this into account before updating tk->ktime_sec.
         */
-       nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+       nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
        if (nsec >= NSEC_PER_SEC)
                seconds++;
        tk->ktime_sec = seconds;
@@ -489,7 +600,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
                memcpy(&shadow_timekeeper, &tk_core.timekeeper,
                       sizeof(tk_core.timekeeper));
 
-       update_fast_timekeeper(&tk->tkr);
+       update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+       update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
 }
 
 /**
@@ -501,22 +613,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-       struct clocksource *clock = tk->tkr.clock;
+       struct clocksource *clock = tk->tkr_mono.clock;
        cycle_t cycle_now, delta;
        s64 nsec;
 
-       cycle_now = tk->tkr.read(clock);
-       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-       tk->tkr.cycle_last = cycle_now;
+       cycle_now = tk->tkr_mono.read(clock);
+       delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
 
-       tk->tkr.xtime_nsec += delta * tk->tkr.mult;
+       tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
 
        /* If arch requires, add in get_arch_timeoffset() */
-       tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
 
        tk_normalize_xtime(tk);
 
-       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
        timespec64_add_ns(&tk->raw_time, nsec);
 }
 
@@ -537,7 +650,7 @@ int __getnstimeofday64(struct timespec64 *ts)
                seq = read_seqcount_begin(&tk_core.seq);
 
                ts->tv_sec = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -577,8 +690,8 @@ ktime_t ktime_get(void)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               base = tk->tkr.base_mono;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = tk->tkr_mono.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -603,8 +716,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               base = ktime_add(tk->tkr.base_mono, *offset);
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = ktime_add(tk->tkr_mono.base, *offset);
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -645,8 +758,8 @@ ktime_t ktime_get_raw(void)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               base = tk->base_raw;
-               nsecs = timekeeping_get_ns_raw(tk);
+               base = tk->tkr_raw.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_raw);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -674,7 +787,7 @@ void ktime_get_ts64(struct timespec64 *ts)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                ts->tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(&tk->tkr);
+               nsec = timekeeping_get_ns(&tk->tkr_mono);
                tomono = tk->wall_to_monotonic;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +872,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                ts_real->tv_sec = tk->xtime_sec;
                ts_real->tv_nsec = 0;
 
-               nsecs_raw = timekeeping_get_ns_raw(tk);
-               nsecs_real = timekeeping_get_ns(&tk->tkr);
+               nsecs_raw  = timekeeping_get_ns(&tk->tkr_raw);
+               nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -943,7 +1056,7 @@ static int change_clocksource(void *data)
         */
        if (try_module_get(new->owner)) {
                if (!new->enable || new->enable(new) == 0) {
-                       old = tk->tkr.clock;
+                       old = tk->tkr_mono.clock;
                        tk_setup_internals(tk, new);
                        if (old->disable)
                                old->disable(old);
@@ -971,11 +1084,11 @@ int timekeeping_notify(struct clocksource *clock)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
 
-       if (tk->tkr.clock == clock)
+       if (tk->tkr_mono.clock == clock)
                return 0;
        stop_machine(change_clocksource, clock, NULL);
        tick_clock_notify();
-       return tk->tkr.clock == clock ? 0 : -1;
+       return tk->tkr_mono.clock == clock ? 0 : -1;
 }
 
 /**
@@ -993,7 +1106,7 @@ void getrawmonotonic64(struct timespec64 *ts)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               nsecs = timekeeping_get_ns_raw(tk);
+               nsecs = timekeeping_get_ns(&tk->tkr_raw);
                ts64 = tk->raw_time;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1129,7 @@ int timekeeping_valid_for_hres(void)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+               ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1035,7 +1148,7 @@ u64 timekeeping_max_deferment(void)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ret = tk->tkr.clock->max_idle_ns;
+               ret = tk->tkr_mono.clock->max_idle_ns;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1057,6 +1170,14 @@ void __weak read_persistent_clock(struct timespec *ts)
        ts->tv_nsec = 0;
 }
 
+void __weak read_persistent_clock64(struct timespec64 *ts64)
+{
+       struct timespec ts;
+
+       read_persistent_clock(&ts);
+       *ts64 = timespec_to_timespec64(ts);
+}
+
 /**
  * read_boot_clock -  Return time of the system start.
  *
@@ -1072,6 +1193,20 @@ void __weak read_boot_clock(struct timespec *ts)
        ts->tv_nsec = 0;
 }
 
+void __weak read_boot_clock64(struct timespec64 *ts64)
+{
+       struct timespec ts;
+
+       read_boot_clock(&ts);
+       *ts64 = timespec_to_timespec64(ts);
+}
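
Both new *64 helpers are defined __weak: an architecture with a native 64-bit persistent/boot clock provides a strong symbol of the same name, and the conversion wrapper vanishes at link time. A minimal model of the weak-symbol override (GCC/Clang attribute syntax; the type and names are shortened for this sketch):

#include <stdio.h>

struct ts64 { long long tv_sec; long tv_nsec; };

/* Default (weak) definition: the fallback wrapper. */
__attribute__((weak)) void read_clock64(struct ts64 *ts)
{
	ts->tv_sec = 0;		/* models "no persistent clock" */
	ts->tv_nsec = 0;
}

/* An arch file defining its own read_clock64() (a strong symbol)
 * would silently replace the weak default above at link time. */

int main(void)
{
	struct ts64 ts;

	read_clock64(&ts);
	printf("%lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
	return 0;
}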
+
+/* Flag for if timekeeping_resume() has injected sleeptime */
+static bool sleeptime_injected;
+
+/* Flag for if there is a persistent clock on this platform */
+static bool persistent_clock_exists;
+
 /*
  * timekeeping_init - Initializes the clocksource and common timekeeping values
  */
@@ -1081,20 +1216,17 @@ void __init timekeeping_init(void)
        struct clocksource *clock;
        unsigned long flags;
        struct timespec64 now, boot, tmp;
-       struct timespec ts;
 
-       read_persistent_clock(&ts);
-       now = timespec_to_timespec64(ts);
+       read_persistent_clock64(&now);
        if (!timespec64_valid_strict(&now)) {
                pr_warn("WARNING: Persistent clock returned invalid value!\n"
                        "         Check your CMOS/BIOS settings.\n");
                now.tv_sec = 0;
                now.tv_nsec = 0;
        } else if (now.tv_sec || now.tv_nsec)
-               persistent_clock_exist = true;
+               persistent_clock_exists = true;
 
-       read_boot_clock(&ts);
-       boot = timespec_to_timespec64(ts);
+       read_boot_clock64(&boot);
        if (!timespec64_valid_strict(&boot)) {
                pr_warn("WARNING: Boot clock returned invalid value!\n"
                        "         Check your CMOS/BIOS settings.\n");
@@ -1114,7 +1246,6 @@ void __init timekeeping_init(void)
        tk_set_xtime(tk, &now);
        tk->raw_time.tv_sec = 0;
        tk->raw_time.tv_nsec = 0;
-       tk->base_raw.tv64 = 0;
        if (boot.tv_sec == 0 && boot.tv_nsec == 0)
                boot = tk_xtime(tk);
 
@@ -1127,7 +1258,7 @@ void __init timekeeping_init(void)
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
-/* time in seconds when suspend began */
+/* time in seconds when suspend began for persistent clock */
 static struct timespec64 timekeeping_suspend_time;
 
 /**
@@ -1152,12 +1283,49 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
        tk_debug_account_sleep_time(delta);
 }
 
+#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
+/**
+ * We have three kinds of time sources to use for sleep time
+ * injection, the preference order is:
+ * 1) non-stop clocksource
+ * 2) persistent clock (ie: RTC accessible when irqs are off)
+ * 3) RTC
+ *
+ * 1) and 2) are used by timekeeping, 3) by the RTC subsystem.
+ * If the system has neither 1) nor 2), 3) is used as the final
+ * fallback.
+ *
+ * If timekeeping has injected sleeptime via either 1) or 2),
+ * 3) becomes unnecessary, so rtc_resume() does not need to be
+ * called; that is what timekeeping_rtc_skipresume() reports.
+ */
+bool timekeeping_rtc_skipresume(void)
+{
+       return sleeptime_injected;
+}
+
+/**
+ * Whether 1) can be used is only known once timekeeping_resume()
+ * runs, and that is invoked after rtc_suspend(), so rtc_suspend()
+ * cannot be safely skipped when the system only has 1).
+ *
+ * But if the system has 2), 2) will definitely be used, so
+ * rtc_suspend() does not need to be called; that is what
+ * timekeeping_rtc_skipsuspend() reports.
+ */
+bool timekeeping_rtc_skipsuspend(void)
+{
+       return persistent_clock_exists;
+}
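
The consumers of these two predicates live in the RTC core's suspend/resume path. Sketched roughly below under that assumption; the real call sites (rtc_suspend()/rtc_resume() in the RTC class code) may differ in detail:

#include <stdbool.h>

/* Prototypes from the hunk above; the bodies live in timekeeping.c. */
extern bool timekeeping_rtc_skipsuspend(void);
extern bool timekeeping_rtc_skipresume(void);

/* Rough sketch of the consumer side -- not the verbatim RTC core code. */
static int rtc_suspend_sketch(void)
{
	if (timekeeping_rtc_skipsuspend())
		return 0;	/* persistent clock covers it: skip RTC work */
	/* ... otherwise snapshot the RTC for later sleeptime injection ... */
	return 0;
}

static int rtc_resume_sketch(void)
{
	if (timekeeping_rtc_skipresume())
		return 0;	/* timekeeping already injected the sleeptime */
	/* ... otherwise read the RTC and inject the suspend interval ... */
	return 0;
}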
+
 /**
  * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values
  * @delta: pointer to a timespec64 delta value
  *
- * This hook is for architectures that cannot support read_persistent_clock
+ * This hook is for architectures that cannot support read_persistent_clock64
  * because their RTC/persistent clock is only accessible when irqs are enabled,
+ * and that also don't have an effective nonstop clocksource.
  *
  * This function should only be called by rtc_resume(), and allows
  * a suspend offset to be injected into the timekeeping values.
@@ -1167,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned long flags;
 
-       /*
-        * Make sure we don't set the clock twice, as timekeeping_resume()
-        * already did it
-        */
-       if (has_persistent_clock())
-               return;
-
        raw_spin_lock_irqsave(&timekeeper_lock, flags);
        write_seqcount_begin(&tk_core.seq);
 
@@ -1189,26 +1350,21 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
        /* signal hrtimers about time change */
        clock_was_set();
 }
+#endif
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
- *
- * This is for the generic clocksource timekeeping.
- * xtime/wall_to_monotonic/jiffies/etc are
- * still managed by arch specific suspend/resume code.
  */
 void timekeeping_resume(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *clock = tk->tkr.clock;
+       struct clocksource *clock = tk->tkr_mono.clock;
        unsigned long flags;
        struct timespec64 ts_new, ts_delta;
-       struct timespec tmp;
        cycle_t cycle_now, cycle_delta;
-       bool suspendtime_found = false;
 
-       read_persistent_clock(&tmp);
-       ts_new = timespec_to_timespec64(tmp);
+       sleeptime_injected = false;
+       read_persistent_clock64(&ts_new);
 
        clockevents_resume();
        clocksource_resume();
@@ -1228,16 +1384,16 @@ void timekeeping_resume(void)
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = tk->tkr.read(clock);
+       cycle_now = tk->tkr_mono.read(clock);
        if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-               cycle_now > tk->tkr.cycle_last) {
+               cycle_now > tk->tkr_mono.cycle_last) {
                u64 num, max = ULLONG_MAX;
                u32 mult = clock->mult;
                u32 shift = clock->shift;
                s64 nsec = 0;
 
-               cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
-                                               tk->tkr.mask);
+               cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
+                                               tk->tkr_mono.mask);
 
                /*
                 * "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1253,17 +1409,19 @@ void timekeeping_resume(void)
                nsec += ((u64) cycle_delta * mult) >> shift;
 
                ts_delta = ns_to_timespec64(nsec);
-               suspendtime_found = true;
+               sleeptime_injected = true;
        } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
                ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
-               suspendtime_found = true;
+               sleeptime_injected = true;
        }
 
-       if (suspendtime_found)
+       if (sleeptime_injected)
                __timekeeping_inject_sleeptime(tk, &ts_delta);
 
        /* Re-base the last cycle value */
-       tk->tkr.cycle_last = cycle_now;
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
+
        tk->ntp_error = 0;
        timekeeping_suspended = 0;
        timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1272,9 +1430,7 @@ void timekeeping_resume(void)
 
        touch_softlockup_watchdog();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
-
-       /* Resume hrtimers */
+       tick_resume();
        hrtimers_resume();
 }
 
@@ -1284,10 +1440,8 @@ int timekeeping_suspend(void)
        unsigned long flags;
        struct timespec64               delta, delta_delta;
        static struct timespec64        old_delta;
-       struct timespec tmp;
 
-       read_persistent_clock(&tmp);
-       timekeeping_suspend_time = timespec_to_timespec64(tmp);
+       read_persistent_clock64(&timekeeping_suspend_time);
 
        /*
         * On some systems the persistent_clock can not be detected at
@@ -1295,31 +1449,33 @@ int timekeeping_suspend(void)
         * value returned, update the persistent_clock_exists flag.
         */
        if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
-               persistent_clock_exist = true;
+               persistent_clock_exists = true;
 
        raw_spin_lock_irqsave(&timekeeper_lock, flags);
        write_seqcount_begin(&tk_core.seq);
        timekeeping_forward_now(tk);
        timekeeping_suspended = 1;
 
-       /*
-        * To avoid drift caused by repeated suspend/resumes,
-        * which each can add ~1 second drift error,
-        * try to compensate so the difference in system time
-        * and persistent_clock time stays close to constant.
-        */
-       delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
-       delta_delta = timespec64_sub(delta, old_delta);
-       if (abs(delta_delta.tv_sec)  >= 2) {
+       if (persistent_clock_exists) {
                /*
-                * if delta_delta is too large, assume time correction
-                * has occured and set old_delta to the current delta.
+                * To avoid drift caused by repeated suspend/resumes,
+                * which each can add ~1 second drift error,
+                * try to compensate so the difference in system time
+                * and persistent_clock time stays close to constant.
                 */
-               old_delta = delta;
-       } else {
-               /* Otherwise try to adjust old_system to compensate */
-               timekeeping_suspend_time =
-                       timespec64_add(timekeeping_suspend_time, delta_delta);
+               delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
+               delta_delta = timespec64_sub(delta, old_delta);
+               if (abs(delta_delta.tv_sec) >= 2) {
+                       /*
+                        * if delta_delta is too large, assume time correction
+                        * has occurred and set old_delta to the current delta.
+                        */
+                       old_delta = delta;
+               } else {
+                       /* Otherwise try to adjust old_system to compensate */
+                       timekeeping_suspend_time =
+                               timespec64_add(timekeeping_suspend_time, delta_delta);
+               }
        }
 
        timekeeping_update(tk, TK_MIRROR);
@@ -1327,7 +1483,7 @@ int timekeeping_suspend(void)
        write_seqcount_end(&tk_core.seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+       tick_suspend();
        clocksource_suspend();
        clockevents_suspend();
 
@@ -1416,15 +1572,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
         *
         * XXX - TODO: Doc ntp_error calculation.
         */
-       if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
+       if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
                /* NTP adjustment caused clocksource mult overflow */
                WARN_ON_ONCE(1);
                return;
        }
 
-       tk->tkr.mult += mult_adj;
+       tk->tkr_mono.mult += mult_adj;
        tk->xtime_interval += interval;
-       tk->tkr.xtime_nsec -= offset;
+       tk->tkr_mono.xtime_nsec -= offset;
        tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
 }
 
@@ -1486,13 +1642,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
                tk->ntp_err_mult = 0;
        }
 
-       if (unlikely(tk->tkr.clock->maxadj &&
-               (abs(tk->tkr.mult - tk->tkr.clock->mult)
-                       > tk->tkr.clock->maxadj))) {
+       if (unlikely(tk->tkr_mono.clock->maxadj &&
+               (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
+                       > tk->tkr_mono.clock->maxadj))) {
                printk_once(KERN_WARNING
                        "Adjusting %s more than 11%% (%ld vs %ld)\n",
-                       tk->tkr.clock->name, (long)tk->tkr.mult,
-                       (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+                       tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
+                       (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
        }
 
        /*
@@ -1509,9 +1665,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
         * We'll correct this error next time through this function, when
         * xtime_nsec is not as small.
         */
-       if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
-               s64 neg = -(s64)tk->tkr.xtime_nsec;
-               tk->tkr.xtime_nsec = 0;
+       if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
+               s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
+               tk->tkr_mono.xtime_nsec = 0;
                tk->ntp_error += neg << tk->ntp_error_shift;
        }
 }
@@ -1526,13 +1682,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
  */
 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
-       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
+       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
        unsigned int clock_set = 0;
 
-       while (tk->tkr.xtime_nsec >= nsecps) {
+       while (tk->tkr_mono.xtime_nsec >= nsecps) {
                int leap;
 
-               tk->tkr.xtime_nsec -= nsecps;
+               tk->tkr_mono.xtime_nsec -= nsecps;
                tk->xtime_sec++;
 
                /* Figure out if it's a leap sec and apply if needed */
@@ -1577,9 +1733,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 
        /* Accumulate one shifted interval */
        offset -= interval;
-       tk->tkr.cycle_last += interval;
+       tk->tkr_mono.cycle_last += interval;
+       tk->tkr_raw.cycle_last  += interval;
 
-       tk->tkr.xtime_nsec += tk->xtime_interval << shift;
+       tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
        *clock_set |= accumulate_nsecs_to_secs(tk);
 
        /* Accumulate raw time */
@@ -1622,14 +1779,17 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
        offset = real_tk->cycle_interval;
 #else
-       offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
-                                  tk->tkr.cycle_last, tk->tkr.mask);
+       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+                                  tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
        /* Check if there's really nothing to do */
        if (offset < real_tk->cycle_interval)
                goto out;
 
+       /* Do some additional sanity checking */
+       timekeeping_check_update(real_tk, offset);
+
        /*
         * With NO_HZ we may have to accumulate many cycle_intervals
         * (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1944,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               base = tk->tkr.base_mono;
-               nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+               base = tk->tkr_mono.base;
+               nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
@@ -1816,8 +1976,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               base = tk->tkr.base_mono;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = tk->tkr_mono.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
index 1d91416055d5e9f05e5b6d482bc8ab490acdeeb2..ead8794b9a4e470242d37684fd04079ffbd70dec 100644 (file)
@@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts);
 extern int timekeeping_suspend(void);
 extern void timekeeping_resume(void);
 
+extern void do_timer(unsigned long ticks);
+extern void update_wall_time(void);
+
+extern seqlock_t jiffies_lock;
+
+#define CS_NAME_LEN    32
+
 #endif
index 2d3f5c5049394615912b09ad16a4e639cb6cba9b..2ece3aa5069cade64b8c4982e920a45bea5ba232 100644 (file)
@@ -90,8 +90,18 @@ struct tvec_base {
        struct tvec tv5;
 } ____cacheline_aligned;
 
+/*
+ * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
+ * made NULL special, hint: lock_timer_base()) and we cannot get a compile-time
+ * pointer to per-cpu entries because we don't know where we'll map the section,
+ * even for the boot CPU.
+ *
+ * And so we use boot_tvec_bases for the boot CPU and per-cpu __tvec_bases for
+ * the rest of them.
+ */
 struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
+
 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /* Functions below help us manage 'deferrable' flag */
@@ -1027,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1532,64 +1544,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
 }
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
-static int init_timers_cpu(int cpu)
-{
-       int j;
-       struct tvec_base *base;
-       static char tvec_base_done[NR_CPUS];
-
-       if (!tvec_base_done[cpu]) {
-               static char boot_done;
-
-               if (boot_done) {
-                       /*
-                        * The APs use this path later in boot
-                        */
-                       base = kzalloc_node(sizeof(*base), GFP_KERNEL,
-                                           cpu_to_node(cpu));
-                       if (!base)
-                               return -ENOMEM;
-
-                       /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
-                       if (WARN_ON(base != tbase_get_base(base))) {
-                               kfree(base);
-                               return -ENOMEM;
-                       }
-                       per_cpu(tvec_bases, cpu) = base;
-               } else {
-                       /*
-                        * This is for the boot CPU - we use compile-time
-                        * static initialisation because per-cpu memory isn't
-                        * ready yet and because the memory allocators are not
-                        * initialised either.
-                        */
-                       boot_done = 1;
-                       base = &boot_tvec_bases;
-               }
-               spin_lock_init(&base->lock);
-               tvec_base_done[cpu] = 1;
-               base->cpu = cpu;
-       } else {
-               base = per_cpu(tvec_bases, cpu);
-       }
-
-
-       for (j = 0; j < TVN_SIZE; j++) {
-               INIT_LIST_HEAD(base->tv5.vec + j);
-               INIT_LIST_HEAD(base->tv4.vec + j);
-               INIT_LIST_HEAD(base->tv3.vec + j);
-               INIT_LIST_HEAD(base->tv2.vec + j);
-       }
-       for (j = 0; j < TVR_SIZE; j++)
-               INIT_LIST_HEAD(base->tv1.vec + j);
-
-       base->timer_jiffies = jiffies;
-       base->next_timer = base->timer_jiffies;
-       base->active_timers = 0;
-       base->all_timers = 0;
-       return 0;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
 {
@@ -1631,55 +1585,86 @@ static void migrate_timers(int cpu)
                migrate_timer_list(new_base, old_base->tv5.vec + i);
        }
 
+       old_base->active_timers = 0;
+       old_base->all_timers = 0;
+
        spin_unlock(&old_base->lock);
        spin_unlock_irq(&new_base->lock);
        put_cpu_var(tvec_bases);
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static int timer_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
 {
-       long cpu = (long)hcpu;
-       int err;
-
-       switch(action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               err = init_timers_cpu(cpu);
-               if (err < 0)
-                       return notifier_from_errno(err);
-               break;
-#ifdef CONFIG_HOTPLUG_CPU
+       switch (action) {
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-               migrate_timers(cpu);
+               migrate_timers((long)hcpu);
                break;
-#endif
        default:
                break;
        }
+
        return NOTIFY_OK;
 }
 
-static struct notifier_block timers_nb = {
-       .notifier_call  = timer_cpu_notify,
-};
+static inline void timer_register_cpu_notifier(void)
+{
+       cpu_notifier(timer_cpu_notify, 0);
+}
+#else
+static inline void timer_register_cpu_notifier(void) { }
+#endif /* CONFIG_HOTPLUG_CPU */
 
+static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+{
+       int j;
 
-void __init init_timers(void)
+       BUG_ON(base != tbase_get_base(base));
+
+       base->cpu = cpu;
+       per_cpu(tvec_bases, cpu) = base;
+       spin_lock_init(&base->lock);
+
+       for (j = 0; j < TVN_SIZE; j++) {
+               INIT_LIST_HEAD(base->tv5.vec + j);
+               INIT_LIST_HEAD(base->tv4.vec + j);
+               INIT_LIST_HEAD(base->tv3.vec + j);
+               INIT_LIST_HEAD(base->tv2.vec + j);
+       }
+       for (j = 0; j < TVR_SIZE; j++)
+               INIT_LIST_HEAD(base->tv1.vec + j);
+
+       base->timer_jiffies = jiffies;
+       base->next_timer = base->timer_jiffies;
+}
+
+static void __init init_timer_cpus(void)
 {
-       int err;
+       struct tvec_base *base;
+       int local_cpu = smp_processor_id();
+       int cpu;
 
+       for_each_possible_cpu(cpu) {
+               if (cpu == local_cpu)
+                       base = &boot_tvec_bases;
+#ifdef CONFIG_SMP
+               else
+                       base = per_cpu_ptr(&__tvec_bases, cpu);
+#endif
+
+               init_timer_cpu(base, cpu);
+       }
+}
+
+void __init init_timers(void)
+{
        /* ensure there are enough low bits for flags in timer->base pointer */
        BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
 
-       err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
-                              (void *)(long)smp_processor_id());
-       BUG_ON(err != NOTIFY_OK);
-
+       init_timer_cpus();
        init_timer_stats();
-       register_cpu_notifier(&timers_nb);
+       timer_register_cpu_notifier();
        open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
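
The BUILD_BUG_ON() in init_timers() guards the trick that makes these per-cpu bases workable: struct tvec_base is aligned, so the low bits of a timer's base pointer are free to carry flags, and tbase_get_base() must be able to recover the real pointer. A compilable sketch of that encoding; the two-bit flag mask is an assumption for illustration:

#include <stdint.h>
#include <stdio.h>

#define TIMER_FLAG_MASK	0x3UL	/* assumed: two low bits reserved for flags */

struct tvec_base {
	int cpu;
} __attribute__((aligned(4)));	/* alignment frees the low two bits */

static struct tvec_base boot_base;

static struct tvec_base *tbase_get_base(uintptr_t tagged)
{
	return (struct tvec_base *)(tagged & ~TIMER_FLAG_MASK);
}

int main(void)
{
	uintptr_t tagged = (uintptr_t)&boot_base | 0x1;	/* e.g. "deferrable" */

	printf("base %p, flags %lx\n",
	       (void *)tbase_get_base(tagged),
	       (unsigned long)(tagged & TIMER_FLAG_MASK));
	return 0;
}
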
 
index 61ed862cdd376222dedfa317301f3d6c3dbd3404..e878c2e0ba45e06c4690646a8853406e11dd1a15 100644 (file)
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
-#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 
+#include "tick-internal.h"
 
 struct timer_list_iter {
        int cpu;
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
        print_name_offset(m, dev->set_next_event);
        SEQ_printf(m, "\n");
 
-       SEQ_printf(m, " set_mode:       ");
-       print_name_offset(m, dev->set_mode);
-       SEQ_printf(m, "\n");
+       if (dev->set_mode) {
+               SEQ_printf(m, " set_mode:       ");
+               print_name_offset(m, dev->set_mode);
+               SEQ_printf(m, "\n");
+       } else {
+               if (dev->set_state_shutdown) {
+                       SEQ_printf(m, " shutdown: ");
+                       print_name_offset(m, dev->set_state_shutdown);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->set_state_periodic) {
+                       SEQ_printf(m, " periodic: ");
+                       print_name_offset(m, dev->set_state_periodic);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->set_state_oneshot) {
+                       SEQ_printf(m, " oneshot:  ");
+                       print_name_offset(m, dev->set_state_oneshot);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->tick_resume) {
+                       SEQ_printf(m, " resume:   ");
+                       print_name_offset(m, dev->tick_resume);
+                       SEQ_printf(m, "\n");
+               }
+       }
 
        SEQ_printf(m, " event_handler:  ");
        print_name_offset(m, dev->event_handler);
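
The branch added above distinguishes legacy clockevent drivers, which expose a single set_mode() callback, from converted drivers that provide one hook per state. A stripped-down sketch of that dispatch; the struct below is a stand-in, not the kernel's clock_event_device:

#include <stdio.h>

struct clock_event_device {
	void (*set_mode)(int mode);		/* legacy: one hook, all states */
	int (*set_state_shutdown)(void);	/* converted: one hook per state */
	int (*set_state_periodic)(void);
	int (*set_state_oneshot)(void);
	int (*tick_resume)(void);
};

static int dummy_periodic(void) { return 0; }

static void print_modes(const struct clock_event_device *dev)
{
	if (dev->set_mode) {
		printf(" set_mode: <legacy driver>\n");
		return;
	}
	if (dev->set_state_periodic)
		printf(" periodic: <per-state driver>\n");
}

int main(void)
{
	struct clock_event_device converted = {
		.set_state_periodic = dummy_periodic,
	};

	print_modes(&converted);
	return 0;
}
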
index c5cefb3c009ce9cd51199dc5fef683d7bc9b1bdc..36b6fa88ce5b412f92b15da530772c5a058e5d12 100644 (file)
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
          data corruption or a sporadic crash at a later stage once the region
          is examined. The runtime overhead introduced is minimal.
 
+config DEBUG_TIMEKEEPING
+       bool "Enable extra timekeeping sanity checking"
+       help
+         This option will enable additional timekeeping sanity checks
+         which may be helpful when diagnosing issues where timekeeping
+         problems are suspected.
+
+         This may include checks in the timekeeping hotpaths, so this
+         option may have a (very small) performance impact on some
+         workloads.
+
+         If unsure, say N.
+
 config TIMER_STATS
        bool "Collect kernel timers statistics"
        depends on DEBUG_KERNEL && PROC_FS
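
This new option is what arms the timekeeping_check_update() call added to update_wall_time() above: with CONFIG_DEBUG_TIMEKEEPING=y the offset handed to each accumulation pass gets the extra sanity checks the help text describes.
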
index ecb9a665ec19b5c8b6e062568dffb0c4c0b12d90..494994bf17c8ec9764cbb784cbd88840fa0919c9 100644 (file)
@@ -18,7 +18,7 @@
 #define CMPXCHG_LOOP(CODE, SUCCESS) do {                                       \
        struct lockref old;                                                     \
        BUILD_BUG_ON(sizeof(old) != 8);                                         \
-       old.lock_count = ACCESS_ONCE(lockref->lock_count);                      \
+       old.lock_count = READ_ONCE(lockref->lock_count);                        \
        while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {     \
                struct lockref new = old, prev = old;                           \
                CODE                                                            \
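
ACCESS_ONCE() casts through a volatile-qualified lvalue, which GCC is free to ignore for non-scalar types, and struct lockref is an 8-byte aggregate; READ_ONCE() copies through a volatile pointer instead, so it is safe here. A GNU C userspace approximation of the difference (a byte-copy fallback; the real kernel macro prefers sized volatile loads):

#include <stddef.h>
#include <stdio.h>

/* Byte-wise volatile copy, usable for aggregates such as struct lockref. */
#define READ_ONCE_SKETCH(x) ({						\
	union { __typeof__(x) val; char raw[sizeof(x)]; } u;		\
	const volatile char *src = (const volatile char *)&(x);	\
	for (size_t i = 0; i < sizeof(x); i++)				\
		u.raw[i] = src[i];					\
	u.val;								\
})

struct lockref_like {
	unsigned int lock;
	unsigned int count;
};

int main(void)
{
	struct lockref_like l = { .lock = 0, .count = 7 };
	struct lockref_like snap = READ_ONCE_SKETCH(l);

	printf("count snapshot: %u\n", snap.count);
	return 0;
}
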
index 57dadc025c6444dd1038691d160803745db124f8..2dc44b1cb1dfc2f6d644d2d655155e6cfce2bdeb 100644 (file)
@@ -286,8 +286,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                old_len = new_len;
                old_addr = new_addr;
                new_addr = -ENOMEM;
-       } else if (vma->vm_file && vma->vm_file->f_op->mremap)
-               vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+       } else if (vma->vm_file && vma->vm_file->f_op->mremap) {
+               err = vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+               if (err < 0) {
+                       move_page_tables(new_vma, new_addr, vma, old_addr,
+                                        moved_len, true);
+                       return err;
+               }
+       }
 
        /* Conceal VM_ACCOUNT so old reservation is not undone */
        if (vm_flags & VM_ACCOUNT) {
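
The change here closes a real hole: move_vma() previously ignored the return value of f_op->mremap(), leaving the page tables pointing at the new location even when the driver's hook failed. Now a failure moves the page tables back with move_page_tables() in the reverse direction and propagates the error to the caller.
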
index 6b3f54ed65ba6fc4ff392877f662ef5dddeb8939..a9f4ae45b7fb856295a30a87a6c3701933d2f3f2 100644 (file)
@@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
                               IPPROTO_TCP, &sock);
        if (ret)
                return ret;
-       sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC;
+       sock->sk->sk_allocation = GFP_NOFS;
 
 #ifdef CONFIG_LOCKDEP
        lockdep_set_class(&sock->sk->sk_lock, &socket_class);
@@ -520,8 +520,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
                               ret);
        }
 
-       sk_set_memalloc(sock->sk);
-
        con->sock = sock;
        return 0;
 }
@@ -2808,11 +2806,8 @@ static void con_work(struct work_struct *work)
 {
        struct ceph_connection *con = container_of(work, struct ceph_connection,
                                                   work.work);
-       unsigned long pflags = current->flags;
        bool fault;
 
-       current->flags |= PF_MEMALLOC;
-
        mutex_lock(&con->mutex);
        while (true) {
                int ret;
@@ -2866,8 +2861,6 @@ static void con_work(struct work_struct *work)
                con_fault_finish(con);
 
        con->ops->put(con);
-
-       tsk_restore_flags(current, pflags, PF_MEMALLOC);
 }
 
 /*
index a422aaa3bb0cb6a7ea2f1e0de3b86380a52bf012..9ee25a63f684269c8fc600b1293b9b4421a7f020 100644 (file)
@@ -96,10 +96,10 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit)
        struct fw_device *device = fw_parent_device(unit);
        int err, rcode;
        u64 date;
-       __be32 cues[3] = {
-               MAUDIO_BOOTLOADER_CUE1,
-               MAUDIO_BOOTLOADER_CUE2,
-               MAUDIO_BOOTLOADER_CUE3
+       __le32 cues[3] = {
+               cpu_to_le32(MAUDIO_BOOTLOADER_CUE1),
+               cpu_to_le32(MAUDIO_BOOTLOADER_CUE2),
+               cpu_to_le32(MAUDIO_BOOTLOADER_CUE3)
        };
 
        /* check date of software used to build */
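
The bootloader cues must hit the wire little-endian, so the array type changes to __le32 and each constant is converted explicitly with cpu_to_le32(). A userspace demonstration of the same guarantee using htole32() from <endian.h>; the cue value below is a placeholder, not one of the MAUDIO constants:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t cue = 0x11223344;	/* placeholder cue word */
	uint32_t wire = htole32(cue);	/* byte order in memory fixed as LE */
	const unsigned char *b = (const unsigned char *)&wire;

	/* prints "44 33 22 11" regardless of host byte order */
	printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]);
	return 0;
}
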
index 74382137b9f5abcd67b9c4c44581c9a80f26d17d..f9d12c0a7e5a342f81f6e47c720a2187fd940367 100644 (file)
@@ -2912,6 +2912,8 @@ static void alc283_init(struct hda_codec *codec)
 
        if (!hp_pin)
                return;
+
+       msleep(30);
        hp_pin_sense = snd_hda_jack_detect(codec, hp_pin);
 
        /* Index 0x43 Direct Drive HP AMP LPM Control 1 */
@@ -3607,6 +3609,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3662,6 +3665,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_write_coef_idx(codec, 0x45, 0xc489);
                snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
                alc_process_coef_fw(codec, coef0255);
@@ -3731,6 +3735,7 @@ static void alc_headset_mode_default(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3785,6 +3790,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3839,6 +3845,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3884,6 +3891,7 @@ static void alc_determine_headset_type(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                msleep(300);
                val = alc_read_coef_idx(codec, 0x46);
@@ -4364,6 +4372,7 @@ enum {
        ALC269_FIXUP_QUANTA_MUTE,
        ALC269_FIXUP_LIFEBOOK,
        ALC269_FIXUP_LIFEBOOK_EXTMIC,
+       ALC269_FIXUP_LIFEBOOK_HP_PIN,
        ALC269_FIXUP_AMIC,
        ALC269_FIXUP_DMIC,
        ALC269VB_FIXUP_AMIC,
@@ -4517,6 +4526,13 @@ static const struct hda_fixup alc269_fixups[] = {
                        { }
                },
        },
+       [ALC269_FIXUP_LIFEBOOK_HP_PIN] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x21, 0x0221102f }, /* HP out */
+                       { }
+               },
+       },
        [ALC269_FIXUP_AMIC] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -5010,6 +5026,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
        SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
        SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+       SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
@@ -5217,6 +5234,16 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x40000000},
                {0x1d, 0x40700001},
                {0x21, 0x02211050}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60140},
+               {0x13, 0x40000000},
+               {0x14, 0x90170110},
+               {0x19, 0x411111f0},
+               {0x1a, 0x411111f0},
+               {0x1b, 0x411111f0},
+               {0x1d, 0x40700001},
+               {0x1e, 0x411111f0},
+               {0x21, 0x02211020}),
        SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
                {0x12, 0x90a60130},
                {0x13, 0x40000000},
index 9974f201a08f44ee25b109c7ff78e13440146c4b..474cae82a8742704af9875ef7734710ac05d71cf 100644 (file)
@@ -1156,25 +1156,6 @@ static int pcm512x_hw_params(struct snd_pcm_substream *substream,
                                ret, pcm512x->pll_out);
                        return ret;
                }
-
-               gpio = PCM512x_G1OE << (4 - 1);
-               ret = regmap_update_bits(pcm512x->regmap, PCM512x_GPIO_EN,
-                                        gpio, gpio);
-               if (ret != 0) {
-                       dev_err(codec->dev, "Failed to enable gpio %d: %d\n",
-                               4, ret);
-                       return ret;
-               }
-
-               gpio = PCM512x_GPIO_OUTPUT_1 + 4 - 1;
-               ret = regmap_update_bits(pcm512x->regmap, gpio,
-                                        PCM512x_GxSL, PCM512x_GxSL_PLLLK);
-               if (ret != 0) {
-                       dev_err(codec->dev,
-                               "Failed to output pll lock on %d: %d\n",
-                               ret, 4);
-                       return ret;
-               }
        }
 
        ret = regmap_update_bits(pcm512x->regmap, PCM512x_SYNCHRONIZE,
index dc9df007d3e33358a51b73bb6446d81f6d427ab1..337c317ead6fbc2fa7e3ae80bcfb7cb64119cf55 100644 (file)
@@ -192,6 +192,7 @@ static const struct rc_config {
        { USB_ID(0x041e, 0x3040), 2, 2, 6, 6,  2,  0x6e91 }, /* Live! 24-bit */
        { USB_ID(0x041e, 0x3042), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 */
        { USB_ID(0x041e, 0x30df), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 Pro */
+       { USB_ID(0x041e, 0x3237), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 Pro */
        { USB_ID(0x041e, 0x3048), 2, 2, 6, 6,  2,  0x6e91 }, /* Toshiba SB0500 */
 };
 
index 753a47de8459b7a0b505e72d2f660793d9ede885..9a28365126f9ab170454e46249348a06df8a2f32 100644 (file)
@@ -1113,8 +1113,13 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs,
 
 bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 {
-       /* MS Lifecam HD-5000 doesn't support reading the sample rate. */
-       return chip->usb_id == USB_ID(0x045E, 0x076D);
+       /* devices which do not support reading the sample rate. */
+       switch (chip->usb_id) {
+       case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+       case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+               return true;
+       }
+       return false;
 }
 
 /* Marantz/Denon USB DACs need a vendor cmd to switch
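
Turning the single-device comparison into a switch gives a natural quirk table for every device that cannot report its sample rate. A self-contained sketch of the same pattern; USB_ID() is written out here under the assumption that it packs the vendor into the high 16 bits, as the kernel macro does:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define USB_ID(vendor, product) (((uint32_t)(vendor) << 16) | (product))

static bool get_sample_rate_quirk(uint32_t usb_id)
{
	/* devices which do not support reading the sample rate */
	switch (usb_id) {
	case USB_ID(0x045E, 0x076D):	/* MS Lifecam HD-5000 */
	case USB_ID(0x04D8, 0xFEEA):	/* Benchmark DAC1 Pre */
		return true;
	}
	return false;
}

int main(void)
{
	printf("%d\n", get_sample_rate_quirk(USB_ID(0x045E, 0x076D)));
	return 0;
}
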
index d66ab799b35fd5cab83e5486368e40c39c2927ee..8c0c1a2770c8fe01bb479c97847540befe164011 100644 (file)
@@ -1,12 +1,12 @@
 
-MEMCPY_FN(__memcpy,
+MEMCPY_FN(memcpy_orig,
        "x86-64-unrolled",
        "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
 
-MEMCPY_FN(memcpy_c,
+MEMCPY_FN(__memcpy,
        "x86-64-movsq",
        "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
 
-MEMCPY_FN(memcpy_c_e,
+MEMCPY_FN(memcpy_erms,
        "x86-64-movsb",
        "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
index fcd9cf00600a970677dd331cfc7f71de0d67d00e..e4c2c30143b95133913a2bc3569b2871b79aa75c 100644 (file)
@@ -1,8 +1,6 @@
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#define Lmemcpy_c globl memcpy_c; memcpy_c
-#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
index db1d3a29d97fec67c47a4dd84eca2f7779ec1865..d3dfb7936dcdfd688d20ae680b3b0c0009cdeb55 100644 (file)
@@ -36,7 +36,7 @@ static const struct option options[] = {
                    "Specify length of memory to copy. "
                    "Available units: B, KB, MB, GB and TB (upper and lower)"),
        OPT_STRING('r', "routine", &routine, "default",
-                   "Specify routine to copy"),
+                   "Specify routine to copy, \"all\" runs all available routines"),
        OPT_INTEGER('i', "iterations", &iterations,
                    "repeat memcpy() invocation this number of times"),
        OPT_BOOLEAN('c', "cycle", &use_cycle,
@@ -135,55 +135,16 @@ struct bench_mem_info {
        const char *const *usage;
 };
 
-static int bench_mem_common(int argc, const char **argv,
-                    const char *prefix __maybe_unused,
-                    struct bench_mem_info *info)
+static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
 {
-       int i;
-       size_t len;
-       double totallen;
+       const struct routine *r = &info->routines[r_idx];
        double result_bps[2];
        u64 result_cycle[2];
 
-       argc = parse_options(argc, argv, options,
-                            info->usage, 0);
-
-       if (no_prefault && only_prefault) {
-               fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-               return 1;
-       }
-
-       if (use_cycle)
-               init_cycle();
-
-       len = (size_t)perf_atoll((char *)length_str);
-       totallen = (double)len * iterations;
-
        result_cycle[0] = result_cycle[1] = 0ULL;
        result_bps[0] = result_bps[1] = 0.0;
 
-       if ((s64)len <= 0) {
-               fprintf(stderr, "Invalid length:%s\n", length_str);
-               return 1;
-       }
-
-       /* same as not specifying either prefault or no-prefault */
-       if (only_prefault && no_prefault)
-               only_prefault = no_prefault = false;
-
-       for (i = 0; info->routines[i].name; i++) {
-               if (!strcmp(info->routines[i].name, routine))
-                       break;
-       }
-       if (!info->routines[i].name) {
-               printf("Unknown routine:%s\n", routine);
-               printf("Available routines...\n");
-               for (i = 0; info->routines[i].name; i++) {
-                       printf("\t%s ... %s\n",
-                              info->routines[i].name, info->routines[i].desc);
-               }
-               return 1;
-       }
+       printf("Routine %s (%s)\n", r->name, r->desc);
 
        if (bench_format == BENCH_FORMAT_DEFAULT)
                printf("# Copying %s Bytes ...\n\n", length_str);
@@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv,
        if (!only_prefault && !no_prefault) {
                /* show both of results */
                if (use_cycle) {
-                       result_cycle[0] =
-                               info->do_cycle(&info->routines[i], len, false);
-                       result_cycle[1] =
-                               info->do_cycle(&info->routines[i], len, true);
+                       result_cycle[0] = info->do_cycle(r, len, false);
+                       result_cycle[1] = info->do_cycle(r, len, true);
                } else {
-                       result_bps[0] =
-                               info->do_gettimeofday(&info->routines[i],
-                                               len, false);
-                       result_bps[1] =
-                               info->do_gettimeofday(&info->routines[i],
-                                               len, true);
+                       result_bps[0]   = info->do_gettimeofday(r, len, false);
+                       result_bps[1]   = info->do_gettimeofday(r, len, true);
                }
        } else {
-               if (use_cycle) {
-                       result_cycle[pf] =
-                               info->do_cycle(&info->routines[i],
-                                               len, only_prefault);
-               } else {
-                       result_bps[pf] =
-                               info->do_gettimeofday(&info->routines[i],
-                                               len, only_prefault);
-               }
+               if (use_cycle)
+                       result_cycle[pf] = info->do_cycle(r, len, only_prefault);
+               else
+                       result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
        }
 
        switch (bench_format) {
@@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv,
                die("unknown format: %d\n", bench_format);
                break;
        }
+}
+
+static int bench_mem_common(int argc, const char **argv,
+                    const char *prefix __maybe_unused,
+                    struct bench_mem_info *info)
+{
+       int i;
+       size_t len;
+       double totallen;
+
+       argc = parse_options(argc, argv, options,
+                            info->usage, 0);
+
+       if (no_prefault && only_prefault) {
+               fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+               return 1;
+       }
+
+       if (use_cycle)
+               init_cycle();
+
+       len = (size_t)perf_atoll((char *)length_str);
+       totallen = (double)len * iterations;
+
+       if ((s64)len <= 0) {
+               fprintf(stderr, "Invalid length:%s\n", length_str);
+               return 1;
+       }
+
+       /* same as not specifying either prefault or no-prefault */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
+       if (!strncmp(routine, "all", 3)) {
+               for (i = 0; info->routines[i].name; i++)
+                       __bench_mem_routine(info, i, len, totallen);
+               return 0;
+       }
+
+       for (i = 0; info->routines[i].name; i++) {
+               if (!strcmp(info->routines[i].name, routine))
+                       break;
+       }
+       if (!info->routines[i].name) {
+               printf("Unknown routine:%s\n", routine);
+               printf("Available routines...\n");
+               for (i = 0; info->routines[i].name; i++) {
+                       printf("\t%s ... %s\n",
+                              info->routines[i].name, info->routines[i].desc);
+               }
+               return 1;
+       }
+
+       __bench_mem_routine(info, i, len, totallen);
 
        return 0;
 }
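
With the per-routine loop split out into __bench_mem_routine(), passing "all" as the routine name now benchmarks every registered implementation in one run; for example, perf bench mem memcpy -r all (the literal "all" is what the strncmp() above matches).
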
index a71dff97c1f54e87aea5034398534bacc77d6a56..f02d028771d970da5d611c31d1b406179b450d45 100644 (file)
@@ -1,12 +1,12 @@
 
-MEMSET_FN(__memset,
+MEMSET_FN(memset_orig,
        "x86-64-unrolled",
        "unrolled memset() in arch/x86/lib/memset_64.S")
 
-MEMSET_FN(memset_c,
+MEMSET_FN(__memset,
        "x86-64-stosq",
        "movsq-based memset() in arch/x86/lib/memset_64.S")
 
-MEMSET_FN(memset_c_e,
+MEMSET_FN(memset_erms,
        "x86-64-stosb",
        "movsb-based memset() in arch/x86/lib/memset_64.S")
index 9e5af89ed13af64bed89e46e7a8df1e1b6a52298..de278784c866a3804040408454f38c4a4f7ca44e 100644 (file)
@@ -1,8 +1,6 @@
 #define memset MEMSET /* don't hide glibc's memset() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#define Lmemset_c globl memset_c; memset_c
-#define Lmemset_c_e globl memset_c_e; memset_c_e
 #include "../../../arch/x86/lib/memset_64.S"
 
 /*
index 6789d788d4947d31f890ca56660d8ef968901b82..3a3a0f16456ae3369cd73faff1d3af9e4e8d4180 100644 (file)
@@ -4,5 +4,6 @@
 /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
 
 #define altinstruction_entry #
+#define ALTERNATIVE_2 #
 
 #endif
index 0db571340edbd94a7042a63f517f410b42c127d7..95abddcd78397fc3b64884f6bec31a5f6005b9e8 100644 (file)
@@ -17,6 +17,7 @@ TARGETS += sysctl
 TARGETS += timers
 TARGETS += user
 TARGETS += vm
+TARGETS += x86
 #Please keep the TARGETS list alphabetically sorted
 
 TARGETS_HOTPLUG = cpu-hotplug
@@ -55,7 +56,40 @@ clean_hotplug:
                make -C $$TARGET clean; \
        done;
 
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+       @# Ask all targets to install their files
+       mkdir -p $(INSTALL_PATH)
+       for TARGET in $(TARGETS); do \
+               mkdir -p $(INSTALL_PATH)/$$TARGET ; \
+               make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+       done;
+
+       @# Ask all targets to emit their test scripts
+       echo "#!/bin/bash" > $(ALL_SCRIPT)
+       echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
+       echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+
+       for TARGET in $(TARGETS); do \
+               echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+               echo "echo ========================================" >> $(ALL_SCRIPT); \
+               echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+               make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+               echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+       done;
+
+       chmod u+x $(ALL_SCRIPT)
+else
+       $(error Error: set INSTALL_PATH to use install)
+endif
+
 clean:
        for TARGET in $(TARGETS); do \
                make -C $$TARGET clean; \
        done;
+
+.PHONY: install
index e18b42b254af814d7eb03f9459a2d24f67397077..1822356402090df03ee6311cb99dd7e2c7db94d7 100644 (file)
@@ -16,8 +16,9 @@ else
        echo "Not an x86 target, can't build breakpoints selftests"
 endif
 
-run_tests:
-       @./breakpoint_test || echo "breakpoints selftests: [FAIL]"
+TEST_PROGS := breakpoint_test
+
+include ../lib.mk
 
 clean:
        rm -fr breakpoint_test
index e9c28d8dc84bf2426e931b063477dd700a95ffab..fe1f99101c5d5023d4687893ed0bf7a5e876f1a9 100644 (file)
@@ -1,9 +1,10 @@
 all:
 
-run_tests:
-       @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
 
 run_full_test:
-       @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+       @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
new file mode 100755 (executable)
index 0000000..98b1d65
--- /dev/null
@@ -0,0 +1,269 @@
+#!/bin/bash
+
+SYSFS=
+
+prerequisite()
+{
+       msg="skip all tests:"
+
+       if [ $UID != 0 ]; then
+               echo $msg must be run as root >&2
+               exit 0
+       fi
+
+       taskset -p 01 $$
+
+       SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+       if [ ! -d "$SYSFS" ]; then
+               echo $msg sysfs is not mounted >&2
+               exit 0
+       fi
+
+       if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
+               echo $msg cpu hotplug is not supported >&2
+               exit 0
+       fi
+
+       echo "CPU online/offline summary:"
+       online_cpus=`cat $SYSFS/devices/system/cpu/online`
+       online_max=${online_cpus##*-}
+       echo -e "\t Cpus in online state: $online_cpus"
+
+       offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
+       if [[ "a$offline_cpus" = "a" ]]; then
+               offline_cpus=0
+       else
+               offline_max=${offline_cpus##*-}
+       fi
+       echo -e "\t Cpus in offline state: $offline_cpus"
+}
+
+#
+# list all hot-pluggable CPUs
+#
+hotpluggable_cpus()
+{
+       local state=${1:-.\*}
+
+       for cpu in $SYSFS/devices/system/cpu/cpu*; do
+               if [ -f $cpu/online ] && grep -q $state $cpu/online; then
+                       echo ${cpu##/*/cpu}
+               fi
+       done
+}
+
+hotplaggable_offline_cpus()
+{
+       hotpluggable_cpus 0
+}
+
+hotpluggable_online_cpus()
+{
+       hotpluggable_cpus 1
+}
+
+cpu_is_online()
+{
+       grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+cpu_is_offline()
+{
+       grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu()
+{
+       echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+offline_cpu()
+{
+       echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu_expect_success()
+{
+       local cpu=$1
+
+       if ! online_cpu $cpu; then
+               echo $FUNCNAME $cpu: unexpected fail >&2
+       elif ! cpu_is_online $cpu; then
+               echo $FUNCNAME $cpu: unexpected offline >&2
+       fi
+}
+
+online_cpu_expect_fail()
+{
+       local cpu=$1
+
+       if online_cpu $cpu 2> /dev/null; then
+               echo $FUNCNAME $cpu: unexpected success >&2
+       elif ! cpu_is_offline $cpu; then
+               echo $FUNCNAME $cpu: unexpected online >&2
+       fi
+}
+
+offline_cpu_expect_success()
+{
+       local cpu=$1
+
+       if ! offline_cpu $cpu; then
+               echo $FUNCNAME $cpu: unexpected fail >&2
+       elif ! cpu_is_offline $cpu; then
+               echo $FUNCNAME $cpu: unexpected offline >&2
+       fi
+}
+
+offline_cpu_expect_fail()
+{
+       local cpu=$1
+
+       if offline_cpu $cpu 2> /dev/null; then
+               echo $FUNCNAME $cpu: unexpected success >&2
+       elif ! cpu_is_online $cpu; then
+               echo $FUNCNAME $cpu: unexpected offline >&2
+       fi
+}
+
+error=-12
+allcpus=0
+priority=0
+online_cpus=0
+online_max=0
+offline_cpus=0
+offline_max=0
+
+while getopts e:ahp: opt; do
+       case $opt in
+       e)
+               error=$OPTARG
+               ;;
+       a)
+               allcpus=1
+               ;;
+       h)
+               echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+               echo -e "\t default offline one cpu"
+               echo -e "\t run with -a option to offline all cpus"
+               exit
+               ;;
+       p)
+               priority=$OPTARG
+               ;;
+       esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+       echo "error code must be -4095 <= errno < 0" >&2
+       exit 1
+fi
+
+prerequisite
+
+#
+# Safe test (default) - offline and online one cpu
+#
+if [ $allcpus -eq 0 ]; then
+       echo "Limited scope test: one hotplug cpu"
+       echo -e "\t (leaves cpu in the original state):"
+       echo -e "\t online to offline to online: cpu $online_max"
+       offline_cpu_expect_success $online_max
+       online_cpu_expect_success $online_max
+
+       if [[ $offline_cpus -gt 0 ]]; then
+               echo -e "\t offline to online to offline: cpu $offline_max"
+               online_cpu_expect_success $offline_max
+               offline_cpu_expect_success $offline_max
+       fi
+       exit 0
+else
+       echo "Full scope test: all hotplug cpus"
+       echo -e "\t online all offline cpus"
+       echo -e "\t offline all online cpus"
+       echo -e "\t online all offline cpus"
+fi
+
+#
+# Online all hot-pluggable CPUs
+#
+for cpu in `hotplaggable_offline_cpus`; do
+       online_cpu_expect_success $cpu
+done
+
+#
+# Offline all hot-pluggable CPUs
+#
+for cpu in `hotpluggable_online_cpus`; do
+       offline_cpu_expect_success $cpu
+done
+
+#
+# Online all hot-pluggable CPUs again
+#
+for cpu in `hotplaggable_offline_cpus`; do
+       online_cpu_expect_success $cpu
+done
+
+#
+# Test with cpu notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+
+prerequisite_extra()
+{
+       msg="skip extra tests:"
+
+       /sbin/modprobe -q -r cpu-notifier-error-inject
+       /sbin/modprobe -q cpu-notifier-error-inject priority=$priority
+
+       if [ ! -d "$DEBUGFS" ]; then
+               echo $msg debugfs is not mounted >&2
+               exit 0
+       fi
+
+       if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+               echo $msg cpu-notifier-error-inject module is not available >&2
+               exit 0
+       fi
+}
+
+prerequisite_extra
+
+#
+# Offline all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+       offline_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+       online_cpu_expect_fail $cpu
+done
+
+#
+# Online all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+       online_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+       offline_cpu_expect_fail $cpu
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+/sbin/modprobe -q -r cpu-notifier-error-inject
diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/on-off-test.sh
deleted file mode 100644 (file)
index 98b1d65..0000000
+++ /dev/null
@@ -1,269 +0,0 @@
-#!/bin/bash
-
-SYSFS=
-
-prerequisite()
-{
-       msg="skip all tests:"
-
-       if [ $UID != 0 ]; then
-               echo $msg must be run as root >&2
-               exit 0
-       fi
-
-       taskset -p 01 $$
-
-       SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
-
-       if [ ! -d "$SYSFS" ]; then
-               echo $msg sysfs is not mounted >&2
-               exit 0
-       fi
-
-       if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
-               echo $msg cpu hotplug is not supported >&2
-               exit 0
-       fi
-
-       echo "CPU online/offline summary:"
-       online_cpus=`cat $SYSFS/devices/system/cpu/online`
-       online_max=${online_cpus##*-}
-       echo -e "\t Cpus in online state: $online_cpus"
-
-       offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
-       if [[ "a$offline_cpus" = "a" ]]; then
-               offline_cpus=0
-       else
-               offline_max=${offline_cpus##*-}
-       fi
-       echo -e "\t Cpus in offline state: $offline_cpus"
-}
-
-#
-# list all hot-pluggable CPUs
-#
-hotpluggable_cpus()
-{
-       local state=${1:-.\*}
-
-       for cpu in $SYSFS/devices/system/cpu/cpu*; do
-               if [ -f $cpu/online ] && grep -q $state $cpu/online; then
-                       echo ${cpu##/*/cpu}
-               fi
-       done
-}
-
-hotplaggable_offline_cpus()
-{
-       hotpluggable_cpus 0
-}
-
-hotpluggable_online_cpus()
-{
-       hotpluggable_cpus 1
-}
-
-cpu_is_online()
-{
-       grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
-}
-
-cpu_is_offline()
-{
-       grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
-}
-
-online_cpu()
-{
-       echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
-}
-
-offline_cpu()
-{
-       echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
-}
-
-online_cpu_expect_success()
-{
-       local cpu=$1
-
-       if ! online_cpu $cpu; then
-               echo $FUNCNAME $cpu: unexpected fail >&2
-       elif ! cpu_is_online $cpu; then
-               echo $FUNCNAME $cpu: unexpected offline >&2
-       fi
-}
-
-online_cpu_expect_fail()
-{
-       local cpu=$1
-
-       if online_cpu $cpu 2> /dev/null; then
-               echo $FUNCNAME $cpu: unexpected success >&2
-       elif ! cpu_is_offline $cpu; then
-               echo $FUNCNAME $cpu: unexpected online >&2
-       fi
-}
-
-offline_cpu_expect_success()
-{
-       local cpu=$1
-
-       if ! offline_cpu $cpu; then
-               echo $FUNCNAME $cpu: unexpected fail >&2
-       elif ! cpu_is_offline $cpu; then
-               echo $FUNCNAME $cpu: unexpected offline >&2
-       fi
-}
-
-offline_cpu_expect_fail()
-{
-       local cpu=$1
-
-       if offline_cpu $cpu 2> /dev/null; then
-               echo $FUNCNAME $cpu: unexpected success >&2
-       elif ! cpu_is_online $cpu; then
-               echo $FUNCNAME $cpu: unexpected offline >&2
-       fi
-}
-
-error=-12
-allcpus=0
-priority=0
-online_cpus=0
-online_max=0
-offline_cpus=0
-offline_max=0
-
-while getopts e:ahp: opt; do
-       case $opt in
-       e)
-               error=$OPTARG
-               ;;
-       a)
-               allcpus=1
-               ;;
-       h)
-               echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
-               echo -e "\t default offline one cpu"
-               echo -e "\t run with -a option to offline all cpus"
-               exit
-               ;;
-       p)
-               priority=$OPTARG
-               ;;
-       esac
-done
-
-if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
-       echo "error code must be -4095 <= errno < 0" >&2
-       exit 1
-fi
-
-prerequisite
-
-#
-# Safe test (default) - offline and online one cpu
-#
-if [ $allcpus -eq 0 ]; then
-       echo "Limited scope test: one hotplug cpu"
-       echo -e "\t (leaves cpu in the original state):"
-       echo -e "\t online to offline to online: cpu $online_max"
-       offline_cpu_expect_success $online_max
-       online_cpu_expect_success $online_max
-
-       if [[ $offline_cpus -gt 0 ]]; then
-               echo -e "\t offline to online to offline: cpu $offline_max"
-               online_cpu_expect_success $offline_max
-               offline_cpu_expect_success $offline_max
-       fi
-       exit 0
-else
-       echo "Full scope test: all hotplug cpus"
-       echo -e "\t online all offline cpus"
-       echo -e "\t offline all online cpus"
-       echo -e "\t online all offline cpus"
-fi
-
-#
-# Online all hot-pluggable CPUs
-#
-for cpu in `hotplaggable_offline_cpus`; do
-       online_cpu_expect_success $cpu
-done
-
-#
-# Offline all hot-pluggable CPUs
-#
-for cpu in `hotpluggable_online_cpus`; do
-       offline_cpu_expect_success $cpu
-done
-
-#
-# Online all hot-pluggable CPUs again
-#
-for cpu in `hotplaggable_offline_cpus`; do
-       online_cpu_expect_success $cpu
-done
-
-#
-# Test with cpu notifier error injection
-#
-
-DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
-NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
-
-prerequisite_extra()
-{
-       msg="skip extra tests:"
-
-       /sbin/modprobe -q -r cpu-notifier-error-inject
-       /sbin/modprobe -q cpu-notifier-error-inject priority=$priority
-
-       if [ ! -d "$DEBUGFS" ]; then
-               echo $msg debugfs is not mounted >&2
-               exit 0
-       fi
-
-       if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
-               echo $msg cpu-notifier-error-inject module is not available >&2
-               exit 0
-       fi
-}
-
-prerequisite_extra
-
-#
-# Offline all hot-pluggable CPUs
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-for cpu in `hotpluggable_online_cpus`; do
-       offline_cpu_expect_success $cpu
-done
-
-#
-# Test CPU hot-add error handling (offline => online)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
-for cpu in `hotplaggable_offline_cpus`; do
-       online_cpu_expect_fail $cpu
-done
-
-#
-# Online all hot-pluggable CPUs
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
-for cpu in `hotplaggable_offline_cpus`; do
-       online_cpu_expect_success $cpu
-done
-
-#
-# Test CPU hot-remove error handling (online => offline)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-for cpu in `hotpluggable_online_cpus`; do
-       offline_cpu_expect_fail $cpu
-done
-
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-/sbin/modprobe -q -r cpu-notifier-error-inject
index 29e8c6bc81b04330ee5a360218f0a8fb787ef3c3..736c3ddfc787499a3a114ab87ad173054e5468c2 100644 (file)
@@ -1,12 +1,13 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 
 test_objs = open-unlink create-read
 
 all: $(test_objs)
 
-run_tests: all
-       @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]"
+TEST_PROGS := efivarfs.sh
+TEST_FILES := $(test_objs)
+
+include ../lib.mk
 
 clean:
        rm -f $(test_objs)
old mode 100644 (file)
new mode 100755 (executable)
index 66dfc2ce178896ffe0938b5dc09f1f339d93076c..4edb7d0da29b81aa1b0240ed600a42c42d63df9f 100644 (file)
@@ -1,4 +1,3 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 BINARIES = execveat
 DEPS = execveat.symlink execveat.denatured script subdir
@@ -18,8 +17,12 @@ execveat.denatured: execveat
 %: %.c
        $(CC) $(CFLAGS) -o $@ $^
 
-run_tests: all
-       ./execveat
+TEST_PROGS := execveat
+TEST_FILES := $(DEPS)
+
+include ../lib.mk
+
+override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\""
 
 clean:
        rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx*
index e23cce0bbc3a5ee90db9ef01c1a7b44c7d3e41aa..9bf82234855b8f5b0a831ecc97385a5dc5ef4728 100644 (file)
@@ -3,25 +3,9 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-fw_filesystem:
-       @if /bin/sh ./fw_filesystem.sh ; then \
-                echo "fw_filesystem: ok"; \
-        else \
-                echo "fw_filesystem: [FAIL]"; \
-                exit 1; \
-        fi
+TEST_PROGS := fw_filesystem.sh fw_userhelper.sh
 
-fw_userhelper:
-       @if /bin/sh ./fw_userhelper.sh ; then \
-                echo "fw_userhelper: ok"; \
-        else \
-                echo "fw_userhelper: [FAIL]"; \
-                exit 1; \
-        fi
-
-run_tests: all fw_filesystem fw_userhelper
+include ../lib.mk
 
 # Nothing to clean up.
 clean:
-
-.PHONY: all clean run_tests fw_filesystem fw_userhelper
index 76cc9f1562679dcd48ecd3ab5f4b15228cb92f24..346720639d1d6c6d126379738969e679455ae630 100644 (file)
@@ -1,7 +1,8 @@
 all:
 
-run_tests:
-       @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]"
+TEST_PROGS := ftracetest
+
+include ../lib.mk
 
 clean:
        rm -rf logs/*
index fd9c49a13612b2a3334a7f2eab6467e6844c861d..aa51f6c17359e18de0823f125ed7239b5f29c841 100644 (file)
@@ -2,4 +2,4 @@
 # description: Basic event tracing check
 test -f available_events -a -f set_event -a -d events
 # check scheduler events are available
-grep -q sched available_events && exit 0 || exit -1
\ No newline at end of file
+grep -q sched available_events && exit 0 || exit $FAIL
index 668616d9bb0364ce7be40f02a550602cda826b4f..87eb9d6dd4ca0e376faaa4eea161026417b2971f 100644 (file)
@@ -9,7 +9,11 @@ do_reset() {
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@ reset_tracer
 do_reset
 
 echo 'sched:sched_switch' > set_event
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -eq 0 ]; then
@@ -31,7 +36,8 @@ fi
 do_reset
 
 echo 1 > events/sched/sched_switch/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -eq 0 ]; then
@@ -41,7 +47,8 @@ fi
 do_reset
 
 echo 0 > events/sched/sched_switch/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -ne 0 ]; then
index 655c415b6e7ff9b350474951aaff90cbd2727d19..ced27ef0638f2d3f21cd3a4fc436ebdd7bc8b259 100644 (file)
@@ -9,7 +9,11 @@ do_reset() {
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@ reset_tracer
 do_reset
 
 echo 'sched:*' > set_event
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
@@ -31,7 +36,8 @@ fi
 do_reset
 
 echo 1 > events/sched/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
@@ -41,7 +47,8 @@ fi
 do_reset
 
 echo 0 > events/sched/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -ne 0 ]; then
index 480845774007f7d9ba9ccf43fa272b9858f5ef53..0bb5df3c00d41701f7392c73c31c95b10867f1d8 100644 (file)
@@ -9,7 +9,11 @@ do_reset() {
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
@@ -21,6 +25,9 @@ reset_tracer
 do_reset
 
 echo '*:*' > set_event
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -eq 0 ]; then
     fail "none of events are recorded"
@@ -29,6 +36,9 @@ fi
 do_reset
 
 echo 1 > events/enable
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -eq 0 ]; then
     fail "none of events are recorded"
@@ -37,6 +47,9 @@ fi
 do_reset
 
 echo 0 > events/enable
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -ne 0 ]; then
     fail "any of events should not be recorded"
index c15e018e022085c7e8e1ed619d5a9de7aaff7470..15c2dba06ea288a9d73652bfdcf001bd52fda936 100644 (file)
@@ -16,7 +16,9 @@ fi
 
 do_reset() {
     reset_tracer
-    echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+           echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    fi
     enable_tracing
     clear_trace
     echo > set_ftrace_filter
@@ -25,7 +27,7 @@ do_reset() {
 fail() { # msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 disable_tracing
index 6af5f6360b184c32bdee9f8c4d40327909e839c7..0ab2189613efe534b6e443e2ee01d3ecbecd6c2b 100644 (file)
@@ -17,7 +17,7 @@ do_reset() {
 fail() { # msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 disable_tracing
index 2e719cb1fc4d91492df00c37562898e68022a4a1..7808336d6f50832a456eca8061c6a88c3ba831b5 100644 (file)
@@ -31,7 +31,7 @@ fail() { # mesg
     reset_tracer
     echo > set_ftrace_filter
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 echo "Testing function tracer with profiler:"
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
new file mode 100755 (executable)
index 0000000..17d5bd0
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+# main
+main()
+{
+       if [ "$#" -eq 0 ]; then
+               echo "$0: Generating default compression gzip"
+               copts="cvzf"
+               ext=".tar.gz"
+       else
+               case "$1" in
+                       tar)
+                               copts="cvf"
+                               ext=".tar"
+                               ;;
+                       targz)
+                               copts="cvzf"
+                               ext=".tar.gz"
+                               ;;
+                       tarbz2)
+                               copts="cvjf"
+                               ext=".tar.bz2"
+                               ;;
+                       tarxz)
+                               copts="cvJf"
+                               ext=".tar.xz"
+                               ;;
+                       *)
+                       echo "Unknown tarball format $1"
+                       exit 1
+                       ;;
+       esac
+       fi
+
+       install_dir=./kselftest
+
+# Run kselftest_install.sh to build the tests into the install
+# directory
+./kselftest_install.sh
+tar $copts kselftest${ext} $install_dir
+echo "Kselftest archive kselftest${ext} created!"
+
+# clean up install directory
+rm -rf kselftest
+}
+
+main "$@"
index 74bbefdeaf4c187b07c02e67fedd74619f6215d6..25d2e702c68a53f7021d47404fd59d69241f430c 100644 (file)
@@ -12,14 +12,11 @@ endif
 CFLAGS += -I../../../../usr/include/
 
 all:
-ifeq ($(ARCH),x86)
-       gcc $(CFLAGS) msgque.c -o msgque_test
-else
-       echo "Not an x86 target, can't build msgque selftest"
-endif
+       $(CC) $(CFLAGS) msgque.c -o msgque_test
+
+TEST_PROGS := msgque_test
 
-run_tests: all
-       ./msgque_test
+include ../lib.mk
 
 clean:
        rm -fr ./msgque_test
index ff0eefdc6ceb92168b298831bf046f01b1ec1bf1..2ae7450a9a8984e2f0ba86a96842010375076757 100644 (file)
@@ -1,10 +1,10 @@
-CC := $(CROSS_COMPILE)$(CC)
 CFLAGS += -I../../../../usr/include/
 
 all: kcmp_test
 
-run_tests: all
-       @./kcmp_test || echo "kcmp_test: [FAIL]"
+TEST_PROGS := kcmp_test
+
+include ../lib.mk
 
 clean:
        $(RM) kcmp_test kcmp-test-file
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
new file mode 100755 (executable)
index 0000000..1555fbd
--- /dev/null
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+install_loc=`pwd`
+
+main()
+{
+       if [ $(basename $install_loc) !=  "selftests" ]; then
+               echo "$0: Please run it in selftests directory ..."
+               exit 1;
+       fi
+       if [ "$#" -eq 0 ]; then
+               echo "$0: Installing in default location - $install_loc ..."
+       elif [ ! -d "$1" ]; then
+               echo "$0: $1 doesn't exist!!"
+               exit 1;
+       else
+               install_loc=$1
+               echo "$0: Installing in specified location - $install_loc ..."
+       fi
+
+       install_dir=$install_loc/kselftest
+
+# Create install directory
+       mkdir -p $install_dir
+# Build tests
+       INSTALL_PATH=$install_dir make install
+}
+
+main "$@"
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
new file mode 100644 (file)
index 0000000..2194155
--- /dev/null
@@ -0,0 +1,35 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+CC := $(CROSS_COMPILE)gcc
+
+define RUN_TESTS
+       @for TEST in $(TEST_PROGS); do \
+               (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
+       done;
+endef
+
+run_tests: all
+       $(RUN_TESTS)
+
+define INSTALL_RULE
+       mkdir -p $(INSTALL_PATH)
+       install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+endef
+
+install: all
+ifdef INSTALL_PATH
+       $(INSTALL_RULE)
+else
+       $(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+       @for TEST in $(TEST_PROGS); do \
+               echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
+       done;
+endef
+
+emit_tests:
+       $(EMIT_TESTS)
+
+.PHONY: run_tests all clean install emit_tests
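
With this helper in place, a typical selftest Makefile shrinks to setting TEST_PROGS (plus TEST_FILES or TEST_PROGS_EXTENDED where needed) and including ../lib.mk, as the converted Makefiles throughout this series show; targets with unusual invocations override RUN_TESTS or EMIT_TESTS instead, as the memory-hotplug and execveat Makefiles do.
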
index b80cd10d53bac058926758b7df013d1190d6e67f..3e7eb7972511c657af9ca54e6468542a42a5bd33 100644 (file)
@@ -1,17 +1,19 @@
+CC = $(CROSS_COMPILE)gcc
 CFLAGS += -D_FILE_OFFSET_BITS=64
 CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
 
 all:
-       gcc $(CFLAGS) memfd_test.c -o memfd_test
+       $(CC) $(CFLAGS) memfd_test.c -o memfd_test
 
-run_tests: all
-       gcc $(CFLAGS) memfd_test.c -o memfd_test
-       @./memfd_test || echo "memfd_test: [FAIL]"
+TEST_PROGS := memfd_test
+
+include ../lib.mk
 
 build_fuse:
-       gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
-       gcc $(CFLAGS) fuse_test.c -o fuse_test
+       $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
+       $(CC) $(CFLAGS) fuse_test.c -o fuse_test
 
 run_fuse: build_fuse
        @./run_fuse_test.sh || echo "fuse_test: [FAIL]"
index d46b8d489cd252f18450b06dad87bb87f672e117..afb2624c704894fc280f553fbad9ac1f9fb07363 100644 (file)
@@ -1,9 +1,12 @@
 all:
 
-run_tests:
-       @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]"
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]"
+override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 run_full_test:
-       @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
+       @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
new file mode 100755 (executable)
index 0000000..6cddde0
--- /dev/null
@@ -0,0 +1,238 @@
+#!/bin/bash
+
+SYSFS=
+
+prerequisite()
+{
+       msg="skip all tests:"
+
+       if [ $UID != 0 ]; then
+               echo $msg must be run as root >&2
+               exit 0
+       fi
+
+       SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+       if [ ! -d "$SYSFS" ]; then
+               echo $msg sysfs is not mounted >&2
+               exit 0
+       fi
+
+       if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
+               echo $msg memory hotplug is not supported >&2
+               exit 0
+       fi
+}
+
+#
+# list all hot-pluggable memory
+#
+hotpluggable_memory()
+{
+       local state=${1:-.\*}
+
+       for memory in $SYSFS/devices/system/memory/memory*; do
+               if grep -q 1 $memory/removable &&
+                  grep -q $state $memory/state; then
+                       echo ${memory##/*/memory}
+               fi
+       done
+}
+
+hotpluggable_offline_memory()
+{
+       hotpluggable_memory offline
+}
+
+hotpluggable_online_memory()
+{
+       hotpluggable_memory online
+}
+
+memory_is_online()
+{
+       grep -q online $SYSFS/devices/system/memory/memory$1/state
+}
+
+memory_is_offline()
+{
+       grep -q offline $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory()
+{
+       echo online > $SYSFS/devices/system/memory/memory$1/state
+}
+
+offline_memory()
+{
+       echo offline > $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory_expect_success()
+{
+       local memory=$1
+
+       if ! online_memory $memory; then
+               echo $FUNCNAME $memory: unexpected fail >&2
+       elif ! memory_is_online $memory; then
+               echo $FUNCNAME $memory: unexpected offline >&2
+       fi
+}
+
+online_memory_expect_fail()
+{
+       local memory=$1
+
+       if online_memory $memory 2> /dev/null; then
+               echo $FUNCNAME $memory: unexpected success >&2
+       elif ! memory_is_offline $memory; then
+               echo $FUNCNAME $memory: unexpected online >&2
+       fi
+}
+
+offline_memory_expect_success()
+{
+       local memory=$1
+
+       if ! offline_memory $memory; then
+               echo $FUNCNAME $memory: unexpected fail >&2
+       elif ! memory_is_offline $memory; then
+               echo $FUNCNAME $memory: unexpected online >&2
+       fi
+}
+
+offline_memory_expect_fail()
+{
+       local memory=$1
+
+       if offline_memory $memory 2> /dev/null; then
+               echo $FUNCNAME $memory: unexpected success >&2
+       elif ! memory_is_online $memory; then
+               echo $FUNCNAME $memory: unexpected offline >&2
+       fi
+}
+
+error=-12
+priority=0
+ratio=10
+
+while getopts e:hp:r: opt; do
+       case $opt in
+       e)
+               error=$OPTARG
+               ;;
+       h)
+               echo "Usage: $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
+               exit
+               ;;
+       p)
+               priority=$OPTARG
+               ;;
+       r)
+               ratio=$OPTARG
+               ;;
+       esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+       echo "error code must be -4095 <= errno < 0" >&2
+       exit 1
+fi
+
+prerequisite
+
+echo "Test scope: $ratio% hotplug memory"
+echo -e "\t online all hotplug memory in offline state"
+echo -e "\t offline $ratio% hotplug memory in online state"
+echo -e "\t online all hotplug memory in offline state"
+
+#
+# Online all hot-pluggable memory
+#
+for memory in `hotpluggable_offline_memory`; do
+       echo offline-online $memory
+       online_memory_expect_success $memory
+done
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+for memory in `hotpluggable_online_memory`; do
+       if [ $((RANDOM % 100)) -lt $ratio ]; then
+               echo online-offline $memory
+               offline_memory_expect_success $memory
+       fi
+done
+
+#
+# Online all hot-pluggable memory again
+#
+for memory in `hotpluggable_offline_memory`; do
+       echo offline-online $memory
+       online_memory_expect_success $memory
+done
+
+#
+# Test with memory notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory
+
+prerequisite_extra()
+{
+       msg="skip extra tests:"
+
+       /sbin/modprobe -q -r memory-notifier-error-inject
+       /sbin/modprobe -q memory-notifier-error-inject priority=$priority
+
+       if [ ! -d "$DEBUGFS" ]; then
+               echo $msg debugfs is not mounted >&2
+               exit 0
+       fi
+
+       if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+               echo $msg memory-notifier-error-inject module is not available >&2
+               exit 0
+       fi
+}
+
+prerequisite_extra
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+       if [ $((RANDOM % 100)) -lt $ratio ]; then
+               offline_memory_expect_success $memory
+       fi
+done
+
+#
+# Test memory hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+       online_memory_expect_fail $memory
+done
+
+#
+# Online all hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+       online_memory_expect_success $memory
+done
+
+#
+# Test memory hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+       offline_memory_expect_fail $memory
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+/sbin/modprobe -q -r memory-notifier-error-inject
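
The script's options map directly onto the getopts block above: -r picks what percentage of removable memory blocks to exercise, -e chooses the errno injected through the memory notifier, and -p sets the notifier priority. Two illustrative invocations (root required; the values are arbitrary examples):

	# Offline and re-online 25% of hot-pluggable memory (default is 10%)
	$ sudo ./mem-on-off-test.sh -r 25

	# Inject -EBUSY (-16) instead of the default -ENOMEM (-12), with a
	# raised notifier priority
	$ sudo ./mem-on-off-test.sh -e -16 -p 10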
diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/on-off-test.sh
deleted file mode 100644 (file)
index 6cddde0..0000000
+++ /dev/null
@@ -1,238 +0,0 @@
-#!/bin/bash
-
-SYSFS=
-
-prerequisite()
-{
-       msg="skip all tests:"
-
-       if [ $UID != 0 ]; then
-               echo $msg must be run as root >&2
-               exit 0
-       fi
-
-       SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
-
-       if [ ! -d "$SYSFS" ]; then
-               echo $msg sysfs is not mounted >&2
-               exit 0
-       fi
-
-       if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
-               echo $msg memory hotplug is not supported >&2
-               exit 0
-       fi
-}
-
-#
-# list all hot-pluggable memory
-#
-hotpluggable_memory()
-{
-       local state=${1:-.\*}
-
-       for memory in $SYSFS/devices/system/memory/memory*; do
-               if grep -q 1 $memory/removable &&
-                  grep -q $state $memory/state; then
-                       echo ${memory##/*/memory}
-               fi
-       done
-}
-
-hotplaggable_offline_memory()
-{
-       hotpluggable_memory offline
-}
-
-hotpluggable_online_memory()
-{
-       hotpluggable_memory online
-}
-
-memory_is_online()
-{
-       grep -q online $SYSFS/devices/system/memory/memory$1/state
-}
-
-memory_is_offline()
-{
-       grep -q offline $SYSFS/devices/system/memory/memory$1/state
-}
-
-online_memory()
-{
-       echo online > $SYSFS/devices/system/memory/memory$1/state
-}
-
-offline_memory()
-{
-       echo offline > $SYSFS/devices/system/memory/memory$1/state
-}
-
-online_memory_expect_success()
-{
-       local memory=$1
-
-       if ! online_memory $memory; then
-               echo $FUNCNAME $memory: unexpected fail >&2
-       elif ! memory_is_online $memory; then
-               echo $FUNCNAME $memory: unexpected offline >&2
-       fi
-}
-
-online_memory_expect_fail()
-{
-       local memory=$1
-
-       if online_memory $memory 2> /dev/null; then
-               echo $FUNCNAME $memory: unexpected success >&2
-       elif ! memory_is_offline $memory; then
-               echo $FUNCNAME $memory: unexpected online >&2
-       fi
-}
-
-offline_memory_expect_success()
-{
-       local memory=$1
-
-       if ! offline_memory $memory; then
-               echo $FUNCNAME $memory: unexpected fail >&2
-       elif ! memory_is_offline $memory; then
-               echo $FUNCNAME $memory: unexpected offline >&2
-       fi
-}
-
-offline_memory_expect_fail()
-{
-       local memory=$1
-
-       if offline_memory $memory 2> /dev/null; then
-               echo $FUNCNAME $memory: unexpected success >&2
-       elif ! memory_is_online $memory; then
-               echo $FUNCNAME $memory: unexpected offline >&2
-       fi
-}
-
-error=-12
-priority=0
-ratio=10
-
-while getopts e:hp:r: opt; do
-       case $opt in
-       e)
-               error=$OPTARG
-               ;;
-       h)
-               echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
-               exit
-               ;;
-       p)
-               priority=$OPTARG
-               ;;
-       r)
-               ratio=$OPTARG
-               ;;
-       esac
-done
-
-if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
-       echo "error code must be -4095 <= errno < 0" >&2
-       exit 1
-fi
-
-prerequisite
-
-echo "Test scope: $ratio% hotplug memory"
-echo -e "\t online all hotplug memory in offline state"
-echo -e "\t offline $ratio% hotplug memory in online state"
-echo -e "\t online all hotplug memory in offline state"
-
-#
-# Online all hot-pluggable memory
-#
-for memory in `hotplaggable_offline_memory`; do
-       echo offline-online $memory
-       online_memory_expect_success $memory
-done
-
-#
-# Offline $ratio percent of hot-pluggable memory
-#
-for memory in `hotpluggable_online_memory`; do
-       if [ $((RANDOM % 100)) -lt $ratio ]; then
-               echo online-offline $memory
-               offline_memory_expect_success $memory
-       fi
-done
-
-#
-# Online all hot-pluggable memory again
-#
-for memory in `hotplaggable_offline_memory`; do
-       echo offline-online $memory
-       online_memory_expect_success $memory
-done
-
-#
-# Test with memory notifier error injection
-#
-
-DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
-NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory
-
-prerequisite_extra()
-{
-       msg="skip extra tests:"
-
-       /sbin/modprobe -q -r memory-notifier-error-inject
-       /sbin/modprobe -q memory-notifier-error-inject priority=$priority
-
-       if [ ! -d "$DEBUGFS" ]; then
-               echo $msg debugfs is not mounted >&2
-               exit 0
-       fi
-
-       if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
-               echo $msg memory-notifier-error-inject module is not available >&2
-               exit 0
-       fi
-}
-
-prerequisite_extra
-
-#
-# Offline $ratio percent of hot-pluggable memory
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
-for memory in `hotpluggable_online_memory`; do
-       if [ $((RANDOM % 100)) -lt $ratio ]; then
-               offline_memory_expect_success $memory
-       fi
-done
-
-#
-# Test memory hot-add error handling (offline => online)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
-for memory in `hotplaggable_offline_memory`; do
-       online_memory_expect_fail $memory
-done
-
-#
-# Online all hot-pluggable memory
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
-for memory in `hotplaggable_offline_memory`; do
-       online_memory_expect_success $memory
-done
-
-#
-# Test memory hot-remove error handling (online => offline)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
-for memory in `hotpluggable_online_memory`; do
-       offline_memory_expect_fail $memory
-done
-
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
-/sbin/modprobe -q -r memory-notifier-error-inject
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
new file mode 100644 (file)
index 0000000..856ad41
--- /dev/null
@@ -0,0 +1 @@
+unprivileged-remount-test
index 337d853c2b72e8f434e1ed94e4e0f53fac49bbde..95580a97326e166ce2cd9f267055bbc2cdbc2355 100644 (file)
@@ -1,17 +1,16 @@
 # Makefile for mount selftests.
-
+CFLAGS = -Wall \
+         -O2
 all: unprivileged-remount-test
 
 unprivileged-remount-test: unprivileged-remount-test.c
-       gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
+       $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
 
-# Allow specific tests to be selected.
-test_unprivileged_remount: unprivileged-remount-test
-       @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+include ../lib.mk
 
-run_tests: all test_unprivileged_remount
+TEST_PROGS := unprivileged-remount-test
+override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 clean:
        rm -f unprivileged-remount-test
-
-.PHONY: all test_unprivileged_remount
index 8056e2e68fa4cd56d27e61ba1cab7cc54cf514f9..0e3b41eb85cde2cd553bda36ac0b09f43553ceb8 100644 (file)
@@ -1,10 +1,22 @@
+CFLAGS = -O2
+
 all:
-       gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
-       gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+       $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt
+       $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+
+include ../lib.mk
+
+override define RUN_TESTS
+       @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+       @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+endef
+
+TEST_PROGS := mq_open_tests mq_perf_tests
 
-run_tests:
-       @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
-       @./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
+override define EMIT_TESTS
+       echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
+       echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
+endef
 
 clean:
        rm -f mq_open_tests mq_perf_tests
index 62f22cc9941ce7595d8ff7a085b339adc044eb0e..fac4782c51d8439e524a3675bd6acc5d2df79c21 100644 (file)
@@ -1,6 +1,5 @@
 # Makefile for net selftests
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -O2 -g
 
 CFLAGS += -I../../../../usr/include/
@@ -11,9 +10,10 @@ all: $(NET_PROGS)
 %: %.c
        $(CC) $(CFLAGS) -o $@ $^
 
-run_tests: all
-       @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
-       @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
-       ./test_bpf.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
+TEST_FILES := $(NET_PROGS)
+
+include ../lib.mk
+
 clean:
        $(RM) $(NET_PROGS)
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 1d5e7ad2c46008590f9b18cd54ca330cf8a81745..2958fe9a74e97b2c86ce1c8e6072f49291cca257 100644 (file)
@@ -8,10 +8,9 @@ ifeq ($(ARCH),powerpc)
 
 GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
 
-CC := $(CROSS_COMPILE)$(CC)
 CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
 
-export CC CFLAGS
+export CFLAGS
 
 TARGETS = pmu copyloops mm tm primitives stringloops
 
@@ -22,10 +21,25 @@ all: $(TARGETS)
 $(TARGETS):
        $(MAKE) -k -C $@ all
 
-run_tests: all
+include ../lib.mk
+
+override define RUN_TESTS
        @for TARGET in $(TARGETS); do \
                $(MAKE) -C $$TARGET run_tests; \
        done;
+endef
+
+override define INSTALL_RULE
+       @for TARGET in $(TARGETS); do \
+               $(MAKE) -C $$TARGET install; \
+       done;
+endef
+
+override define EMIT_TESTS
+       @for TARGET in $(TARGETS); do \
+               $(MAKE) -s -C $$TARGET emit_tests; \
+       done;
+endef
 
 clean:
        @for TARGET in $(TARGETS); do \
@@ -36,4 +50,4 @@ clean:
 tags:
        find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: all run_tests clean tags $(TARGETS)
+.PHONY: tags $(TARGETS)
index 6f2d3be227f9909622876c6215bcf534b88a455c..c05023514ce8a019a33c8af97516ad1d2be015ad 100644 (file)
@@ -6,24 +6,19 @@ CFLAGS += -D SELFTEST
 # Use our CFLAGS for the implicit .S rule
 ASFLAGS = $(CFLAGS)
 
-PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
 copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
 copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
 memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
 memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
 
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index a14c538dd7f8d6068fe80bc4427a8e72e0cb94e6..41cc3ed66818bffa72d3bfffcbdecb44123c6ffc 100644 (file)
@@ -1,21 +1,16 @@
 noarg:
        $(MAKE) -C ../
 
-PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_PROGS := hugetlb_vs_thp_test subpage_prot
 
-all: $(PROGS) tempfile
+all: $(TEST_PROGS) tempfile
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 tempfile:
        dd if=/dev/zero of=tempfile bs=64k count=1
 
 clean:
-       rm -f $(PROGS) tempfile
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) tempfile
index c9f4263906a5e67200284006fcae44bec8628065..5a161175bbd4197e907dbeb68f7c5f7003ecdf76 100644 (file)
@@ -1,38 +1,42 @@
 noarg:
        $(MAKE) -C ../
 
-PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-SUB_TARGETS = ebb
+all: $(TEST_PROGS) ebb
 
-all: $(PROGS) $(SUB_TARGETS)
-
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
 count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
        $(CC) $(CFLAGS) -m64 -o $@ $^
 
-run_tests: all sub_run_tests
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
-clean: sub_clean
-       rm -f $(PROGS) loop.o
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+       $(DEFAULT_RUN_TESTS)
+       $(MAKE) -C ebb run_tests
+endef
 
-$(SUB_TARGETS):
-       $(MAKE) -k -C $@ all
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+       $(DEFAULT_EMIT_TESTS)
+       $(MAKE) -s -C ebb emit_tests
+endef
 
-sub_run_tests: all
-       @for TARGET in $(SUB_TARGETS); do \
-               $(MAKE) -C $$TARGET run_tests; \
-       done;
+DEFAULT_INSTALL := $(INSTALL_RULE)
+override define INSTALL_RULE
+       $(DEFAULT_INSTALL)
+       $(MAKE) -C ebb install
+endef
 
-sub_clean:
-       @for TARGET in $(SUB_TARGETS); do \
-               $(MAKE) -C $$TARGET clean; \
-       done;
+clean:
+       rm -f $(TEST_PROGS) loop.o
+       $(MAKE) -C ebb clean
+
+ebb:
+       $(MAKE) -k -C $@ all
 
-.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
+.PHONY: all run_tests clean ebb
index 3dc4332698cb4f57c7160b22fe3c5fa740be6e79..5cdc9dbf2b279c95cd3f9603759b63bc5c0cfee5 100644 (file)
@@ -4,7 +4,7 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
-PROGS := reg_access_test event_attributes_test cycles_test     \
+TEST_PROGS := reg_access_test event_attributes_test cycles_test        \
         cycles_with_freeze_test pmc56_overflow_test            \
         ebb_vs_cpu_event_test cpu_event_vs_ebb_test            \
         cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test    \
@@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test  \
         lost_exception_test no_handler_test                    \
         cycles_with_mmcr2_test
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
 
 instruction_count_test: ../loop.S
 
 lost_exception_test: ../lib.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../../lib.mk
 
 clean:
-       rm -f $(PROGS)
+       rm -f $(TEST_PROGS)
index ea737ca01732b2726fa7d149ce40d85c28fb7ccb..b68c6221d3d1bdad82a50356fa5c73083611d948 100644 (file)
@@ -1,17 +1,12 @@
 CFLAGS += -I$(CURDIR)
 
-PROGS := load_unaligned_zeropad
+TEST_PROGS := load_unaligned_zeropad
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index 506d7734647764cd077a669678db0eb9626ae23f..2a728f4d2873de636379c277feae38f004c60934 100644 (file)
@@ -2,19 +2,14 @@
 CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 
-PROGS := memcmp
+TEST_PROGS := memcmp
 EXTRA_SOURCES := memcmp_64.S ../harness.c
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index 2cede239a074dd110aa9ff3a6119b55f9c9d4ff6..34f2ec634b40ac2c8c3a1d2fac06c0257c8e7181 100644 (file)
@@ -1,15 +1,10 @@
-PROGS := tm-resched-dscr
+TEST_PROGS := tm-resched-dscr
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index 47ae2d385ce864ad85a7a070f29bd22c34fe3e5b..453927fea90cae7b65005fb661bf7b8254686d32 100644 (file)
@@ -6,5 +6,6 @@ all: peeksiginfo
 clean:
        rm -f peeksiginfo
 
-run_tests: all
-       @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]"
+TEST_PROGS := peeksiginfo
+
+include ../lib.mk
index 04dc25e4fa92456597dc9a14862ab4192d84410d..bbd0b5398b613c08ee77eb4fbf9740383d874c0d 100644 (file)
@@ -1,12 +1,11 @@
-CC = $(CROSS_COMPILE)gcc
-
 all: get_size
 
 get_size: get_size.c
        $(CC) -static -ffreestanding -nostartfiles -s $< -o $@
 
-run_tests: all
-       ./get_size
+TEST_PROGS := get_size
+
+include ../lib.mk
 
 clean:
        $(RM) get_size
index 0a92adaf0865510f68d07914bd299c777370fddb..b3c33e071f10069ad01d0029c5d7c0350e413a18 100644 (file)
@@ -4,16 +4,10 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
 all:
 
-# Allow specific tests to be selected.
-test_num:
-       @/bin/sh ./run_numerictests
+TEST_PROGS := run_numerictests run_stringtests
+TEST_FILES := common_tests
 
-test_string:
-       @/bin/sh ./run_stringtests
-
-run_tests: all test_num test_string
+include ../lib.mk
 
 # Nothing to clean up.
 clean:
-
-.PHONY: all run_tests clean test_num test_string
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index eb2859f4ad2113576831e5f8ee420cf514f5194d..89a3f44bf355d65f72ad279c298920d4c0dec260 100644 (file)
@@ -1,8 +1,36 @@
-all:
-       gcc posix_timers.c -o posix_timers -lrt
+CC = $(CROSS_COMPILE)gcc
+BUILD_FLAGS = -DKTEST
+CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
+LDFLAGS += -lrt -lpthread
 
-run_tests: all
-       ./posix_timers
+# these are all "safe" tests that don't modify
+# system time or require escalated privileges
+TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+            inconsistency-check raw_skew threadtest rtctest
+
+TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \
+                     skew_consistency clocksource-switch leap-a-day \
+                     leapcrash set-tai set-2038
+
+bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED)
+
+all: ${bins}
+
+include ../lib.mk
+
+# these tests require escalated privileges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+       ./alarmtimer-suspend
+       ./valid-adjtimex
+       ./change_skew
+       ./skew_consistency
+       ./clocksource-switch
+       ./leap-a-day -s -i 10
+       ./leapcrash
+       ./set-tai
+       ./set-2038
 
 clean:
-       rm -f ./posix_timers
+       rm -f ${bins}
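
This split keeps the default run_tests target safe for unattended runs, while run_destructive_tests layers on the programs that really do change system state. A sketch, assuming a kernel source tree:

	# Build everything, then run only the non-intrusive tests
	$ make -C tools/testing/selftests/timers run_tests

	# Additionally run the tests that set the clock, skew the frequency or
	# suspend the machine; needs root and a box you can afford to disturb
	$ sudo make -C tools/testing/selftests/timers run_destructive_tests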
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
new file mode 100644 (file)
index 0000000..aaffbde
--- /dev/null
@@ -0,0 +1,185 @@
+/* alarmtimer suspend test
+ *             John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the alarmtimer & RTC wakeup code is
+ *   functioning.
+ *
+ *  To build:
+ *     $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4 secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+char *clockstring(int clockid)
+{
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       };
+       return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+       long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+       ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+       return ret;
+}
+
+int final_ret = 0;
+
+void sigalarm(int signo)
+{
+       long long delta_ns;
+       struct timespec ts;
+
+       clock_gettime(alarm_clock_id, &ts);
+       alarmcount++;
+
+       delta_ns = timespec_sub(start_time, ts);
+       delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+       printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+                                                       ts.tv_nsec, delta_ns);
+
+       if (delta_ns > UNREASONABLE_LAT) {
+               printf("[FAIL]\n");
+               final_ret = -1;
+       } else
+               printf("[OK]\n");
+
+}
+
+int main(void)
+{
+       timer_t tm1;
+       struct itimerspec its1, its2;
+       struct sigevent se;
+       struct sigaction act;
+       int signum = SIGRTMAX;
+
+       /* Set up signal handler: */
+       sigfillset(&act.sa_mask);
+       act.sa_flags = 0;
+       act.sa_handler = sigalarm;
+       sigaction(signum, &act, NULL);
+
+       /* Set up timer: */
+       memset(&se, 0, sizeof(se));
+       se.sigev_notify = SIGEV_SIGNAL;
+       se.sigev_signo = signum;
+       se.sigev_value.sival_int = 0;
+
+       for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+                       alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+                       alarm_clock_id++) {
+
+               alarmcount = 0;
+               timer_create(alarm_clock_id, &se, &tm1);
+
+               clock_gettime(alarm_clock_id, &start_time);
+               printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+                               start_time.tv_sec, start_time.tv_nsec);
+               printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+               its1.it_value = start_time;
+               its1.it_value.tv_sec += SUSPEND_SECS;
+               its1.it_interval.tv_sec = SUSPEND_SECS;
+               its1.it_interval.tv_nsec = 0;
+
+               timer_settime(tm1, TIMER_ABSTIME, &its1, &its2);
+
+               while (alarmcount < 5)
+                       sleep(1); /* First 5 alarms, do nothing */
+
+               printf("Starting suspend loops\n");
+               while (alarmcount < 10) {
+                       int ret;
+
+                       sleep(1);
+                       ret = system("echo mem > /sys/power/state");
+                       if (ret)
+                               break;
+               }
+               timer_delete(tm1);
+       }
+       if (final_ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
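
As the header says, the test arms a SUSPEND_SECS-interval alarm on CLOCK_REALTIME_ALARM and then CLOCK_BOOTTIME_ALARM, repeatedly writes "mem" to /sys/power/state, and flags any wakeup latency beyond the four-second UNREASONABLE_LAT bound. Run it by hand only on a machine you can let suspend repeatedly (root is needed for the suspend writes):

	$ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
	$ sudo ./alarmtimer-suspend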
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
new file mode 100644 (file)
index 0000000..cb19689
--- /dev/null
@@ -0,0 +1,107 @@
+/* ADJ_FREQ Skew change test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the raw_skew, inconsistency-check and nanosleep tests be
+ *  present in the same directory it is run from.
+ *
+ *  To build:
+ *     $ gcc change_skew.c -o change_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+int change_skew_test(int ppm)
+{
+       struct timex tx;
+       int ret;
+
+       tx.modes = ADJ_FREQUENCY;
+       tx.freq = ppm << 16;
+
+       ret = adjtimex(&tx);
+       if (ret < 0) {
+               printf("Error adjusting freq\n");
+               return ret;
+       }
+
+       ret = system("./raw_skew");
+       ret |= system("./inconsistency-check");
+       ret |= system("./nanosleep");
+
+       return ret;
+}
+
+
+int main(int argc, char **argv)
+{
+       struct timex tx;
+       int i, ret;
+
+       int ppm[5] = {0, 250, 500, -250, -500};
+
+       /* Kill ntpd */
+       ret = system("killall -9 ntpd");
+
+       /* Make sure there's no offset adjustment going on */
+       tx.modes = ADJ_OFFSET;
+       tx.offset = 0;
+       ret = adjtimex(&tx);
+
+       if (ret < 0) {
+               printf("Maybe you're not running as root?\n");
+               return -1;
+       }
+
+       for (i = 0; i < 5; i++) {
+               printf("Using %i ppm adjustment\n", ppm[i]);
+               ret = change_skew_test(ppm[i]);
+               if (ret)
+                       break;
+       }
+
+       /* Set things back */
+       tx.modes = ADJ_FREQUENCY;
+       tx.offset = 0;
+       adjtimex(&tx);
+
+       if (ret) {
+               printf("[FAIL]");
+               return ksft_exit_fail();
+       }
+       printf("[OK]");
+       return ksft_exit_pass();
+}
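
Since change_skew drives its companion tests via system(), those binaries must sit next to it. A build-and-run sketch; the companion sources ship in this same series, and the -lrt flags follow the headers' own build lines:

	$ gcc raw_skew.c -o raw_skew -lrt
	$ gcc inconsistency-check.c -o inconsistency-check -lrt
	$ gcc nanosleep.c -o nanosleep -lrt
	$ gcc change_skew.c -o change_skew -lrt
	# Needs root (adjtimex with ADJ_FREQUENCY); note it kills any running ntpd
	$ sudo ./change_skew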
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
new file mode 100644 (file)
index 0000000..627ec74
--- /dev/null
@@ -0,0 +1,179 @@
+/* Clocksource change test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which quickly changes the clocksource and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the inconsistency-check and nanosleep tests be present in the
+ *  same directory it is run from.
+ *
+ *  To build:
+ *     $ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+int get_clocksources(char list[][30])
+{
+       int fd, i;
+       ssize_t size;
+       char buf[512];
+       char *head, *tmp;
+
+       fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+
+       size = read(fd, buf, 512);
+
+       close(fd);
+
+       for (i = 0; i < 30; i++)
+               list[i][0] = '\0';
+
+       head = buf;
+       i = 0;
+       while (head - buf < size) {
+               /* Find the next space */
+               for (tmp = head; *tmp != ' '; tmp++) {
+                       if (*tmp == '\n')
+                               break;
+                       if (*tmp == '\0')
+                               break;
+               }
+               *tmp = '\0';
+               strcpy(list[i], head);
+               head = tmp + 1;
+               i++;
+       }
+
+       return i-1;
+}
+
+int get_cur_clocksource(char *buf, size_t size)
+{
+       int fd;
+
+       fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+
+       size = read(fd, buf, size);
+
+       return 0;
+}
+
+int change_clocksource(char *clocksource)
+{
+       int fd;
+       ssize_t size;
+
+       fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+       if (fd < 0)
+               return -1;
+
+       size = write(fd, clocksource, strlen(clocksource));
+
+       if (size < 0)
+               return -1;
+
+       close(fd);
+       return 0;
+}
+
+
+int run_tests(int secs)
+{
+       int ret;
+       char buf[255];
+
+       sprintf(buf, "./inconsistency-check -t %i", secs);
+       ret = system(buf);
+       if (ret)
+               return ret;
+       ret = system("./nanosleep");
+       return ret;
+}
+
+
+char clocksource_list[10][30];
+
+int main(int argc, char **argv)
+{
+       char orig_clk[512];
+       int count, i, status;
+       pid_t pid;
+
+       get_cur_clocksource(orig_clk, 512);
+
+       count = get_clocksources(clocksource_list);
+
+       if (change_clocksource(clocksource_list[0])) {
+               printf("Error: You probably need to run this as root\n");
+               return -1;
+       }
+
+       /* Check everything is sane before we start switching asynchronously */
+       for (i = 0; i < count; i++) {
+               printf("Validating clocksource %s\n", clocksource_list[i]);
+               if (change_clocksource(clocksource_list[i])) {
+                       status = -1;
+                       goto out;
+               }
+               if (run_tests(5)) {
+                       status = -1;
+                       goto out;
+               }
+       }
+
+
+       printf("Running Asynchronous Switching Tests...\n");
+       pid = fork();
+       if (!pid)
+               return run_tests(60);
+
+       while (pid != waitpid(pid, &status, WNOHANG))
+               for (i = 0; i < count; i++)
+                       if (change_clocksource(clocksource_list[i])) {
+                               status = -1;
+                               goto out;
+                       }
+out:
+       change_clocksource(orig_clk);
+
+       if (status)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
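
The test reads the available clocksources from sysfs, validates each one in turn, then forks and keeps switching the current clocksource while inconsistency-check and nanosleep run in the child. It can help to eyeball the candidates first; the sample output below is merely typical of an x86 box:

	$ cat /sys/devices/system/clocksource/clocksource0/available_clocksource
	tsc hpet acpi_pm
	$ gcc clocksource-switch.c -o clocksource-switch -lrt
	$ sudo ./clocksource-switch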
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
new file mode 100644 (file)
index 0000000..caf1bc9
--- /dev/null
@@ -0,0 +1,204 @@
+/* Time inconsistency check test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2003, 2004, 2005, 2012
+ *             (C) Copyright Linaro Limited 2015
+ *             Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+char *clockstring(int clockid)
+{
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       };
+       return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+       /* use unsigned to avoid false positives on 2038 rollover */
+       if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec)
+               return 1;
+       if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec)
+               return 0;
+       if (a.tv_nsec > b.tv_nsec)
+               return 0;
+       return 1;
+}
+
+
+
+int consistency_test(int clock_type, unsigned long seconds)
+{
+       struct timespec list[CALLS_PER_LOOP];
+       int i, inconsistent;
+       long now, then;
+       time_t t;
+       char *start_str;
+
+       clock_gettime(clock_type, &list[0]);
+       now = then = list[0].tv_sec;
+
+       /* timestamp start of test */
+       t = time(0);
+       start_str = ctime(&t);
+
+       while (seconds == -1 || now - then < seconds) {
+               inconsistent = 0;
+
+               /* Fill list */
+               for (i = 0; i < CALLS_PER_LOOP; i++)
+                       clock_gettime(clock_type, &list[i]);
+
+               /* Check for inconsistencies */
+               for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+                       if (!in_order(list[i], list[i+1]))
+                               inconsistent = i;
+
+               /* display inconsistency */
+               if (inconsistent) {
+                       unsigned long long delta;
+
+                       printf("%s\n", start_str);
+                       for (i = 0; i < CALLS_PER_LOOP; i++) {
+                               if (i == inconsistent)
+                                       printf("--------------------\n");
+                               printf("%lu:%lu\n", list[i].tv_sec,
+                                                       list[i].tv_nsec);
+                               if (i == inconsistent + 1)
+                                       printf("--------------------\n");
+                       }
+                       delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+                       delta += list[inconsistent].tv_nsec;
+                       delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+                       delta -= list[inconsistent+1].tv_nsec;
+                       printf("Delta: %llu ns\n", delta);
+                       fflush(0);
+                       /* timestamp inconsistency */
+                       t = time(0);
+                       printf("%s\n", ctime(&t));
+                       printf("[FAILED]\n");
+                       return -1;
+               }
+               now = list[0].tv_sec;
+       }
+       printf("[OK]\n");
+       return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+       int clockid, opt;
+       int userclock = CLOCK_REALTIME;
+       int maxclocks = NR_CLOCKIDS;
+       int runtime = 10;
+       struct timespec ts;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "t:c:")) != -1) {
+               switch (opt) {
+               case 't':
+                       runtime = atoi(optarg);
+                       break;
+               case 'c':
+                       userclock = atoi(optarg);
+                       maxclocks = userclock + 1;
+                       break;
+               default:
+                       printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+                       printf("        -t: Number of seconds to run\n");
+                       printf("        -c: clockid to use (default, all clockids)\n");
+                       exit(-1);
+               }
+       }
+
+       setbuf(stdout, NULL);
+
+       for (clockid = userclock; clockid < maxclocks; clockid++) {
+
+               if (clockid == CLOCK_HWSPECIFIC)
+                       continue;
+
+               if (!clock_gettime(clockid, &ts)) {
+                       printf("Consistent %-30s ", clockstring(clockid));
+                       if (consistency_test(clockid, runtime))
+                               return ksft_exit_fail();
+               }
+       }
+       return ksft_exit_pass();
+}
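
Per the getopt block, -t bounds the runtime in seconds and -c restricts the run to a single clockid, using the numeric values from the defines above. For instance, to hammer CLOCK_MONOTONIC (clockid 1) for 30 seconds:

	$ gcc inconsistency-check.c -o inconsistency-check -lrt
	$ ./inconsistency-check -c 1 -t 30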
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
new file mode 100644 (file)
index 0000000..b8272e6
--- /dev/null
@@ -0,0 +1,319 @@
+/* Leap second stress test
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPLv2
+ *
+ *  This test signals the kernel to insert a leap second
+ *  every day at midnight GMT. This allows for stressing the
+ *  kernel's leap-second behavior, as well as how well applications
+ *  handle the leap-second discontinuity.
+ *
+ *  Usage: leap-a-day [-s] [-i <num>]
+ *
+ *  Options:
+ *     -s:     Each iteration, set the date to 10 seconds before midnight GMT.
+ *             This increases the rate of leapsecond transitions tested,
+ *             but because it calls settimeofday frequently, advancing the
+ *             time by 24 hours every ~16 seconds, it may cause application
+ *             disruption.
+ *
+ *     -i:     Number of iterations to run (default: infinite)
+ *
+ *  Other notes: Disabling NTP prior to running this is advised, as the two
+ *              may conflict in their commands to the kernel.
+ *
+ *  To build:
+ *     $ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+       if (a.tv_sec < b.tv_sec)
+               return 1;
+       if (a.tv_sec > b.tv_sec)
+               return 0;
+       if (a.tv_nsec > b.tv_nsec)
+               return 0;
+       return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+       ts.tv_nsec += ns;
+       while (ts.tv_nsec >= NSEC_PER_SEC) {
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;
+}
+
+char *time_state_str(int state)
+{
+       switch (state) {
+       case TIME_OK:   return "TIME_OK";
+       case TIME_INS:  return "TIME_INS";
+       case TIME_DEL:  return "TIME_DEL";
+       case TIME_OOP:  return "TIME_OOP";
+       case TIME_WAIT: return "TIME_WAIT";
+       case TIME_BAD:  return "TIME_BAD";
+       }
+       return "ERROR";
+}
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+       struct timex tx;
+       int ret;
+
+       /*
+        * We have to call adjtime twice here, as kernels
+        * prior to 6b1859dba01c7 (included in 3.5 and
+        * -stable), had an issue with the state machine
+        * and wouldn't clear the STA_INS/DEL flag directly.
+        */
+       tx.modes = ADJ_STATUS;
+       tx.status = STA_PLL;
+       ret = adjtimex(&tx);
+
+       /* Clear maxerror, as it can cause UNSYNC to be set */
+       tx.modes = ADJ_MAXERROR;
+       tx.maxerror = 0;
+       ret = adjtimex(&tx);
+
+       /* Clear the status */
+       tx.modes = ADJ_STATUS;
+       tx.status = 0;
+       ret = adjtimex(&tx);
+
+       return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+       clear_time_state();
+       exit(0);
+}
+
+/* Test for known hrtimer failure */
+void test_hrtimer_failure(void)
+{
+       struct timespec now, target;
+
+       clock_gettime(CLOCK_REALTIME, &now);
+       target = timespec_add(now, NSEC_PER_SEC/2);
+       clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+       clock_gettime(CLOCK_REALTIME, &now);
+
+       if (!in_order(target, now))
+               printf("ERROR: hrtimer early expiration failure observed.\n");
+}
+
+int main(int argc, char **argv)
+{
+       int settime = 0;
+       int tai_time = 0;
+       int insert = 1;
+       int iterations = -1;
+       int opt;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "sti:")) != -1) {
+               switch (opt) {
+               case 's':
+                       printf("Setting time to speed up testing\n");
+                       settime = 1;
+                       break;
+               case 'i':
+                       iterations = atoi(optarg);
+                       break;
+               case 't':
+                       tai_time = 1;
+                       break;
+               default:
+                       printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]);
+                       printf("        -s: Set time to right before leap second each iteration\n");
+                       printf("        -i: Number of iterations\n");
+                       printf("        -t: Print TAI time\n");
+                       exit(-1);
+               }
+       }
+
+       /* Make sure TAI support is present if -t was used */
+       if (tai_time) {
+               struct timespec ts;
+
+               if (clock_gettime(CLOCK_TAI, &ts)) {
+                       printf("System doesn't support CLOCK_TAI\n");
+                       ksft_exit_fail();
+               }
+       }
+
+       signal(SIGINT, handler);
+       signal(SIGKILL, handler);
+
+       if (iterations < 0)
+               printf("This runs continuously. Press ctrl-c to stop\n");
+       else
+               printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+       printf("\n");
+       while (1) {
+               int ret;
+               struct timespec ts;
+               struct timex tx;
+               time_t now, next_leap;
+
+               /* Get the current time */
+               clock_gettime(CLOCK_REALTIME, &ts);
+
+               /* Calculate the next possible leap second 23:59:60 GMT */
+               next_leap = ts.tv_sec;
+               next_leap += 86400 - (next_leap % 86400);
+
+               if (settime) {
+                       struct timeval tv;
+
+                       tv.tv_sec = next_leap - 10;
+                       tv.tv_usec = 0;
+                       settimeofday(&tv, NULL);
+                       printf("Setting time to %s", ctime(&tv.tv_sec));
+               }
+
+               /* Reset NTP time state */
+               clear_time_state();
+
+               /* Set the leap second insert flag */
+               tx.modes = ADJ_STATUS;
+               if (insert)
+                       tx.status = STA_INS;
+               else
+                       tx.status = STA_DEL;
+               ret = adjtimex(&tx);
+               if (ret < 0) {
+                       printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+                                                       time_state_str(ret));
+                       return ksft_exit_fail();
+               }
+
+               /* Validate STA_INS was set */
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.status != STA_INS && tx.status != STA_DEL) {
+                       printf("Error: STA_INS/STA_DEL not set!: %s\n",
+                                                       time_state_str(ret));
+                       return ksft_exit_fail();
+               }
+
+               if (tai_time) {
+                       printf("Using TAI time,"
+                               " no inconsistencies should be seen!\n");
+               }
+
+               printf("Scheduling leap second for %s", ctime(&next_leap));
+
+               /* Wake up 3 seconds before leap */
+               ts.tv_sec = next_leap - 3;
+               ts.tv_nsec = 0;
+
+               while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+                       printf("Something woke us up, returning to sleep\n");
+
+               /* Validate STA_INS is still set */
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.status != STA_INS && tx.status != STA_DEL) {
+                       printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+                       tx.modes = ADJ_STATUS;
+                       if (insert)
+                               tx.status = STA_INS;
+                       else
+                               tx.status = STA_DEL;
+                       ret = adjtimex(&tx);
+               }
+
+               /* Check adjtimex output every half second */
+               now = tx.time.tv_sec;
+               while (now < next_leap + 2) {
+                       char buf[26];
+                       struct timespec tai;
+
+                       tx.modes = 0;
+                       ret = adjtimex(&tx);
+
+                       if (tai_time) {
+                               clock_gettime(CLOCK_TAI, &tai);
+                               printf("%ld sec, %9ld ns\t%s\n",
+                                               tai.tv_sec,
+                                               tai.tv_nsec,
+                                               time_state_str(ret));
+                       } else {
+                               ctime_r(&tx.time.tv_sec, buf);
+                               buf[strlen(buf)-1] = 0; /* remove trailing \n */
+
+                               printf("%s + %6ld us (%i)\t%s\n",
+                                               buf,
+                                               tx.time.tv_usec,
+                                               tx.tai,
+                                               time_state_str(ret));
+                       }
+                       now = tx.time.tv_sec;
+                       /* Sleep for another half second */
+                       ts.tv_sec = 0;
+                       ts.tv_nsec = NSEC_PER_SEC / 2;
+                       clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+               }
+               /* Switch to using other mode */
+               insert = !insert;
+
+               /* Note if kernel has known hrtimer failure */
+               test_hrtimer_failure();
+
+               printf("Leap complete\n\n");
+
+               if ((iterations != -1) && !(--iterations))
+                       break;
+       }
+
+       clear_time_state();
+       return ksft_exit_pass();
+}
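
The run_destructive_tests target above invokes this as ./leap-a-day -s -i 10, i.e. ten accelerated iterations with the clock warped to just before midnight each time. By hand (root required; stop ntpd first, as the header advises):

	$ gcc leap-a-day.c -o leap-a-day -lrt
	$ sudo ./leap-a-day -s -i 10
	# Or watch the TAI clock across a single insertion
	$ sudo ./leap-a-day -s -i 1 -t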
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
new file mode 100644 (file)
index 0000000..a1071bd
--- /dev/null
@@ -0,0 +1,120 @@
+/* Demo leapsecond deadlock
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possible
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ *  To build:
+ *     $ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+       struct timex tx;
+       int ret;
+
+       /*
+        * We have to call adjtimex twice here, as kernels
+        * prior to 6b1859dba01c7 (included in 3.5 and
+        * -stable), had an issue with the state machine
+        * and wouldn't clear the STA_INS/DEL flag directly.
+        */
+       tx.modes = ADJ_STATUS;
+       tx.status = STA_PLL;
+       ret = adjtimex(&tx);
+
+       tx.modes = ADJ_STATUS;
+       tx.status = 0;
+       ret = adjtimex(&tx);
+
+       return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+       clear_time_state();
+       exit(0);
+}
+
+
+int main(void)
+{
+       struct timex tx;
+       struct timespec ts;
+       time_t next_leap;
+       int count = 0;
+
+       setbuf(stdout, NULL);
+
+       signal(SIGINT, handler);
+       signal(SIGTERM, handler); /* SIGKILL cannot be caught */
+       printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+       clear_time_state();
+
+
+       /* Get the current time */
+       clock_gettime(CLOCK_REALTIME, &ts);
+
+       /* Calculate the next possible leap second 23:59:60 GMT */
+       next_leap = ts.tv_sec;
+       next_leap += 86400 - (next_leap % 86400);
+
+       for (count = 0; count < 20; count++) {
+               struct timeval tv;
+
+
+               /* set the time to 2 seconds before the leap */
+               tv.tv_sec = next_leap - 2;
+               tv.tv_usec = 0;
+               if (settimeofday(&tv, NULL)) {
+                       printf("Error: You're likely not running with proper (ie: root) permissions\n");
+                       return ksft_exit_fail();
+               }
+               tx.modes = 0;
+               adjtimex(&tx);
+
+               /* hammer on adjtime w/ STA_INS */
+               while (tx.time.tv_sec < next_leap + 1) {
+                       /* Set the leap second insert flag */
+                       tx.modes = ADJ_STATUS;
+                       tx.status = STA_INS;
+                       adjtimex(&tx);
+               }
+               clear_time_state();
+               printf(".");
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
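
Given the hard-hang warning above, a cautious harness might refuse to run outside the affected 2.6.26..3.3 range. A rough sketch, with deliberately simplistic version parsing (-rc and vendor suffixes are ignored):

/* Sketch: guess whether the running kernel is in the affected range. */
#include <stdio.h>
#include <sys/utsname.h>

static int kernel_may_deadlock(void)
{
        struct utsname u;
        int maj = 0, min = 0, patch = 0;

        if (uname(&u) < 0)
                return 1; /* unknown: assume the worst */
        sscanf(u.release, "%d.%d.%d", &maj, &min, &patch);
        if (maj == 2 && min == 6 && patch >= 26)
                return 1;
        return maj == 3 && min <= 3;
}

int main(void)
{
        printf("leapcrash-susceptible kernel: %s\n",
               kernel_may_deadlock() ? "maybe" : "no");
        return 0;
}
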
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
new file mode 100644 (file)
index 0000000..a2a3924
--- /dev/null
@@ -0,0 +1,124 @@
+/* Measure mqueue timeout latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *             (C) Copyright Linaro 2013
+ *
+ *             Inspired, with permission, by an example test from:
+ *                     Romain Francoise <romain@orebokech.com>
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT         100000000       /* 100ms in nanoseconds */
+#define UNRESONABLE_LATENCY    40000000        /* 40ms in nanosecs */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+       long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+       ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+       return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+       ts.tv_nsec += ns;
+       while (ts.tv_nsec >= NSEC_PER_SEC) {
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;
+}
+
+int mqueue_lat_test(void)
+{
+
+       mqd_t q;
+       struct mq_attr attr;
+       struct timespec start, end, now, target;
+       int i, count, ret;
+
+       q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+       if (q < 0) {
+               perror("mq_open");
+               return -1;
+       }
+       mq_getattr(q, &attr);
+
+
+       count = 100;
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (i = 0; i < count; i++) {
+               char buf[attr.mq_msgsize];
+
+               clock_gettime(CLOCK_REALTIME, &now);
+               target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+               ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+               if (ret < 0 && errno != ETIMEDOUT) {
+                       perror("mq_timedreceive");
+                       return -1;
+               }
+       }
+       clock_gettime(CLOCK_MONOTONIC, &end);
+
+       mq_close(q);
+
+       if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY)
+               return -1;
+
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       int ret;
+
+       printf("Mqueue latency :                          ");
+
+       ret = mqueue_lat_test();
+       if (ret < 0) {
+               printf("[FAILED]\n");
+               return ksft_exit_fail();
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
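
One point worth making explicit about the loop above: mq_timedreceive() takes an absolute CLOCK_REALTIME deadline, not a relative timeout, which is why the test adds TARGET_TIMEOUT to "now" each iteration. A standalone sketch (queue name illustrative; build with -lrt):

/* Sketch: one timed receive against an absolute 1s deadline. */
#include <errno.h>
#include <fcntl.h>
#include <mqueue.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec deadline;
        struct mq_attr attr;
        mqd_t q = mq_open("/demo-lat", O_CREAT | O_RDONLY, 0666, NULL);

        if (q == (mqd_t)-1) {
                perror("mq_open");
                return 1;
        }
        mq_getattr(q, &attr);
        char buf[attr.mq_msgsize];

        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += 1; /* absolute: now + 1s */
        if (mq_timedreceive(q, buf, sizeof(buf), NULL, &deadline) < 0 &&
            errno == ETIMEDOUT)
                printf("timed out, as expected on an empty queue\n");
        mq_close(q);
        mq_unlink("/demo-lat");
        return 0;
}
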
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
new file mode 100644 (file)
index 0000000..8a3c29d
--- /dev/null
@@ -0,0 +1,174 @@
+/* Make sure timers don't return early
+ *              by: john stultz (johnstul@us.ibm.com)
+ *                 John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright Linaro 2013 2015
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc nanosleep.c -o nanosleep -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       }
+       return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+       if (a.tv_sec < b.tv_sec)
+               return 1;
+       if (a.tv_sec > b.tv_sec)
+               return 0;
+       if (a.tv_nsec > b.tv_nsec)
+               return 0;
+       return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+       ts.tv_nsec += ns;
+       while (ts.tv_nsec >= NSEC_PER_SEC) {
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;
+}
+
+int nanosleep_test(int clockid, long long ns)
+{
+       struct timespec now, target, rel;
+
+       /* First check abs time */
+       if (clock_gettime(clockid, &now))
+               return UNSUPPORTED;
+       target = timespec_add(now, ns);
+
+       if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+               return UNSUPPORTED;
+       clock_gettime(clockid, &now);
+
+       if (!in_order(target, now))
+               return -1;
+
+       /* Second check reltime */
+       clock_gettime(clockid, &now);
+       rel.tv_sec = 0;
+       rel.tv_nsec = 0;
+       rel = timespec_add(rel, ns);
+       target = timespec_add(now, ns);
+       clock_nanosleep(clockid, 0, &rel, NULL);
+       clock_gettime(clockid, &now);
+
+       if (!in_order(target, now))
+               return -1;
+       return 0;
+}
+
+int main(int argc, char **argv)
+{
+       long long length;
+       int clockid, ret;
+
+       for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+               /* Skip cputime clockids since nanosleep won't increment cputime; also skip the reserved CLOCK_HWSPECIFIC slot */
+               if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+                               clockid == CLOCK_THREAD_CPUTIME_ID ||
+                               clockid == CLOCK_HWSPECIFIC)
+                       continue;
+
+               printf("Nanosleep %-31s ", clockstring(clockid));
+
+               length = 10;
+               while (length <= (NSEC_PER_SEC * 10)) {
+                       ret = nanosleep_test(clockid, length);
+                       if (ret == UNSUPPORTED) {
+                               printf("[UNSUPPORTED]\n");
+                               goto next;
+                       }
+                       if (ret < 0) {
+                               printf("[FAILED]\n");
+                               return ksft_exit_fail();
+                       }
+                       length *= 100;
+               }
+               printf("[OK]\n");
+next:
+               ret = 0;
+       }
+       return ksft_exit_pass();
+}
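
The test above exercises both clock_nanosleep() modes; the distinction in miniature (CLOCK_MONOTONIC chosen for brevity):

/* Sketch: a 100ms relative sleep, then a 100ms absolute sleep. */
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000LL

int main(void)
{
        struct timespec t = { 0, 100 * 1000 * 1000 };

        /* relative: sleep for a duration */
        clock_nanosleep(CLOCK_MONOTONIC, 0, &t, NULL);

        /* absolute: sleep until a deadline */
        clock_gettime(CLOCK_MONOTONIC, &t);
        t.tv_nsec += 100 * 1000 * 1000;
        if (t.tv_nsec >= NSEC_PER_SEC) {
                t.tv_nsec -= NSEC_PER_SEC;
                t.tv_sec++;
        }
        clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &t, NULL);
        printf("both sleeps done\n");
        return 0;
}
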
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
new file mode 100644 (file)
index 0000000..2d7898f
--- /dev/null
@@ -0,0 +1,190 @@
+/* Measure nanosleep timer latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *             (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       }
+       return "UNKNOWN_CLOCKID";
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+       ts.tv_nsec += ns;
+       while (ts.tv_nsec >= NSEC_PER_SEC) {
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+       long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+       ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+       return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{
+       struct timespec start, end, target;
+       long long latency = 0;
+       int i, count;
+
+       target.tv_sec = ns/NSEC_PER_SEC;
+       target.tv_nsec = ns%NSEC_PER_SEC;
+
+       if (clock_gettime(clockid, &start))
+               return UNSUPPORTED;
+       if (clock_nanosleep(clockid, 0, &target, NULL))
+               return UNSUPPORTED;
+
+       count = 10;
+
+       /* First check relative latency */
+       clock_gettime(clockid, &start);
+       for (i = 0; i < count; i++)
+               clock_nanosleep(clockid, 0, &target, NULL);
+       clock_gettime(clockid, &end);
+
+       if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
+               printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+               return -1;
+       }
+
+       /* Next check absolute latency */
+       for (i = 0; i < count; i++) {
+               clock_gettime(clockid, &start);
+               target = timespec_add(start, ns);
+               clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+               clock_gettime(clockid, &end);
+               latency += timespec_sub(target, end);
+       }
+
+       if (latency/count > UNRESONABLE_LATENCY) {
+               printf("Large abs latency: %lld ns :", latency/count);
+               return -1;
+       }
+
+       return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{
+       long long length;
+       int clockid, ret;
+
+       for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+               /* Skip cputime clockids since nanosleep won't increment cputime; also skip the reserved CLOCK_HWSPECIFIC slot */
+               if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+                               clockid == CLOCK_THREAD_CPUTIME_ID ||
+                               clockid == CLOCK_HWSPECIFIC)
+                       continue;
+
+               printf("nsleep latency %-26s ", clockstring(clockid));
+
+               length = 10;
+               while (length <= (NSEC_PER_SEC * 10)) {
+                       ret = nanosleep_lat_test(clockid, length);
+                       if (ret)
+                               break;
+                       length *= 100;
+
+               }
+
+               if (ret == UNSUPPORTED) {
+                       printf("[UNSUPPORTED]\n");
+                       continue;
+               }
+               if (ret < 0) {
+                       printf("[FAILED]\n");
+                       return ksft_exit_fail();
+               }
+               printf("[OK]\n");
+       }
+       return ksft_exit_pass();
+}
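
The latency averaged above is simply the gap between the requested and actual wake time; a single-shot version of the same measurement:

/* Sketch: measure how late one absolute wakeup fires. */
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000LL

int main(void)
{
        struct timespec target, end;

        clock_gettime(CLOCK_MONOTONIC, &target);
        target.tv_sec += 1; /* wake 1s from now */
        clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &target, NULL);
        clock_gettime(CLOCK_MONOTONIC, &end);

        printf("woke %lld ns after the deadline\n",
               (end.tv_sec - target.tv_sec) * NSEC_PER_SEC +
               (end.tv_nsec - target.tv_nsec));
        return 0;
}
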
index f87d970a485c48b7d10e304e911fd3699c41bb35..5a246a02dff3c6986a1bd06496473ec5be1fd252 100644 (file)
@@ -35,10 +35,11 @@ static void user_loop(void)
 static void kernel_loop(void)
 {
        void *addr = sbrk(0);
+       int err = 0;
 
-       while (!done) {
-               brk(addr + 4096);
-               brk(addr);
+       while (!done && !err) {
+               err = brk(addr + 4096);
+               err |= brk(addr);
        }
 }
 
@@ -190,8 +191,6 @@ static int check_timer_create(int which)
 
 int main(int argc, char **argv)
 {
-       int err;
-
        printf("Testing posix timers. False negative may happen on CPU execution \n");
        printf("based timers if other threads run on the CPU...\n");
 
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
new file mode 100644 (file)
index 0000000..30906bf
--- /dev/null
@@ -0,0 +1,154 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *                 John Stultz <john.stultz@linaro.org>
+ *             (C) Copyright IBM 2012
+ *             (C) Copyright Linaro Limited 2015
+ *             Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc raw_skew.c -o raw_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+#define CLOCK_MONOTONIC_RAW            4
+#define NSEC_PER_SEC 1000000000LL
+
+#define shift_right(x, s) ({           \
+       __typeof__(x) __x = (x);        \
+       __typeof__(s) __s = (s);        \
+       __x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
+
+long long llabs(long long val)
+{
+       if (val < 0)
+               val = -val;
+       return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+       return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+       struct timespec ts;
+
+       ts.tv_sec = ns/NSEC_PER_SEC;
+       ts.tv_nsec = ns%NSEC_PER_SEC;
+       return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+       long long start_ns, end_ns;
+
+       start_ns = ts_to_nsec(start);
+       end_ns = ts_to_nsec(end);
+       return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+       struct timespec start, mid, end;
+       long long diff = 0, tmp;
+       int i;
+
+       for (i = 0; i < 3; i++) {
+               long long newdiff;
+
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+               clock_gettime(CLOCK_MONOTONIC, &end);
+
+               newdiff = diff_timespec(start, end);
+               if (diff == 0 || newdiff < diff) {
+                       diff = newdiff;
+                       *raw = mid;
+                       tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+                       *mon = nsec_to_ts(tmp);
+               }
+       }
+}
+
+int main(int argc, char **argv)
+{
+       struct timespec mon, raw, start, end;
+       long long delta1, delta2, interval, eppm, ppm;
+       struct timex tx1, tx2;
+
+       setbuf(stdout, NULL);
+
+       if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+               printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+               return -1;
+       }
+
+       tx1.modes = 0;
+       adjtimex(&tx1);
+       get_monotonic_and_raw(&mon, &raw);
+       start = mon;
+       delta1 = diff_timespec(mon, raw);
+
+       if (tx1.offset)
+               printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+       printf("Estimating clock drift: ");
+       sleep(120);
+
+       get_monotonic_and_raw(&mon, &raw);
+       end = mon;
+       tx2.modes = 0;
+       adjtimex(&tx2);
+       delta2 = diff_timespec(mon, raw);
+
+       interval = diff_timespec(start, end);
+
+       /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+       eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+       eppm = -eppm;
+       printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
+
+       /* Avg the two actual freq samples adjtimex gave us */
+       ppm = ((long long)tx1.freq + tx2.freq) * 1000 / 2;
+       ppm = shift_right(ppm, 16);
+       printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+       if (llabs(eppm - ppm) > 1000) {
+               printf("        [FAILED]\n");
+               return ksft_exit_fail();
+       }
+       printf("        [OK]\n");
+       return  ksft_exit_pass();
+}
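
The "(act)" figure above comes from tx.freq, which the kernel reports as ppm scaled by 2^16. A read-only sketch of that conversion, reusing the test's sign-safe shift macro:

/* Sketch: query the current NTP frequency correction in ppm. */
#include <stdio.h>
#include <sys/timex.h>

#define shift_right(x, s) ({           \
        __typeof__(x) __x = (x);       \
        __typeof__(s) __s = (s);       \
        __x < 0 ? -(-__x >> __s) : __x >> __s; \
})

int main(void)
{
        struct timex tx = { .modes = 0 }; /* read-only query */
        long long mppm;

        adjtimex(&tx);
        mppm = shift_right((long long)tx.freq * 1000, 16);
        printf("freq correction: %lld.%03lld ppm\n",
               mppm / 1000, mppm < 0 ? -mppm % 1000 : mppm % 1000);
        return 0;
}
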
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
new file mode 100644 (file)
index 0000000..d80ae85
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ *      Real Time Clock Driver Test/Example Program
+ *
+ *      Compile with:
+ *                  gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
+ *
+ *      Copyright (C) 1996, Paul Gortmaker.
+ *
+ *      Released under the GNU General Public License, version 2,
+ *      included herein by reference.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+
+int main(int argc, char **argv)
+{
+       int i, fd, retval, irqcount = 0;
+       unsigned long tmp, data;
+       struct rtc_time rtc_tm;
+       const char *rtc = default_rtc;
+       struct timeval start, end, diff;
+
+       switch (argc) {
+       case 2:
+               rtc = argv[1];
+               /* FALLTHROUGH */
+       case 1:
+               break;
+       default:
+               fprintf(stderr, "usage:  rtctest [rtcdev]\n");
+               return 1;
+       }
+
+       fd = open(rtc, O_RDONLY);
+
+       if (fd ==  -1) {
+               perror(rtc);
+               exit(errno);
+       }
+
+       fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
+
+       /* Turn on update interrupts (one per second) */
+       retval = ioctl(fd, RTC_UIE_ON, 0);
+       if (retval == -1) {
+               if (errno == ENOTTY) {
+                       fprintf(stderr,
+                               "\n...Update IRQs not supported.\n");
+                       goto test_READ;
+               }
+               perror("RTC_UIE_ON ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
+                       rtc);
+       fflush(stderr);
+       for (i=1; i<6; i++) {
+               /* This read will block */
+               retval = read(fd, &data, sizeof(unsigned long));
+               if (retval == -1) {
+                       perror("read");
+                       exit(errno);
+               }
+               fprintf(stderr, " %d",i);
+               fflush(stderr);
+               irqcount++;
+       }
+
+       fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
+       fflush(stderr);
+       for (i=1; i<6; i++) {
+               struct timeval tv = {5, 0};     /* 5 second timeout on select */
+               fd_set readfds;
+
+               FD_ZERO(&readfds);
+               FD_SET(fd, &readfds);
+               /* The select will wait until an RTC interrupt happens. */
+               retval = select(fd+1, &readfds, NULL, NULL, &tv);
+               if (retval == -1) {
+                       perror("select");
+                       exit(errno);
+               }
+               /* This read won't block unlike the select-less case above. */
+               retval = read(fd, &data, sizeof(unsigned long));
+               if (retval == -1) {
+                       perror("read");
+                       exit(errno);
+               }
+               fprintf(stderr, " %d",i);
+               fflush(stderr);
+               irqcount++;
+       }
+
+       /* Turn off update interrupts */
+       retval = ioctl(fd, RTC_UIE_OFF, 0);
+       if (retval == -1) {
+               perror("RTC_UIE_OFF ioctl");
+               exit(errno);
+       }
+
+test_READ:
+       /* Read the RTC time/date */
+       retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
+       if (retval == -1) {
+               perror("RTC_RD_TIME ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+               rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+       /* Set the alarm to 5 sec in the future, and check for rollover */
+       rtc_tm.tm_sec += 5;
+       if (rtc_tm.tm_sec >= 60) {
+               rtc_tm.tm_sec %= 60;
+               rtc_tm.tm_min++;
+       }
+       if (rtc_tm.tm_min == 60) {
+               rtc_tm.tm_min = 0;
+               rtc_tm.tm_hour++;
+       }
+       if (rtc_tm.tm_hour == 24)
+               rtc_tm.tm_hour = 0;
+
+       retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
+       if (retval == -1) {
+               if (errno == ENOTTY) {
+                       fprintf(stderr,
+                               "\n...Alarm IRQs not supported.\n");
+                       goto test_PIE;
+               }
+               perror("RTC_ALM_SET ioctl");
+               exit(errno);
+       }
+
+       /* Read the current alarm settings */
+       retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
+       if (retval == -1) {
+               perror("RTC_ALM_READ ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
+               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+       /* Enable alarm interrupts */
+       retval = ioctl(fd, RTC_AIE_ON, 0);
+       if (retval == -1) {
+               perror("RTC_AIE_ON ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "Waiting 5 seconds for alarm...");
+       fflush(stderr);
+       /* This blocks until the alarm ring causes an interrupt */
+       retval = read(fd, &data, sizeof(unsigned long));
+       if (retval == -1) {
+               perror("read");
+               exit(errno);
+       }
+       irqcount++;
+       fprintf(stderr, " okay. Alarm rang.\n");
+
+       /* Disable alarm interrupts */
+       retval = ioctl(fd, RTC_AIE_OFF, 0);
+       if (retval == -1) {
+               perror("RTC_AIE_OFF ioctl");
+               exit(errno);
+       }
+
+test_PIE:
+       /* Read periodic IRQ rate */
+       retval = ioctl(fd, RTC_IRQP_READ, &tmp);
+       if (retval == -1) {
+               /* not all RTCs support periodic IRQs */
+               if (errno == ENOTTY) {
+                       fprintf(stderr, "\nNo periodic IRQ support\n");
+                       goto done;
+               }
+               perror("RTC_IRQP_READ ioctl");
+               exit(errno);
+       }
+       fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
+
+       fprintf(stderr, "Counting 20 interrupts at:");
+       fflush(stderr);
+
+       /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+       for (tmp=2; tmp<=64; tmp*=2) {
+
+               retval = ioctl(fd, RTC_IRQP_SET, tmp);
+               if (retval == -1) {
+                       /* not all RTCs can change their periodic IRQ rate */
+                       if (errno == ENOTTY) {
+                               fprintf(stderr,
+                                       "\n...Periodic IRQ rate is fixed\n");
+                               goto done;
+                       }
+                       perror("RTC_IRQP_SET ioctl");
+                       exit(errno);
+               }
+
+               fprintf(stderr, "\n%ldHz:\t", tmp);
+               fflush(stderr);
+
+               /* Enable periodic interrupts */
+               retval = ioctl(fd, RTC_PIE_ON, 0);
+               if (retval == -1) {
+                       perror("RTC_PIE_ON ioctl");
+                       exit(errno);
+               }
+
+               for (i=1; i<21; i++) {
+                       gettimeofday(&start, NULL);
+                       /* This blocks */
+                       retval = read(fd, &data, sizeof(unsigned long));
+                       if (retval == -1) {
+                               perror("read");
+                               exit(errno);
+                       }
+                       gettimeofday(&end, NULL);
+                       timersub(&end, &start, &diff);
+                       if (diff.tv_sec > 0 ||
+                           diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+                               fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+                                      diff.tv_sec, diff.tv_usec,
+                                      (1000000L / tmp));
+                               fflush(stdout);
+                               exit(-1);
+                       }
+
+                       fprintf(stderr, " %d",i);
+                       fflush(stderr);
+                       irqcount++;
+               }
+
+               /* Disable periodic interrupts */
+               retval = ioctl(fd, RTC_PIE_OFF, 0);
+               if (retval == -1) {
+                       perror("RTC_PIE_OFF ioctl");
+                       exit(errno);
+               }
+       }
+
+done:
+       fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+       close(fd);
+
+       return 0;
+}
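
Most of the program above depends on interrupt support; reading the clock does not. A minimal read-only sketch (the device path varies per machine):

/* Sketch: fetch the RTC time without touching any IRQ modes. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/rtc.h>

int main(void)
{
        struct rtc_time tm;
        int fd = open("/dev/rtc0", O_RDONLY);

        if (fd < 0 || ioctl(fd, RTC_RD_TIME, &tm) < 0) {
                perror("rtc");
                return 1;
        }
        printf("%04d-%02d-%02d %02d:%02d:%02d\n",
               tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
               tm.tm_hour, tm.tm_min, tm.tm_sec);
        close(fd);
        return 0;
}
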
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
new file mode 100644 (file)
index 0000000..c8a7e14
--- /dev/null
@@ -0,0 +1,144 @@
+/* Time bounds setting test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which sets the time to edge cases then
+ *  uses other tests to detect problems. Thus this test requires that
+ *  the inconsistency-check and nanosleep tests be present in the same
+ *  directory it is run from.
+ *
+ *  To build:
+ *     $ gcc set-2038.c -o set-2038 -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX      ((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX  (KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL                  /* overflows 32-bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX                        /* overflows 64-bit ktime_t */
+#define YEAR_MAX  ((long long)((1ULL<<63)-1))  /* overflows 64-bit time_t */
+
+int is32bits(void)
+{
+       return (sizeof(long) == 4);
+}
+
+int settime(long long time)
+{
+       struct timeval now;
+       int ret;
+
+       now.tv_sec = (time_t)time;
+       now.tv_usec  = 0;
+
+       ret = settimeofday(&now, NULL);
+
+       printf("Setting time to 0x%lx: %d\n", (long)time, ret);
+       return ret;
+}
+
+int do_tests(void)
+{
+       int ret;
+
+       ret = system("date");
+       ret = system("./inconsistency-check -c 0 -t 20");
+       ret |= system("./nanosleep");
+       ret |= system("./nsleep-lat");
+       return ret;
+
+}
+
+int main(int argc, char *argv[])
+{
+       int ret = 0;
+       int opt, dangerous = 0;
+       time_t start;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "d")) != -1) {
+               switch (opt) {
+               case 'd':
+                       dangerous = 1;
+                       break;
+               }
+       }
+
+       start = time(0);
+
+       /* First test that crazy values don't work */
+       if (!settime(YEAR_1901)) {
+               ret = -1;
+               goto out;
+       }
+       if (!settime(YEAR_MAX)) {
+               ret = -1;
+               goto out;
+       }
+       if (!is32bits() && !settime(YEAR_2262)) {
+               ret = -1;
+               goto out;
+       }
+
+       /* Now test behavior near edges */
+       settime(YEAR_1970);
+       ret = do_tests();
+       if (ret)
+               goto out;
+
+       settime(YEAR_2038 - 600);
+       ret = do_tests();
+       if (ret)
+               goto out;
+
+       /* The rest of the tests can blowup on 32bit systems */
+       if (is32bits() && !dangerous)
+               goto out;
+       /* Test rollover behavior 32bit edge */
+       settime(YEAR_2038 - 10);
+       ret = do_tests();
+       if (ret)
+               goto out;
+
+       settime(YEAR_2262 - 600);
+       ret = do_tests();
+
+out:
+       /* restore clock */
+       settime(start);
+       if (ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
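
To make the edge constants above concrete, a harmless sketch that prints the 32-bit boundary values as calendar dates (nothing is set, only formatted):

/* Sketch: show the values set-2038 walks up to, as dates. */
#include <stdio.h>
#include <time.h>

int main(void)
{
        time_t edges[] = { 1, 0x7fffffffL - 600, 0x7fffffffL };
        int i;

        for (i = 0; i < 3; i++)
                printf("%#10lx -> %s", (long)edges[i], ctime(&edges[i]));
        return 0;
}
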
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
new file mode 100644 (file)
index 0000000..dc88dbc
--- /dev/null
@@ -0,0 +1,79 @@
+/* Set tai offset
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+int set_tai(int offset)
+{
+       struct timex tx;
+
+       memset(&tx, 0, sizeof(tx));
+
+       tx.modes = ADJ_TAI;
+       tx.constant = offset;
+
+       return adjtimex(&tx);
+}
+
+int get_tai(void)
+{
+       struct timex tx;
+
+       memset(&tx, 0, sizeof(tx));
+
+       adjtimex(&tx);
+       return tx.tai;
+}
+
+int main(int argc, char **argv)
+{
+       int i, ret;
+
+       ret = get_tai();
+       printf("tai offset started at %i\n", ret);
+
+       printf("Checking tai offsets can be properly set: ");
+       for (i = 1; i <= 60; i++) {
+               set_tai(i);
+               ret = get_tai();
+               if (ret != i) {
+                       printf("[FAILED] expected: %i got %i\n", i, ret);
+                       return ksft_exit_fail();
+               }
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
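
Once the offset sticks, CLOCK_TAI should lead CLOCK_REALTIME by exactly that many seconds. A quick cross-check, modulo a small race between the two reads (CLOCK_TAI needs kernel 3.10+, so it is defined manually in case the libc headers lack it):

/* Sketch: print the TAI-UTC offset as seen through the clock API. */
#include <stdio.h>
#include <time.h>

#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

int main(void)
{
        struct timespec tai, real;

        clock_gettime(CLOCK_TAI, &tai);
        clock_gettime(CLOCK_REALTIME, &real);
        printf("TAI - REALTIME = %ld s\n", (long)(tai.tv_sec - real.tv_sec));
        return 0;
}
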
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
new file mode 100644 (file)
index 0000000..4fc98c5
--- /dev/null
@@ -0,0 +1,216 @@
+/* set_timer latency test
+ *             John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2014
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the set_timer api is correct
+ *
+ *  To build:
+ *     $ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+
+char *clockstring(int clockid)
+{
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       }
+       return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+       long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+       ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+       return ret;
+}
+
+
+void sigalarm(int signo)
+{
+       long long delta_ns;
+       struct timespec ts;
+
+       clock_gettime(clock_id, &ts);
+       alarmcount++;
+
+       delta_ns = timespec_sub(start_time, ts);
+       delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+       if (delta_ns < 0)
+               printf("%s timer fired early: FAIL\n", clockstring(clock_id));
+
+       if (delta_ns > max_latency_ns)
+               max_latency_ns = delta_ns;
+}
+
+int do_timer(int clock_id, int flags)
+{
+       struct sigevent se;
+       timer_t tm1;
+       struct itimerspec its1, its2;
+       int err;
+
+       /* Set up timer: */
+       memset(&se, 0, sizeof(se));
+       se.sigev_notify = SIGEV_SIGNAL;
+       se.sigev_signo = SIGRTMAX;
+       se.sigev_value.sival_int = 0;
+
+       max_latency_ns = 0;
+       alarmcount = 0;
+
+       err = timer_create(clock_id, &se, &tm1);
+       if (err) {
+               if ((clock_id == CLOCK_REALTIME_ALARM) ||
+                   (clock_id == CLOCK_BOOTTIME_ALARM)) {
+                       printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
+                                       clockstring(clock_id),
+                                       flags ? "ABSTIME":"RELTIME");
+                       return 0;
+               }
+               printf("%s - timer_create() failed\n", clockstring(clock_id));
+               return -1;
+       }
+
+       clock_gettime(clock_id, &start_time);
+       if (flags) {
+               its1.it_value = start_time;
+               its1.it_value.tv_sec += TIMER_SECS;
+       } else {
+               its1.it_value.tv_sec = TIMER_SECS;
+               its1.it_value.tv_nsec = 0;
+       }
+       its1.it_interval.tv_sec = TIMER_SECS;
+       its1.it_interval.tv_nsec = 0;
+
+       err = timer_settime(tm1, flags, &its1, &its2);
+       if (err) {
+               printf("%s - timer_settime() failed\n", clockstring(clock_id));
+               return -1;
+       }
+
+       while (alarmcount < 5)
+               sleep(1);
+
+       printf("%-22s %s max latency: %10lld ns : ",
+                       clockstring(clock_id),
+                       flags ? "ABSTIME":"RELTIME",
+                       max_latency_ns);
+
+       timer_delete(tm1);
+       if (max_latency_ns < UNRESONABLE_LATENCY) {
+               printf("[OK]\n");
+               return 0;
+       }
+       printf("[FAILED]\n");
+       return -1;
+}
+
+int main(void)
+{
+       struct sigaction act;
+       int signum = SIGRTMAX;
+       int ret = 0;
+
+       /* Set up signal handler: */
+       sigfillset(&act.sa_mask);
+       act.sa_flags = 0;
+       act.sa_handler = sigalarm;
+       sigaction(signum, &act, NULL);
+
+       printf("Setting timers for every %i seconds\n", TIMER_SECS);
+       for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+
+               if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) ||
+                               (clock_id == CLOCK_THREAD_CPUTIME_ID) ||
+                               (clock_id == CLOCK_MONOTONIC_RAW) ||
+                               (clock_id == CLOCK_REALTIME_COARSE) ||
+                               (clock_id == CLOCK_MONOTONIC_COARSE) ||
+                               (clock_id == CLOCK_HWSPECIFIC))
+                       continue;
+
+               ret |= do_timer(clock_id, TIMER_ABSTIME);
+               ret |= do_timer(clock_id, 0);
+       }
+       if (ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
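
The machinery the test wraps is an ordinary periodic POSIX timer delivering SIGRTMAX; stripped to its essentials (build with -lrt):

/* Sketch: a periodic 1s timer firing SIGRTMAX, the pattern under test. */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

static volatile sig_atomic_t ticks;

static void on_tick(int signo)
{
        (void)signo;
        ticks++;
}

int main(void)
{
        struct itimerspec its = { { 1, 0 }, { 1, 0 } }; /* interval, first fire */
        struct sigaction sa;
        struct sigevent se;
        timer_t t;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = on_tick;
        sigaction(SIGRTMAX, &sa, NULL);

        memset(&se, 0, sizeof(se));
        se.sigev_notify = SIGEV_SIGNAL;
        se.sigev_signo = SIGRTMAX;
        if (timer_create(CLOCK_MONOTONIC, &se, &t)) {
                perror("timer_create");
                return 1;
        }
        timer_settime(t, 0, &its, NULL);

        while (ticks < 3)
                pause();
        timer_delete(t);
        printf("saw %d ticks\n", (int)ticks);
        return 0;
}
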
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
new file mode 100644 (file)
index 0000000..5562f84
--- /dev/null
@@ -0,0 +1,89 @@
+/* ADJ_FREQ Skew consistency test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ *  and forth and watches for consistency problems. Thus this test requires
+ *  that the inconsistency-check tests be present in the same directory it
+ *  is run from.
+ *
+ *  To build:
+ *     $ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+int main(int argc, char **argv)
+{
+       struct timex tx;
+       int ret, ppm;
+       pid_t pid;
+
+
+       printf("Running Asyncrhonous Frequency Changing Tests...\n");
+
+       pid = fork();
+       if (!pid)
+               return system("./inconsistency-check -c 1 -t 600");
+
+       ppm = 500;
+       ret = 0;
+
+       while (pid != waitpid(pid, &ret, WNOHANG)) {
+               ppm = -ppm;
+               tx.modes = ADJ_FREQUENCY;
+               tx.freq = ppm << 16;
+               adjtimex(&tx);
+               usleep(500000);
+       }
+
+       /* Set things back */
+       tx.modes = ADJ_FREQUENCY;
+       tx.freq = 0; /* ADJ_FREQUENCY reads tx.freq, not tx.offset */
+       adjtimex(&tx);
+
+
+       if (ret) {
+               printf("[FAILED]\n");
+               return ksft_exit_fail();
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
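
Rather than zeroing the frequency in the cleanup above, a gentler harness could save whatever correction ntpd had in place and put it back afterwards; a sketch:

/* Sketch: save and restore the NTP frequency around a disruptive test. */
#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
        struct timex tx = { .modes = 0 };
        long saved;

        adjtimex(&tx);  /* read-only query */
        saved = tx.freq;

        /* ... crank tx.freq back and forth here, as the test does ... */

        tx.modes = ADJ_FREQUENCY; /* needs root */
        tx.freq = saved;
        adjtimex(&tx);
        printf("freq restored to %ld\n", saved);
        return 0;
}
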
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
new file mode 100644 (file)
index 0000000..e632e11
--- /dev/null
@@ -0,0 +1,204 @@
+/* threadtest.c
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2004, 2005, 2006, 2012
+ *             Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc threadtest.c -o threadtest -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+void checklist(struct timespec *list, int size)
+{
+       int i, j;
+       struct timespec *a, *b;
+
+       /* scan the list */
+       for (i = 0; i < size-1; i++) {
+               a = &list[i];
+               b = &list[i+1];
+
+               /* look for any time inconsistencies */
+               if ((b->tv_sec < a->tv_sec) ||
+                       ((b->tv_sec == a->tv_sec) &&
+                        (b->tv_nsec < a->tv_nsec))) {
+
+                       /* flag other threads */
+                       done = 1;
+
+                       /*serialize printing to avoid junky output*/
+                       pthread_mutex_lock(&print_lock);
+
+                       /* dump the list */
+                       printf("\n");
+                       for (j = 0; j < size; j++) {
+                               if (j == i)
+                                       printf("---------------\n");
+                               printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec);
+                               if (j == i+1)
+                                       printf("---------------\n");
+                       }
+                       printf("[FAILED]\n");
+
+                       pthread_mutex_unlock(&print_lock);
+               }
+       }
+}
+
+/* Shared threads all append to one global list,
+ * each taking the lock before filling its slot.
+ * This stresses clock synchronization across CPUs.
+ */
+void *shared_thread(void *arg)
+{
+       while (!done) {
+               /* protect the list */
+               pthread_mutex_lock(&list_lock);
+
+               /* see if we're ready to check the list */
+               if (listcount >= LISTSIZE) {
+                       checklist(global_list, LISTSIZE);
+                       listcount = 0;
+               }
+               clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+               pthread_mutex_unlock(&list_lock);
+       }
+       return NULL;
+}
+
+
+/* Each independent thread fills in its own
+ * list. This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+       struct timespec my_list[LISTSIZE];
+       int count;
+
+       while (!done) {
+               /* fill the list */
+               for (count = 0; count < LISTSIZE; count++)
+                       clock_gettime(CLOCK_MONOTONIC, &my_list[count]);
+               checklist(my_list, LISTSIZE);
+       }
+       return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+int main(int argc, char **argv)
+{
+       int thread_count, i;
+       time_t start, now, runtime;
+       char buf[255];
+       pthread_t pth[MAX_THREADS];
+       int opt;
+       void *tret;
+       int ret = 0;
+       void *(*thread)(void *) = shared_thread;
+
+       thread_count = DEFAULT_THREAD_COUNT;
+       runtime = DEFAULT_RUNTIME;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+               switch (opt) {
+               case 't':
+                       runtime = atoi(optarg);
+                       break;
+               case 'n':
+                       thread_count = atoi(optarg);
+                       break;
+               case 'i':
+                       thread = independent_thread;
+                       printf("using independent threads\n");
+                       break;
+               default:
+                       printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+                       printf("        -t: time to run\n");
+                       printf("        -n: number of threads\n");
+                       printf("        -i: use independent threads\n");
+                       return -1;
+               }
+       }
+
+       if (thread_count > MAX_THREADS)
+               thread_count = MAX_THREADS;
+
+
+       setbuf(stdout, NULL);
+
+       start = time(0);
+       strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start));
+       printf("%s\n", buf);
+       printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime);
+
+       /* spawn */
+       for (i = 0; i < thread_count; i++)
+               pthread_create(&pth[i], 0, thread, 0);
+
+       while (time(&now) < start + runtime) {
+               sleep(1);
+               if (done) {
+                       ret = 1;
+                       strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now));
+                       printf("%s\n", buf);
+                       goto out;
+               }
+       }
+       printf("[OK]\n");
+       done = 1;
+
+out:
+       /* wait */
+       for (i = 0; i < thread_count; i++)
+               pthread_join(pth[i], &tret);
+
+       /* die */
+       if (ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
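
The ordering test in checklist() is lexicographic comparison on (tv_sec, tv_nsec); written out as a standalone predicate:

/* Sketch: "b happened before a" for timespecs. */
#include <stdio.h>
#include <time.h>

static int ts_before(struct timespec b, struct timespec a)
{
        if (b.tv_sec != a.tv_sec)
                return b.tv_sec < a.tv_sec;
        return b.tv_nsec < a.tv_nsec;
}

int main(void)
{
        struct timespec x = { 1, 999999999 }, y = { 2, 0 };

        printf("%d %d\n", ts_before(x, y), ts_before(y, x)); /* prints: 1 0 */
        return 0;
}
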
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
new file mode 100644 (file)
index 0000000..e86d937
--- /dev/null
@@ -0,0 +1,202 @@
+/* valid adjtimex test
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2015
+ *              Licensed under the GPLv2
+ *
+ *  This test validates adjtimex interface with valid
+ *  and invalid test data.
+ *
+ *  Usage: valid-adjtimex
+ *
+ *  To build:
+ *     $ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000L
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+       struct timex tx;
+       int ret;
+
+       tx.modes = ADJ_STATUS;
+       tx.status = 0;
+       ret = adjtimex(&tx);
+       return ret;
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
+long valid_freq[NUM_FREQ_VALID] = {
+       -499<<16,
+       -450<<16,
+       -400<<16,
+       -350<<16,
+       -300<<16,
+       -250<<16,
+       -200<<16,
+       -150<<16,
+       -100<<16,
+       -75<<16,
+       -50<<16,
+       -25<<16,
+       -10<<16,
+       -5<<16,
+       -1<<16,
+       -1000,
+       1<<16,
+       5<<16,
+       10<<16,
+       25<<16,
+       50<<16,
+       75<<16,
+       100<<16,
+       150<<16,
+       200<<16,
+       250<<16,
+       300<<16,
+       350<<16,
+       400<<16,
+       450<<16,
+       499<<16,
+};
+
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+       -1000<<16,
+       -550<<16,
+       550<<16,
+       1000<<16,
+};
+
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+
+long invalid_freq[NUM_FREQ_INVALID] = {
+       LONG_MAX,
+       LONG_MIN,
+};
+
+int validate_freq(void)
+{
+       struct timex tx;
+       int ret, pass = 0;
+       int i;
+
+       clear_time_state();
+
+       memset(&tx, 0, sizeof(struct timex));
+       /* Set the leap second insert flag */
+
+       printf("Testing ADJ_FREQ... ");
+       for (i = 0; i < NUM_FREQ_VALID; i++) {
+               tx.modes = ADJ_FREQUENCY;
+               tx.freq = valid_freq[i];
+
+               ret = adjtimex(&tx);
+               if (ret < 0) {
+                       printf("[FAIL]\n");
+                       printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+                               valid_freq[i], valid_freq[i]>>16);
+                       pass = -1;
+                       goto out;
+               }
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.freq != valid_freq[i]) {
+                       printf("Warning: freq value %ld not what we set it (%ld)!\n",
+                                       tx.freq, valid_freq[i]);
+               }
+       }
+       for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+               tx.modes = ADJ_FREQUENCY;
+               tx.freq = outofrange_freq[i];
+
+               ret = adjtimex(&tx);
+               if (ret < 0) {
+                       printf("[FAIL]\n");
+                       printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+                               outofrange_freq[i], outofrange_freq[i]>>16);
+                       pass = -1;
+                       goto out;
+               }
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.freq == outofrange_freq[i]) {
+                       printf("[FAIL]\n");
+                       printf("ERROR: out of range value %ld actually set!\n",
+                                       tx.freq);
+                       pass = -1;
+                       goto out;
+               }
+       }
+
+
+       if (sizeof(long) == 8) { /* this case only applies to 64-bit systems */
+               for (i = 0; i < NUM_FREQ_INVALID; i++) {
+                       tx.modes = ADJ_FREQUENCY;
+                       tx.freq = invalid_freq[i];
+                       ret = adjtimex(&tx);
+                       if (ret >= 0) {
+                               printf("[FAIL]\n");
+                               printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+                                       invalid_freq[i]);
+                               pass = -1;
+                               goto out;
+                       }
+               }
+       }
+
+       printf("[OK]\n");
+out:
+       /* reset freq to zero */
+       tx.modes = ADJ_FREQUENCY;
+       tx.freq = 0;
+       ret = adjtimex(&tx);
+
+       return pass;
+}
+
+
+int main(int argc, char **argv)
+{
+       if (validate_freq())
+               return ksft_exit_fail();
+
+       return ksft_exit_pass();
+}
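
A note on the frequency values used above: tx.freq is expressed in 16.16
fixed-point parts per million, which is why the tables shift whole-ppm
values left by 16 and why the out-of-range table straddles the
conventional +/-500 ppm NTP limit.  Hypothetical helpers (not part of the
patch) that make the encoding explicit:

	/* Convert between whole ppm and the 16.16 encoding adjtimex() uses. */
	static inline long ppm_to_freq(long ppm)
	{
		return ppm << 16;	/* 499 ppm -> 499 << 16 == 32702464 */
	}

	static inline long freq_to_ppm(long freq)
	{
		return freq >> 16;	/* matches the ">> 16" prints above */
	}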
index 12c9d15bab075a6eaaa5f171a0833841c829b470..d401b63c5b1ad09273d2228add75d8635694172f 100644 (file)
@@ -3,5 +3,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-run_tests: all
-       ./test_user_copy.sh
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
index 077828c889f1377886b98c93349919d55ccb57a2..a5ce9534eb15f35335b389ec89f0379bfc45c224 100644 (file)
@@ -1,6 +1,5 @@
 # Makefile for vm selftests
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest
 BINARIES += transhuge-stress
@@ -9,8 +8,10 @@ all: $(BINARIES)
 %: %.c
        $(CC) $(CFLAGS) -o $@ $^ -lrt
 
-run_tests: all
-       @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1)
+TEST_PROGS := run_vmtests
+TEST_FILES := $(BINARIES)
+
+include ../lib.mk
 
 clean:
        $(RM) $(BINARIES)
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
new file mode 100644 (file)
index 0000000..15034fe
--- /dev/null
@@ -0,0 +1,2 @@
+*_32
+*_64
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
new file mode 100644 (file)
index 0000000..f0a7918
--- /dev/null
@@ -0,0 +1,48 @@
+.PHONY: all all_32 all_64 check_build32 clean run_tests
+
+TARGETS_C_BOTHBITS := sigreturn
+
+BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
+BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+UNAME_P := $(shell uname -p)
+
+# Always build 32-bit tests
+all: all_32
+
+# If we're on a 64-bit host, build 64-bit tests as well
+ifeq ($(UNAME_P),x86_64)
+all: all_64
+endif
+
+all_32: check_build32 $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+clean:
+       $(RM) $(BINARIES_32) $(BINARIES_64)
+
+run_tests:
+       ./run_x86_tests.sh
+
+$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c
+       $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
+       $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+check_build32:
+       @if ! $(CC) -m32 -o /dev/null trivial_32bit_program.c; then     \
+         echo "Warning: you seem to have a broken 32-bit build" 2>&1;  \
+         echo "environment.  If you are using a Debian-like";          \
+         echo " distribution, try:";                                   \
+         echo "";                                                      \
+         echo "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+         echo "";                                                      \
+         echo "If you are using a Fedora-like distribution, try:";     \
+         echo "";                                                      \
+         echo "  yum install glibc-devel.*i686";                       \
+         exit 1;                                                       \
+       fi
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
new file mode 100644 (file)
index 0000000..3d3ec65
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# This is deliberately minimal.  IMO kselftests should provide a standard
+# script here.
+./sigreturn_32 || exit 1
+
+if [[ "$uname -p" -eq "x86_64" ]]; then
+    ./sigreturn_64 || exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
new file mode 100644 (file)
index 0000000..b5aa1ba
--- /dev/null
@@ -0,0 +1,684 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test are documented below.
+ *
+ * Do not run this on outdated, unpatched kernels: you risk nasty crashes.
+ */
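
As a standalone illustration of the raise-and-rewrite pattern described
above (a minimal sketch, independent of this test; it assumes x86_64 glibc
register names and rewrites only a general-purpose register rather than
CS/SS):

	#define _GNU_SOURCE
	#include <signal.h>
	#include <stdio.h>
	#include <string.h>
	#include <ucontext.h>

	static void rewrite(int sig, siginfo_t *info, void *ctx_void)
	{
		ucontext_t *ctx = ctx_void;

		/* Whatever we store in the saved context is what
		 * sigreturn(2) restores when this handler returns. */
		ctx->uc_mcontext.gregs[REG_RAX] = 42;
	}

	int main(void)
	{
		struct sigaction sa;

		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = rewrite;
		sa.sa_flags = SA_SIGINFO;
		sigaction(SIGUSR1, &sa, NULL);

		/* raise() resumes through sigreturn(2), so the rewritten
		 * RAX typically shows up as its return value. */
		printf("raise returned %d\n", raise(SIGUSR1));
		return 0;
	}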
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation.  (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline.  Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+     ".type int3, @function\n\t"
+     ".align 4096\n\t"
+     "int3:\n\t"
+     "mov %ss,%eax\n\t"
+     "int3\n\t"
+     ".size int3, . - int3\n\t"
+     ".align 4096, 0xcc\n\t"
+     ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepare:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ *   descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ *   stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table.  These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
+static unsigned short GDT3(int idx)
+{
+       return (idx << 3) | 3;
+}
+
+static unsigned short LDT3(int idx)
+{
+       return (idx << 3) | 7;
+}
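
For reference (general x86 segmentation, not specific to this patch): a
selector packs the descriptor-table index into bits 15..3, the table
indicator (0 = GDT, 1 = LDT) into bit 2, and the requested privilege level
into bits 1..0, so the helpers above hard-code RPL 3 and, in LDT3(), the
LDT bit:

	/* Worked examples of the encoding:
	 *   GDT3(6) == (6 << 3) | 3 == 0x33   (GDT entry 6, RPL 3)
	 *   LDT3(6) == (6 << 3) | 7 == 0x37   (LDT entry 6, TI = 1, RPL 3)
	 */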
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_sigaction = handler;
+       sa.sa_flags = SA_SIGINFO | flags;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_handler = SIG_DFL;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+                   const char *name)
+{
+       if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
+               *var = LDT3(desc->entry_number);
+       } else {
+               printf("[NOTE]\tFailed to create %s segment\n", name);
+               *var = 0;
+       }
+}
+
+static void setup_ldt(void)
+{
+       if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+               errx(1, "stack16 is too high\n");
+       if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+               errx(1, "int3 is too high\n");
+
+       ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+       const struct user_desc code16_desc = {
+               .entry_number    = LDT_OFFSET + 0,
+               .base_addr       = (unsigned long)int3,
+               .limit           = 4095,
+               .seg_32bit       = 0,
+               .contents        = 2, /* Code, not conforming */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+       add_ldt(&code16_desc, &code16_sel, "code16");
+
+       const struct user_desc data16_desc = {
+               .entry_number    = LDT_OFFSET + 1,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 0,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+       add_ldt(&data16_desc, &data16_sel, "data16");
+
+       const struct user_desc npcode32_desc = {
+               .entry_number    = LDT_OFFSET + 3,
+               .base_addr       = (unsigned long)int3,
+               .limit           = 4095,
+               .seg_32bit       = 1,
+               .contents        = 2, /* Code, not conforming */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 1,
+               .useable         = 0
+       };
+       add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+       const struct user_desc npdata32_desc = {
+               .entry_number    = LDT_OFFSET + 4,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 1,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 1,
+               .useable         = 0
+       };
+       add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+       struct user_desc gdt_data16_desc = {
+               .entry_number    = -1,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 0,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+
+       if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+               /*
+                * This probably indicates vulnerability to CVE-2014-8133.
+                * Merely getting here isn't definitive, though, and we'll
+                * diagnose the problem for real later on.
+                */
+               printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+                      gdt_data16_desc.entry_number);
+               gdt_data16_idx = gdt_data16_desc.entry_number;
+       } else {
+               printf("[OK]\tset_thread_area refused 16-bit data\n");
+       }
+
+       struct user_desc gdt_npdata32_desc = {
+               .entry_number    = -1,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 1,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 1,
+               .useable         = 0
+       };
+
+       if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+               /*
+                * As a hardening measure, newer kernels don't allow this.
+                */
+               printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+                      gdt_npdata32_desc.entry_number);
+               gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+       } else {
+               printf("[OK]\tset_thread_area refused 16-bit data\n");
+       }
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_AX REG_RAX
+
+struct selectors {
+       unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+       struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+       return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+       struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+       return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_AX REG_EAX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+       return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+       return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+/*
+ * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
+ * int3 trampoline.  Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+       memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+       *csptr(ctx) = sig_cs;
+       *ssptr(ctx) = sig_ss;
+
+       ctx->uc_mcontext.gregs[REG_IP] =
+               sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+       ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+       ctx->uc_mcontext.gregs[REG_AX] = 0;
+
+       memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+       requested_regs[REG_AX] = *ssptr(ctx);   /* The asm code does this. */
+
+       return;
+}
+
+/*
+ * Called after a successful sigreturn.  Restores our state so that
+ * the original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+       sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+       sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+       unsigned short ss;
+       asm ("mov %%ss,%0" : "=r" (ss));
+
+       greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+       if (asm_ss != sig_ss && sig == SIGTRAP) {
+               /* Sanity check failure. */
+               printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+                      ss, *ssptr(ctx), (unsigned long long)asm_ss);
+               nerrs++;
+       }
+
+       memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+       memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+       sig_trapped = sig;
+}
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+int cs_bitness(unsigned short cs)
+{
+       uint32_t valid = 0, ar;
+       asm ("lar %[cs], %[ar]\n\t"
+            "jnz 1f\n\t"
+            "mov $1, %[valid]\n\t"
+            "1:"
+            : [ar] "=r" (ar), [valid] "+rm" (valid)
+            : [cs] "r" (cs));
+
+       if (!valid)
+               return -1;
+
+       bool db = (ar & (1 << 22));
+       bool l = (ar & (1 << 21));
+
+       if (!(ar & (1<<11)))
+           return -1;  /* Not code. */
+
+       if (l && !db)
+               return 64;
+       else if (!l && db)
+               return 32;
+       else if (!l && !db)
+               return 16;
+       else
+               return -1;      /* Unknown bitness. */
+}
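
The access-rights word returned by LAR mirrors the high dword of the
descriptor, so the bits tested above are the descriptor's code/data type
(bit 11), L (bit 21), and D/B (bit 22) flags.  A worked decode, assuming a
typical Linux 64-bit user code segment:

	/* e.g. ar == 0x00a0fb00 (hypothetical but representative value):
	 *   bit 11 set                          -> a code segment
	 *   bit 21 (L) set, bit 22 (D/B) clear  -> cs_bitness() returns 64
	 */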
+
+/* Finds a usable code segment of the requested bitness. */
+int find_cs(int bitness)
+{
+       unsigned short my_cs;
+
+       asm ("mov %%cs,%0" :  "=r" (my_cs));
+
+       if (cs_bitness(my_cs) == bitness)
+               return my_cs;
+       if (cs_bitness(my_cs + (2 << 3)) == bitness)
+               return my_cs + (2 << 3);
+       if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+           return my_cs - (2 << 3);
+       if (cs_bitness(code16_sel) == bitness)
+               return code16_sel;
+
+       printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+       return -1;
+}
+
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+       int cs = find_cs(cs_bits);
+       if (cs == -1) {
+               printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+                      cs_bits, use_16bit_ss ? 16 : 32);
+               return 0;
+       }
+
+       if (force_ss != -1) {
+               sig_ss = force_ss;
+       } else {
+               if (use_16bit_ss) {
+                       if (!data16_sel) {
+                               printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+                                      cs_bits);
+                               return 0;
+                       }
+                       sig_ss = data16_sel;
+               } else {
+                       asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+               }
+       }
+
+       sig_cs = cs;
+
+       printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+              cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+              (sig_ss & 4) ? "" : ", GDT");
+
+       raise(SIGUSR1);
+
+       nerrs = 0;
+
+       /*
+        * Check that each register had an acceptable value when the
+        * int3 trampoline was invoked.
+        */
+       for (int i = 0; i < NGREG; i++) {
+               greg_t req = requested_regs[i], res = resulting_regs[i];
+               if (i == REG_TRAPNO || i == REG_IP)
+                       continue;       /* don't care */
+               if (i == REG_SP) {
+                       printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
+                              (unsigned long long)res);
+
+                       /*
+                        * In many circumstances, the high 32 bits of rsp
+                        * are zeroed.  For example, we could be a real
+                        * 32-bit program, or we could hit any of a number
+                        * of poorly-documented IRET or segmented ESP
+                        * oddities.  If this happens, it's okay.
+                        */
+                       if (res == (req & 0xFFFFFFFF))
+                               continue;  /* OK; not expected to work */
+               }
+
+               bool ignore_reg = false;
+#if __i386__
+               if (i == REG_UESP)
+                       ignore_reg = true;
+#else
+               if (i == REG_CSGSFS) {
+                       struct selectors *req_sels =
+                               (void *)&requested_regs[REG_CSGSFS];
+                       struct selectors *res_sels =
+                               (void *)&resulting_regs[REG_CSGSFS];
+                       if (req_sels->cs != res_sels->cs) {
+                               printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+                                      req_sels->cs, res_sels->cs);
+                               nerrs++;
+                       }
+
+                       if (req_sels->ss != res_sels->ss) {
+                               printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+                                      req_sels->ss, res_sels->ss);
+                               nerrs++;
+                       }
+
+                       continue;
+               }
+#endif
+
+               /* Sanity check on the kernel */
+               if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
+                       printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+                              (unsigned long long)requested_regs[i],
+                              (unsigned long long)resulting_regs[i]);
+                       nerrs++;
+                       continue;
+               }
+
+               if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
+                       /*
+                        * SP is particularly interesting here.  The
+                        * usual cause of failures is that we hit the
+                        * nasty IRET case of returning to a 16-bit SS,
+                        * in which case bits 16:31 of the *kernel*
+                        * stack pointer persist in ESP.
+                        */
+                       printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+                              i, (unsigned long long)requested_regs[i],
+                              (unsigned long long)resulting_regs[i]);
+                       nerrs++;
+               }
+       }
+
+       if (nerrs == 0)
+               printf("[OK]\tall registers okay\n");
+
+       return nerrs;
+}
+
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+       int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+       if (cs == -1)
+               return 0;
+
+       sig_cs = cs;
+       sig_ss = ss;
+
+       printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+              cs_bits, sig_cs, sig_ss);
+
+       sig_trapped = 0;
+       raise(SIGUSR1);
+       if (sig_trapped) {
+               char errdesc[32] = "";
+               if (sig_err) {
+                       const char *src = (sig_err & 1) ? " EXT" : "";
+                       const char *table;
+                       if ((sig_err & 0x6) == 0x0)
+                               table = "GDT";
+                       else if ((sig_err & 0x6) == 0x4)
+                               table = "LDT";
+                       else if ((sig_err & 0x6) == 0x2)
+                               table = "IDT";
+                       else
+                               table = "???";
+
+                       sprintf(errdesc, "%s%s index %d, ",
+                               table, src, sig_err >> 3);
+               }
+
+               char trapname[32];
+               if (sig_trapno == 13)
+                       strcpy(trapname, "GP");
+               else if (sig_trapno == 11)
+                       strcpy(trapname, "NP");
+               else if (sig_trapno == 12)
+                       strcpy(trapname, "SS");
+               else if (sig_trapno == 32)
+                       strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
+               else
+                       sprintf(trapname, "%d", sig_trapno);
+
+               printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+                      trapname, (unsigned long)sig_err,
+                      errdesc, strsignal(sig_trapped));
+               return 0;
+       } else {
+               printf("[FAIL]\tDid not get SIGSEGV\n");
+               return 1;
+       }
+}
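
The error-code decode above follows the architectural selector error code
layout: bit 0 flags an externally-caused event, bits 2..1 select the
descriptor table, and the remaining bits hold the selector index.  A
worked decode with an illustrative value:

	/* sig_err == 0x2c (hypothetical):
	 *   0x2c & 0x1 == 0    -> no EXT
	 *   0x2c & 0x6 == 0x4  -> LDT
	 *   0x2c >> 3  == 5    -> descriptor index 5
	 * so the test would print something like
	 * "[OK]  Got #GP(0x2c) (i.e. LDT index 5, ...)".
	 */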
+
+int main()
+{
+       int total_nerrs = 0;
+       unsigned short my_cs, my_ss;
+
+       asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+       asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+       setup_ldt();
+
+       stack_t stack = {
+               .ss_sp = altstack_data,
+               .ss_size = SIGSTKSZ,
+       };
+       if (sigaltstack(&stack, NULL) != 0)
+               err(1, "sigaltstack");
+
+       sethandler(SIGUSR1, sigusr1, 0);
+       sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+       /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+       total_nerrs += test_valid_sigreturn(64, false, -1);
+       total_nerrs += test_valid_sigreturn(32, false, -1);
+       total_nerrs += test_valid_sigreturn(16, false, -1);
+
+       /*
+        * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+        * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
+        *
+        * This catches the original missing-espfix-on-64-bit-kernels issue
+        * as well as CVE-2014-8134.
+        */
+       total_nerrs += test_valid_sigreturn(64, true, -1);
+       total_nerrs += test_valid_sigreturn(32, true, -1);
+       total_nerrs += test_valid_sigreturn(16, true, -1);
+
+       if (gdt_data16_idx) {
+               /*
+                * For performance reasons, Linux skips espfix if SS points
+                * to the GDT.  If we were able to allocate a 16-bit SS in
+                * the GDT, see if it leaks parts of the kernel stack pointer.
+                *
+                * This tests for CVE-2014-8133.
+                */
+               total_nerrs += test_valid_sigreturn(64, true,
+                                                   GDT3(gdt_data16_idx));
+               total_nerrs += test_valid_sigreturn(32, true,
+                                                   GDT3(gdt_data16_idx));
+               total_nerrs += test_valid_sigreturn(16, true,
+                                                   GDT3(gdt_data16_idx));
+       }
+
+       /*
+        * We're done testing valid sigreturn cases.  Now we test states
+        * for which sigreturn itself will succeed but the subsequent
+        * entry to user mode will fail.
+        *
+        * Depending on the failure mode and the kernel bitness, these
+        * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+        */
+       clearhandler(SIGTRAP);
+       sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+       sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+       sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
+
+       /* Easy failures: invalid SS, resulting in #GP(0) */
+       test_bad_iret(64, ldt_nonexistent_sel, -1);
+       test_bad_iret(32, ldt_nonexistent_sel, -1);
+       test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+       /* These fail because SS isn't a data segment, resulting in #GP(SS) */
+       test_bad_iret(64, my_cs, -1);
+       test_bad_iret(32, my_cs, -1);
+       test_bad_iret(16, my_cs, -1);
+
+       /* Try to return to a not-present code segment, triggering #NP(CS). */
+       test_bad_iret(32, my_ss, npcode32_sel);
+
+       /*
+        * Try to return to a not-present but otherwise valid data segment.
+        * This will cause IRET to fail with #SS on the espfix stack.  This
+        * exercises CVE-2014-9322.
+        *
+        * Note that, if espfix is enabled, 64-bit Linux will lose track
+        * of the actual cause of failure and report #GP(0) instead.
+        * This would be very difficult for Linux to avoid, because
+        * espfix64 causes IRET failures to be promoted to #DF, so the
+        * original exception frame is never pushed onto the stack.
+        */
+       test_bad_iret(32, npdata32_sel, -1);
+
+       /*
+        * Try to return to a not-present but otherwise valid data
+        * segment without invoking espfix.  Newer kernels don't allow
+        * this to happen in the first place.  On older kernels, though,
+        * this can trigger CVE-2014-9322.
+        */
+       if (gdt_npdata32_idx)
+               test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+       return total_nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
new file mode 100644 (file)
index 0000000..2e231be
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#include <stdio.h>
+
+int main()
+{
+       printf("\n");
+
+       return 0;
+}
index 6e54f3542126b189be45d2bce32b009721a9d3ab..98c95f2fcba4a63912fb81fbafd3854b08835e00 100644 (file)
@@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/*
+ * Work function for handling the backup timer that we schedule when a vcpu is
+ * no longer running, but had a timer programmed to fire in the future.
+ */
 static void kvm_timer_inject_irq_work(struct work_struct *work)
 {
        struct kvm_vcpu *vcpu;
 
        vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
        vcpu->arch.timer_cpu.armed = false;
-       kvm_timer_inject_irq(vcpu);
+
+       /*
+        * If the vcpu is blocked we want to wake it up so that it will see
+        * the timer has expired when entering the guest.
+        */
+       kvm_vcpu_kick(vcpu);
 }
 
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
@@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
        return HRTIMER_NORESTART;
 }
 
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
+{
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       cycle_t cval, now;
+
+       if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
+               !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+               return false;
+
+       cval = timer->cntv_cval;
+       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+       return cval <= now;
+}
+
 /**
  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
  * @vcpu: The vcpu pointer
@@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
         * populate the CPU timer again.
         */
        timer_disarm(timer);
+
+       /*
+        * If the timer expired while we were not scheduled, now is the time
+        * to inject it.
+        */
+       if (kvm_timer_should_fire(vcpu))
+               kvm_timer_inject_irq(vcpu);
 }
 
 /**
@@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
        cycle_t cval, now;
        u64 ns;
 
-       if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-               !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
-               return;
-
-       cval = timer->cntv_cval;
-       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
        BUG_ON(timer_is_armed(timer));
 
-       if (cval <= now) {
+       if (kvm_timer_should_fire(vcpu)) {
                /*
                 * Timer has already expired while we were not
                 * looking. Inject the interrupt and carry on.
@@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
                return;
        }
 
+       cval = timer->cntv_cval;
+       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
        ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
                                 &timecounter->frac);
        timer_arm(timer, ns);
index 19c6210f02cf5c2003b96a3ecbb198321114a2a6..13907970d11c3a94b8dc0a5b1848973035cc41cf 100644 (file)
@@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
                                             vcpu->vcpu_id);
 }
 
+static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
+                                      struct kvm_exit_mmio *mmio,
+                                      phys_addr_t offset)
+{
+       return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+                                         vcpu->vcpu_id);
+}
+
+static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
+                                        struct kvm_exit_mmio *mmio,
+                                        phys_addr_t offset)
+{
+       return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+                                           vcpu->vcpu_id);
+}
+
 static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
                                     struct kvm_exit_mmio *mmio,
                                     phys_addr_t offset)
@@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
                return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 }
 
-static const struct kvm_mmio_range vgic_dist_ranges[] = {
+static const struct vgic_io_range vgic_dist_ranges[] = {
        {
                .base           = GIC_DIST_CTRL,
                .len            = 12,
@@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
                .base           = GIC_DIST_ACTIVE_SET,
                .len            = VGIC_MAX_IRQS / 8,
                .bits_per_irq   = 1,
-               .handle_mmio    = handle_mmio_raz_wi,
+               .handle_mmio    = handle_mmio_set_active_reg,
        },
        {
                .base           = GIC_DIST_ACTIVE_CLEAR,
                .len            = VGIC_MAX_IRQS / 8,
                .bits_per_irq   = 1,
-               .handle_mmio    = handle_mmio_raz_wi,
+               .handle_mmio    = handle_mmio_clear_active_reg,
        },
        {
                .base           = GIC_DIST_PRI,
@@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
        {}
 };
 
-static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                               struct kvm_exit_mmio *mmio)
-{
-       unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
-
-       if (!is_in_range(mmio->phys_addr, mmio->len, base,
-                        KVM_VGIC_V2_DIST_SIZE))
-               return false;
-
-       /* GICv2 does not support accesses wider than 32 bits */
-       if (mmio->len > 4) {
-               kvm_inject_dabt(vcpu, mmio->phys_addr);
-               return true;
-       }
-
-       return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
-}
-
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 static int vgic_v2_map_resources(struct kvm *kvm,
                                 const struct vgic_params *params)
 {
+       struct vgic_dist *dist = &kvm->arch.vgic;
        int ret = 0;
 
        if (!irqchip_in_kernel(kvm))
@@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm,
        if (vgic_ready(kvm))
                goto out;
 
-       if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-           IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+       if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+           IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
                kvm_err("Need to set vgic cpu and dist addresses first\n");
                ret = -ENXIO;
                goto out;
        }
 
+       vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+                                KVM_VGIC_V2_DIST_SIZE,
+                                vgic_dist_ranges, -1, &dist->dist_iodev);
+
        /*
         * Initialize the vgic if this hasn't already been done on demand by
         * accessing the vgic state from userspace.
@@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm,
        ret = vgic_init(kvm);
        if (ret) {
                kvm_err("Unable to allocate maps\n");
-               goto out;
+               goto out_unregister;
        }
 
-       ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+       ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
                                    params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
                                    true);
        if (ret) {
                kvm_err("Unable to remap VGIC CPU to VCPU\n");
-               goto out;
+               goto out_unregister;
        }
 
-       kvm->arch.vgic.ready = true;
+       dist->ready = true;
+       goto out;
+
+out_unregister:
+       kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+
 out:
        if (ret)
                kvm_vgic_destroy(kvm);
@@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
 
-       dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
        dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
        dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
        dist->vm_ops.init_model = vgic_v2_init_model;
@@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
  * CPU Interface Register accesses - these are not accessed by the VM, but by
  * user space for saving and restoring VGIC state.
  */
-static const struct kvm_mmio_range vgic_cpu_ranges[] = {
+static const struct vgic_io_range vgic_cpu_ranges[] = {
        {
                .base           = GIC_CPU_CTRL,
                .len            = 12,
@@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
                                 struct kvm_device_attr *attr,
                                 u32 *reg, bool is_write)
 {
-       const struct kvm_mmio_range *r = NULL, *ranges;
+       const struct vgic_io_range *r = NULL, *ranges;
        phys_addr_t offset;
        int ret, cpuid, c;
        struct kvm_vcpu *vcpu, *tmp_vcpu;
        struct vgic_dist *vgic;
        struct kvm_exit_mmio mmio;
+       u32 data;
 
        offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
        cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
@@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
        mmio.len = 4;
        mmio.is_write = is_write;
+       mmio.data = &data;
        if (is_write)
                mmio_data_write(&mmio, ~0, *reg);
        switch (attr->group) {
@@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
        default:
                BUG();
        }
-       r = vgic_find_range(ranges, &mmio, offset);
+       r = vgic_find_range(ranges, 4, offset);
 
        if (unlikely(!r || !r->handle_mmio)) {
                ret = -ENXIO;
index b3f154631515eda6bccef2a6094757cf0f0b135f..e9c3a7a83833bf2ef058cfd407b92bafe20da073 100644 (file)
@@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
        return false;
 }
 
-static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+static const struct vgic_io_range vgic_v3_dist_ranges[] = {
        {
                .base           = GICD_CTLR,
                .len            = 0x04,
@@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
        {},
 };
 
+static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
+                                   struct kvm_exit_mmio *mmio,
+                                   phys_addr_t offset)
+{
+       /* since we don't support LPIs, this register is zero for now */
+       vgic_reg_access(mmio, NULL, offset,
+                       ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+       return false;
+}
+
+static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
+                                    struct kvm_exit_mmio *mmio,
+                                    phys_addr_t offset)
+{
+       u32 reg;
+       u64 mpidr;
+       struct kvm_vcpu *redist_vcpu = mmio->private;
+       int target_vcpu_id = redist_vcpu->vcpu_id;
+
+       /* the upper 32 bits contain the affinity value */
+       if ((offset & ~3) == 4) {
+               mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
+               reg = compress_mpidr(mpidr);
+
+               vgic_reg_access(mmio, &reg, offset,
+                               ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+               return false;
+       }
+
+       reg = redist_vcpu->vcpu_id << 8;
+       if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+               reg |= GICR_TYPER_LAST;
+       vgic_reg_access(mmio, &reg, offset,
+                       ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+       return false;
+}
+
 static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
                                              struct kvm_exit_mmio *mmio,
                                              phys_addr_t offset)
@@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
        return vgic_handle_cfg_reg(reg, mmio, offset);
 }
 
-static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
+#define SGI_base(x) ((x) + SZ_64K)
+
+static const struct vgic_io_range vgic_redist_ranges[] = {
+       {
+               .base           = GICR_CTLR,
+               .len            = 0x04,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_ctlr_redist,
+       },
+       {
+               .base           = GICR_TYPER,
+               .len            = 0x08,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_typer_redist,
+       },
+       {
+               .base           = GICR_IIDR,
+               .len            = 0x04,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_iidr,
+       },
+       {
+               .base           = GICR_WAKER,
+               .len            = 0x04,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_raz_wi,
+       },
        {
-               .base           = GICR_IGROUPR0,
+               .base           = GICR_IDREGS,
+               .len            = 0x30,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_idregs,
+       },
+       {
+               .base           = SGI_base(GICR_IGROUPR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_rao_wi,
        },
        {
-               .base           = GICR_ISENABLER0,
+               .base           = SGI_base(GICR_ISENABLER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_set_enable_reg_redist,
        },
        {
-               .base           = GICR_ICENABLER0,
+               .base           = SGI_base(GICR_ICENABLER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_clear_enable_reg_redist,
        },
        {
-               .base           = GICR_ISPENDR0,
+               .base           = SGI_base(GICR_ISPENDR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_set_pending_reg_redist,
        },
        {
-               .base           = GICR_ICPENDR0,
+               .base           = SGI_base(GICR_ICPENDR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_clear_pending_reg_redist,
        },
        {
-               .base           = GICR_ISACTIVER0,
+               .base           = SGI_base(GICR_ISACTIVER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {
-               .base           = GICR_ICACTIVER0,
+               .base           = SGI_base(GICR_ICACTIVER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {
-               .base           = GICR_IPRIORITYR0,
+               .base           = SGI_base(GICR_IPRIORITYR0),
                .len            = 0x20,
                .bits_per_irq   = 8,
                .handle_mmio    = handle_mmio_priority_reg_redist,
        },
        {
-               .base           = GICR_ICFGR0,
+               .base           = SGI_base(GICR_ICFGR0),
                .len            = 0x08,
                .bits_per_irq   = 2,
                .handle_mmio    = handle_mmio_cfg_reg_redist,
        },
        {
-               .base           = GICR_IGRPMODR0,
+               .base           = SGI_base(GICR_IGRPMODR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {
-               .base           = GICR_NSACR,
+               .base           = SGI_base(GICR_NSACR),
                .len            = 0x04,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {},
 };
 
-static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
-                                   struct kvm_exit_mmio *mmio,
-                                   phys_addr_t offset)
-{
-       /* since we don't support LPIs, this register is zero for now */
-       vgic_reg_access(mmio, NULL, offset,
-                       ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
-       return false;
-}
-
-static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
-                                    struct kvm_exit_mmio *mmio,
-                                    phys_addr_t offset)
-{
-       u32 reg;
-       u64 mpidr;
-       struct kvm_vcpu *redist_vcpu = mmio->private;
-       int target_vcpu_id = redist_vcpu->vcpu_id;
-
-       /* the upper 32 bits contain the affinity value */
-       if ((offset & ~3) == 4) {
-               mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
-               reg = compress_mpidr(mpidr);
-
-               vgic_reg_access(mmio, &reg, offset,
-                               ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-               return false;
-       }
-
-       reg = redist_vcpu->vcpu_id << 8;
-       if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
-               reg |= GICR_TYPER_LAST;
-       vgic_reg_access(mmio, &reg, offset,
-                       ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-       return false;
-}
-
-static const struct kvm_mmio_range vgic_redist_ranges[] = {
-       {
-               .base           = GICR_CTLR,
-               .len            = 0x04,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_ctlr_redist,
-       },
-       {
-               .base           = GICR_TYPER,
-               .len            = 0x08,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_typer_redist,
-       },
-       {
-               .base           = GICR_IIDR,
-               .len            = 0x04,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_iidr,
-       },
-       {
-               .base           = GICR_WAKER,
-               .len            = 0x04,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_raz_wi,
-       },
-       {
-               .base           = GICR_IDREGS,
-               .len            = 0x30,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_idregs,
-       },
-       {},
-};
-
-/*
- * This function splits accesses between the distributor and the two
- * redistributor parts (private/SPI). As each redistributor is accessible
- * from any CPU, we have to determine the affected VCPU by taking the faulting
- * address into account. We then pass this VCPU to the handler function via
- * the private parameter.
- */
-#define SGI_BASE_OFFSET SZ_64K
-static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                               struct kvm_exit_mmio *mmio)
-{
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-       unsigned long dbase = dist->vgic_dist_base;
-       unsigned long rdbase = dist->vgic_redist_base;
-       int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
-       int vcpu_id;
-       const struct kvm_mmio_range *mmio_range;
-
-       if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
-               return vgic_handle_mmio_range(vcpu, run, mmio,
-                                             vgic_v3_dist_ranges, dbase);
-       }
-
-       if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
-           GIC_V3_REDIST_SIZE * nrcpus))
-               return false;
-
-       vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
-       rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
-       mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
-
-       if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
-               rdbase += SGI_BASE_OFFSET;
-               mmio_range = vgic_redist_sgi_ranges;
-       } else {
-               mmio_range = vgic_redist_ranges;
-       }
-       return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
-}
-
 static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 {
        if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm,
 {
        int ret = 0;
        struct vgic_dist *dist = &kvm->arch.vgic;
+       gpa_t rdbase = dist->vgic_redist_base;
+       struct vgic_io_device *iodevs = NULL;
+       int i;
 
        if (!irqchip_in_kernel(kvm))
                return 0;
@@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm,
                goto out;
        }
 
-       kvm->arch.vgic.ready = true;
+       ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+                                      GIC_V3_DIST_SIZE, vgic_v3_dist_ranges,
+                                      -1, &dist->dist_iodev);
+       if (ret)
+               goto out;
+
+       iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL);
+       if (!iodevs) {
+               ret = -ENOMEM;
+               goto out_unregister;
+       }
+
+       for (i = 0; i < dist->nr_cpus; i++) {
+               ret = vgic_register_kvm_io_dev(kvm, rdbase,
+                                              SZ_128K, vgic_redist_ranges,
+                                              i, &iodevs[i]);
+               if (ret)
+                       goto out_unregister;
+               rdbase += GIC_V3_REDIST_SIZE;
+       }
+
+       dist->redist_iodevs = iodevs;
+       dist->ready = true;
+       goto out;
+
+out_unregister:
+       kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+       if (iodevs) {
+               for (i = 0; i < dist->nr_cpus; i++) {
+                       if (iodevs[i].dev.ops)
+                               kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+                                                         &iodevs[i].dev);
+               }
+       }
+
 out:
        if (ret)
                kvm_vgic_destroy(kvm);
@@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
 
-       dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
        dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
        dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
        dist->vm_ops.init_model = vgic_v3_init_model;
index c9f60f52458802f4a66a3912732d8665bc4a3e32..8d550ff14700c8a628b9c2c6ab55301f6bd0e92b 100644 (file)
@@ -31,6 +31,9 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <trace/events/kvm.h>
+#include <asm/kvm.h>
+#include <kvm/iodev.h>
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
        return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
 }
 
+static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+}
+
 static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
        vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
+static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+}
+
 static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
        return false;
 }
 
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+                               struct kvm_exit_mmio *mmio,
+                               phys_addr_t offset, int vcpu_id)
+{
+       u32 *reg;
+       struct vgic_dist *dist = &kvm->arch.vgic;
+
+       reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+       vgic_reg_access(mmio, reg, offset,
+                       ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+       if (mmio->is_write) {
+               vgic_update_state(kvm);
+               return true;
+       }
+
+       return false;
+}
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+                                 struct kvm_exit_mmio *mmio,
+                                 phys_addr_t offset, int vcpu_id)
+{
+       u32 *reg;
+       struct vgic_dist *dist = &kvm->arch.vgic;
+
+       reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+       vgic_reg_access(mmio, reg, offset,
+                       ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+       if (mmio->is_write) {
+               vgic_update_state(kvm);
+               return true;
+       }
+
+       return false;
+}
+
 static u32 vgic_cfg_expand(u16 val)
 {
        u32 res = 0;
@@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 }
 
 /**
- * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
  *
- * Move any pending IRQs that have already been assigned to LRs back to the
+ * Move any IRQs that have already been assigned to LRs back to the
  * emulated distributor state so that the complete emulated state can be read
  * from the main emulation structures without investigating the LRs.
- *
- * Note that IRQs in the active state in the LRs get their pending state moved
- * to the distributor but the active state stays in the LRs, because we don't
- * track the active state on the distributor side.
  */
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
@@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
                 * 01: pending
                 * 10: active
                 * 11: pending and active
-                *
-                * If the LR holds only an active interrupt (not pending) then
-                * just leave it alone.
                 */
-               if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
-                       continue;
+               BUG_ON(!(lr.state & LR_STATE_MASK));
+
+               /* Reestablish SGI source for pending and active IRQs */
+               if (lr.irq < VGIC_NR_SGIS)
+                       add_sgi_source(vcpu, lr.irq, lr.source);
+
+               /*
+                * If the LR holds an active (10) or a pending and active (11)
+                * interrupt then move the active state to the
+                * distributor tracking bit.
+                */
+               if (lr.state & LR_STATE_ACTIVE) {
+                       vgic_irq_set_active(vcpu, lr.irq);
+                       lr.state &= ~LR_STATE_ACTIVE;
+               }
 
                /*
                 * Reestablish the pending state on the distributor and the
@@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
                 * is fine, then we are only setting a few bits that were
                 * already set.
                 */
-               vgic_dist_irq_set_pending(vcpu, lr.irq);
-               if (lr.irq < VGIC_NR_SGIS)
-                       add_sgi_source(vcpu, lr.irq, lr.source);
-               lr.state &= ~LR_STATE_PENDING;
+               if (lr.state & LR_STATE_PENDING) {
+                       vgic_dist_irq_set_pending(vcpu, lr.irq);
+                       lr.state &= ~LR_STATE_PENDING;
+               }
+
                vgic_set_lr(vcpu, i, lr);
 
                /*
-                * If there's no state left on the LR (it could still be
-                * active), then the LR does not hold any useful info and can
-                * be marked as free for other use.
+                * Mark the LR as free for other use.
                 */
-               if (!(lr.state & LR_STATE_MASK)) {
-                       vgic_retire_lr(i, lr.irq, vcpu);
-                       vgic_irq_clear_queued(vcpu, lr.irq);
-               }
+               BUG_ON(lr.state & LR_STATE_MASK);
+               vgic_retire_lr(i, lr.irq, vcpu);
+               vgic_irq_clear_queued(vcpu, lr.irq);
 
                /* Finally update the VGIC state. */
                vgic_update_state(vcpu->kvm);
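
The unqueue logic keys off the 2-bit LR state from the comment above
(01 pending, 10 active, 11 pending-and-active): each set bit is transferred to
its distributor-side bitmap and cleared in the LR, after which the LR must be
empty and can be retired. A condensed standalone sketch (flag values and names
are illustrative):

    #include <assert.h>

    #define LR_PENDING (1 << 0)
    #define LR_ACTIVE  (1 << 1)

    struct model_lr { int irq; int state; };

    static void model_unqueue(struct model_lr *lr,
                              int *dist_pending, int *dist_active)
    {
            assert(lr->state & (LR_PENDING | LR_ACTIVE)); /* BUG_ON above */

            if (lr->state & LR_ACTIVE) {            /* 10 or 11 */
                    *dist_active = 1;
                    lr->state &= ~LR_ACTIVE;
            }
            if (lr->state & LR_PENDING) {           /* 01 or 11 */
                    *dist_pending = 1;
                    lr->state &= ~LR_PENDING;
            }
            /* both bits are now clear: the LR can be retired */
            assert(!(lr->state & (LR_PENDING | LR_ACTIVE)));
    }
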
@@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-                                      struct kvm_exit_mmio *mmio,
-                                      phys_addr_t offset)
-{
-       const struct kvm_mmio_range *r = ranges;
-
-       while (r->len) {
-               if (offset >= r->base &&
-                   (offset + mmio->len) <= (r->base + r->len))
-                       return r;
-               r++;
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+                                     int len, gpa_t offset)
+{
+       while (ranges->len) {
+               if (offset >= ranges->base &&
+                   (offset + len) <= (ranges->base + ranges->len))
+                       return ranges;
+               ranges++;
        }
 
        return NULL;
 }
 
 static bool vgic_validate_access(const struct vgic_dist *dist,
-                                const struct kvm_mmio_range *range,
+                                const struct vgic_io_range *range,
                                 unsigned long offset)
 {
        int irq;
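
vgic_find_range() now takes the raw access length instead of a kvm_exit_mmio;
the table is terminated by a zero-length entry, and a hit requires the whole
access to fit inside one range. A self-contained sketch of the same walk
(names are illustrative):

    #include <stddef.h>

    struct model_range { unsigned long base, len; };

    static const struct model_range *
    model_find_range(const struct model_range *r, int len, unsigned long offset)
    {
            for (; r->len; r++)                     /* len == 0: sentinel */
                    if (offset >= r->base &&
                        offset + len <= r->base + r->len)
                            return r;
            return NULL;
    }

    /* e.g. with { {0x000, 0x100}, {0x100, 0x40}, {} }: a 4-byte access at
     * 0x104 hits the second entry; an 8-byte access at 0x13c crosses the
     * second range's end and returns NULL. */
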
@@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist,
 static bool call_range_handler(struct kvm_vcpu *vcpu,
                               struct kvm_exit_mmio *mmio,
                               unsigned long offset,
-                              const struct kvm_mmio_range *range)
+                              const struct vgic_io_range *range)
 {
-       u32 *data32 = (void *)mmio->data;
        struct kvm_exit_mmio mmio32;
        bool ret;
 
@@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu,
        mmio32.private = mmio->private;
 
        mmio32.phys_addr = mmio->phys_addr + 4;
-       if (mmio->is_write)
-               *(u32 *)mmio32.data = data32[1];
+       mmio32.data = &((u32 *)mmio->data)[1];
        ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
-       if (!mmio->is_write)
-               data32[1] = *(u32 *)mmio32.data;
 
        mmio32.phys_addr = mmio->phys_addr;
-       if (mmio->is_write)
-               *(u32 *)mmio32.data = data32[0];
+       mmio32.data = &((u32 *)mmio->data)[0];
        ret |= range->handle_mmio(vcpu, &mmio32, offset);
-       if (!mmio->is_write)
-               data32[0] = *(u32 *)mmio32.data;
 
        return ret;
 }
 
 /**
- * vgic_handle_mmio_range - handle an in-kernel MMIO access
+ * vgic_handle_mmio_access - handle an in-kernel MMIO access
+ * This is called by the read/write KVM IO device wrappers below.
  * @vcpu:      pointer to the vcpu performing the access
- * @run:       pointer to the kvm_run structure
- * @mmio:      pointer to the data describing the access
- * @ranges:    array of MMIO ranges in a given region
- * @mmio_base: base address of that region
+ * @this:      pointer to the KVM IO device in charge
+ * @addr:      guest physical address of the access
+ * @len:       size of the access
+ * @val:       pointer to the data region
+ * @is_write:  true for a write access, false for a read
  *
  * returns 0 if the MMIO access has been handled, -ENXIO otherwise
  */
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                           struct kvm_exit_mmio *mmio,
-                           const struct kvm_mmio_range *ranges,
-                           unsigned long mmio_base)
+static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
+                                  struct kvm_io_device *this, gpa_t addr,
+                                  int len, void *val, bool is_write)
 {
-       const struct kvm_mmio_range *range;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       struct vgic_io_device *iodev = container_of(this,
+                                                   struct vgic_io_device, dev);
+       struct kvm_run *run = vcpu->run;
+       const struct vgic_io_range *range;
+       struct kvm_exit_mmio mmio;
        bool updated_state;
-       unsigned long offset;
+       gpa_t offset;
 
-       offset = mmio->phys_addr - mmio_base;
-       range = vgic_find_range(ranges, mmio, offset);
+       offset = addr - iodev->addr;
+       range = vgic_find_range(iodev->reg_ranges, len, offset);
        if (unlikely(!range || !range->handle_mmio)) {
-               pr_warn("Unhandled access %d %08llx %d\n",
-                       mmio->is_write, mmio->phys_addr, mmio->len);
-               return false;
+               pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
+               return -ENXIO;
        }
 
-       spin_lock(&vcpu->kvm->arch.vgic.lock);
+       mmio.phys_addr = addr;
+       mmio.len = len;
+       mmio.is_write = is_write;
+       mmio.data = val;
+       mmio.private = iodev->redist_vcpu;
+
+       spin_lock(&dist->lock);
        offset -= range->base;
        if (vgic_validate_access(dist, range, offset)) {
-               updated_state = call_range_handler(vcpu, mmio, offset, range);
+               updated_state = call_range_handler(vcpu, &mmio, offset, range);
        } else {
-               if (!mmio->is_write)
-                       memset(mmio->data, 0, mmio->len);
+               if (!is_write)
+                       memset(val, 0, len);
                updated_state = false;
        }
-       spin_unlock(&vcpu->kvm->arch.vgic.lock);
-       kvm_prepare_mmio(run, mmio);
+       spin_unlock(&dist->lock);
+       run->mmio.is_write      = is_write;
+       run->mmio.len           = len;
+       run->mmio.phys_addr     = addr;
+       memcpy(run->mmio.data, val, len);
+
        kvm_handle_mmio_return(vcpu, run);
 
        if (updated_state)
                vgic_kick_vcpus(vcpu->kvm);
 
-       return true;
+       return 0;
+}
+
+static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
+                                struct kvm_io_device *this,
+                                gpa_t addr, int len, void *val)
+{
+       return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
 }
 
+static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
+                                 struct kvm_io_device *this,
+                                 gpa_t addr, int len, const void *val)
+{
+       return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
+                                      true);
+}
+
+struct kvm_io_device_ops vgic_io_ops = {
+       .read   = vgic_handle_mmio_read,
+       .write  = vgic_handle_mmio_write,
+};
+
 /**
- * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
- * @vcpu:      pointer to the vcpu performing the access
- * @run:       pointer to the kvm_run structure
- * @mmio:      pointer to the data describing the access
+ * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
+ * @kvm:            The VM structure pointer
+ * @base:           The (guest) base address for the register frame
+ * @len:            Length of the register frame window
+ * @ranges:         Describing the handler functions for each register
+ * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
+ * @iodev:          Points to memory to be passed on to the handler
  *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- * Calls the actual handling routine for the selected VGIC model.
+ * @iodev stores the parameters of this function so that they are usable by
+ * the handler and by the dispatcher function (since the KVM I/O bus framework
+ * lacks an opaque parameter). It is initialized here, but the reference must
+ * stay valid and unique for the whole lifetime of the VGIC.
+ * If the register frame is not mapped for a specific VCPU, pass -1 to
+ * @redist_vcpu_id.
  */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                     struct kvm_exit_mmio *mmio)
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+                            const struct vgic_io_range *ranges,
+                            int redist_vcpu_id,
+                            struct vgic_io_device *iodev)
 {
-       if (!irqchip_in_kernel(vcpu->kvm))
-               return false;
+       struct kvm_vcpu *vcpu = NULL;
+       int ret;
 
-       /*
-        * This will currently call either vgic_v2_handle_mmio() or
-        * vgic_v3_handle_mmio(), which in turn will call
-        * vgic_handle_mmio_range() defined above.
-        */
-       return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
+       if (redist_vcpu_id >= 0)
+               vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
+
+       iodev->addr             = base;
+       iodev->len              = len;
+       iodev->reg_ranges       = ranges;
+       iodev->redist_vcpu      = vcpu;
+
+       kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
+
+       mutex_lock(&kvm->slots_lock);
+
+       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
+                                     &iodev->dev);
+       mutex_unlock(&kvm->slots_lock);
+
+       /* Mark the iodev as invalid if registration fails. */
+       if (ret)
+               iodev->dev.ops = NULL;
+
+       return ret;
 }
 
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
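
Because struct kvm_io_device_ops carries no opaque cookie, the dispatcher
recovers its per-frame state via container_of() on the embedded
struct kvm_io_device, which is what vgic_handle_mmio_access() does above with
struct vgic_io_device. A standalone model of that recovery:

    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct model_io_device { int unused; };     /* what the bus layer sees */

    struct model_vgic_io_device {
            unsigned long addr;                 /* frame base, etc. */
            struct model_io_device dev;         /* registered on the bus */
    };

    static unsigned long model_frame_base(struct model_io_device *this)
    {
            struct model_vgic_io_device *iodev =
                    container_of(this, struct model_vgic_io_device, dev);

            return iodev->addr;
    }
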
@@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist)
        return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
 }
 
+static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       unsigned long *active, *enabled, *act_percpu, *act_shared;
+       unsigned long active_private, active_shared;
+       int nr_shared = vgic_nr_shared_irqs(dist);
+       int vcpu_id;
+
+       vcpu_id = vcpu->vcpu_id;
+       act_percpu = vcpu->arch.vgic_cpu.active_percpu;
+       act_shared = vcpu->arch.vgic_cpu.active_shared;
+
+       active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
+       enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+       bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
+
+       active = vgic_bitmap_get_shared_map(&dist->irq_active);
+       enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+       bitmap_and(act_shared, active, enabled, nr_shared);
+       bitmap_and(act_shared, act_shared,
+                  vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+                  nr_shared);
+
+       active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
+       active_shared = find_first_bit(act_shared, nr_shared);
+
+       return (active_private < VGIC_NR_PRIVATE_IRQS ||
+               active_shared < nr_shared);
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
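
compute_active_for_cpu() asks one question: is any interrupt both active and
enabled (and, for shared interrupts, targeted at this vcpu)? The kernel
phrases this with bitmap_and() and find_first_bit(); word by word that is
simply (sketch; pass an all-ones target for the private half):

    static int model_any_active(const unsigned long *active,
                                const unsigned long *enabled,
                                const unsigned long *target, int nwords)
    {
            int i;

            for (i = 0; i < nwords; i++)
                    if (active[i] & enabled[i] & target[i])
                            return 1;   /* find_first_bit() would hit here */
            return 0;
    }
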
@@ -835,7 +978,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 
 /*
  * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
+ * or active interrupts. Must be called with distributor lock held.
  */
 void vgic_update_state(struct kvm *kvm)
 {
@@ -849,10 +992,13 @@ void vgic_update_state(struct kvm *kvm)
        }
 
        kvm_for_each_vcpu(c, vcpu, kvm) {
-               if (compute_pending_for_cpu(vcpu)) {
-                       pr_debug("CPU%d has pending interrupts\n", c);
+               if (compute_pending_for_cpu(vcpu))
                        set_bit(c, dist->irq_pending_on_cpu);
-               }
+
+               if (compute_active_for_cpu(vcpu))
+                       set_bit(c, dist->irq_active_on_cpu);
+               else
+                       clear_bit(c, dist->irq_active_on_cpu);
        }
 }
 
@@ -955,6 +1101,26 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
        }
 }
 
+static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
+                                int lr_nr, struct vgic_lr vlr)
+{
+       if (vgic_irq_is_active(vcpu, irq)) {
+               vlr.state |= LR_STATE_ACTIVE;
+               kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
+               vgic_irq_clear_active(vcpu, irq);
+               vgic_update_state(vcpu->kvm);
+       } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+               vlr.state |= LR_STATE_PENDING;
+               kvm_debug("Set pending: 0x%x\n", vlr.state);
+       }
+
+       if (!vgic_irq_is_edge(vcpu, irq))
+               vlr.state |= LR_EOI_INT;
+
+       vgic_set_lr(vcpu, lr_nr, vlr);
+       vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
+}
+
 /*
  * Queue an interrupt to a CPU virtual interface. Return true on success,
  * or false if it wasn't possible to queue it.
@@ -982,9 +1148,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
                if (vlr.source == sgi_source_id) {
                        kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
                        BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-                       vlr.state |= LR_STATE_PENDING;
-                       vgic_set_lr(vcpu, lr, vlr);
-                       vgic_sync_lr_elrsr(vcpu, lr, vlr);
+                       vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
                        return true;
                }
        }
@@ -1001,12 +1165,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
        vlr.irq = irq;
        vlr.source = sgi_source_id;
-       vlr.state = LR_STATE_PENDING;
-       if (!vgic_irq_is_edge(vcpu, irq))
-               vlr.state |= LR_EOI_INT;
-
-       vgic_set_lr(vcpu, lr, vlr);
-       vgic_sync_lr_elrsr(vcpu, lr, vlr);
+       vlr.state = 0;
+       vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 
        return true;
 }
@@ -1038,39 +1198,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       unsigned long *pa_percpu, *pa_shared;
        int i, vcpu_id;
        int overflow = 0;
+       int nr_shared = vgic_nr_shared_irqs(dist);
 
        vcpu_id = vcpu->vcpu_id;
 
+       pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
+       pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
+
+       bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
+                 VGIC_NR_PRIVATE_IRQS);
+       bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
+                 nr_shared);
        /*
         * We may not have any pending interrupt, or the interrupts
         * may have been serviced from another vcpu. In all cases,
         * move along.
         */
-       if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
-               pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+       if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
                goto epilog;
-       }
 
        /* SGIs */
-       for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+       for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
                if (!queue_sgi(vcpu, i))
                        overflow = 1;
        }
 
        /* PPIs */
-       for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+       for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
                if (!vgic_queue_hwirq(vcpu, i))
                        overflow = 1;
        }
 
        /* SPIs */
-       for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+       for_each_set_bit(i, pa_shared, nr_shared) {
                if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
                        overflow = 1;
        }
 
+
+
+
 epilog:
        if (overflow) {
                vgic_enable_underflow(vcpu);
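
The flush path now walks the union of pending and active bits, split along the
architected numbering: SGIs 0..15 and PPIs 16..31 are the per-CPU private
interrupts, while SPIs start at 32 in the shared map. The union built by the
two bitmap_or() calls above is just (sketch):

    /* pend_act = pending | active, one word at a time */
    static void model_union(unsigned long *pend_act,
                            const unsigned long *pending,
                            const unsigned long *active, int nwords)
    {
            int i;

            for (i = 0; i < nwords; i++)
                    pend_act[i] = pending[i] | active[i];
    }
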
@@ -1089,7 +1259,9 @@ epilog:
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
        u32 status = vgic_get_interrupt_status(vcpu);
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        bool level_pending = false;
+       struct kvm *kvm = vcpu->kvm;
 
        kvm_debug("STATUS = %08x\n", status);
 
@@ -1106,6 +1278,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
                        struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
                        WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
+                       spin_lock(&dist->lock);
                        vgic_irq_clear_queued(vcpu, vlr.irq);
                        WARN_ON(vlr.state & LR_STATE_MASK);
                        vlr.state = 0;
@@ -1124,6 +1297,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
                         */
                        vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
+                       /*
+                        * kvm_notify_acked_irq calls kvm_set_irq()
+                        * to reset the IRQ level. Need to release the
+                        * lock for kvm_set_irq to grab it.
+                        */
+                       spin_unlock(&dist->lock);
+
+                       kvm_notify_acked_irq(kvm, 0,
+                                            vlr.irq - VGIC_NR_PRIVATE_IRQS);
+                       spin_lock(&dist->lock);
+
                        /* Any additional pending interrupt? */
                        if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
                                vgic_cpu_irq_set(vcpu, vlr.irq);
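
The unlock/relock bracket around kvm_notify_acked_irq() avoids self-deadlock:
the notifier can call back into kvm_set_irq(), which takes the distributor
lock itself. The shape of the pattern, with a pthreads mutex standing in for
the kernel spinlock (an assumption of this sketch):

    #include <pthread.h>

    static pthread_mutex_t model_dist_lock = PTHREAD_MUTEX_INITIALIZER;

    /* cb may re-enter code that takes model_dist_lock, so drop it first */
    static void model_notify_acked(void (*cb)(void))
    {
            pthread_mutex_unlock(&model_dist_lock);
            cb();
            pthread_mutex_lock(&model_dist_lock);
    }
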
@@ -1133,6 +1317,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
                                vgic_cpu_irq_clear(vcpu, vlr.irq);
                        }
 
+                       spin_unlock(&dist->lock);
+
                        /*
                         * Despite being EOIed, the LR may not have
                         * been marked as empty.
@@ -1155,10 +1341,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
        return level_pending;
 }
 
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
+/* Sync back the VGIC state after a guest run */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1205,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
        if (!irqchip_in_kernel(vcpu->kvm))
                return;
 
-       spin_lock(&dist->lock);
        __kvm_vgic_sync_hwstate(vcpu);
-       spin_unlock(&dist->lock);
 }
 
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
@@ -1225,6 +1404,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
        return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       if (!irqchip_in_kernel(vcpu->kvm))
+               return 0;
+
+       return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+
 void vgic_kick_vcpus(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
@@ -1397,8 +1587,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
        kfree(vgic_cpu->pending_shared);
+       kfree(vgic_cpu->active_shared);
+       kfree(vgic_cpu->pend_act_shared);
        kfree(vgic_cpu->vgic_irq_lr_map);
        vgic_cpu->pending_shared = NULL;
+       vgic_cpu->active_shared = NULL;
+       vgic_cpu->pend_act_shared = NULL;
        vgic_cpu->vgic_irq_lr_map = NULL;
 }
 
@@ -1408,9 +1602,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 
        int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
        vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+       vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
+       vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
        vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
-       if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+       if (!vgic_cpu->pending_shared
+               || !vgic_cpu->active_shared
+               || !vgic_cpu->pend_act_shared
+               || !vgic_cpu->vgic_irq_lr_map) {
                kvm_vgic_vcpu_destroy(vcpu);
                return -ENOMEM;
        }
@@ -1463,10 +1662,12 @@ void kvm_vgic_destroy(struct kvm *kvm)
        kfree(dist->irq_spi_mpidr);
        kfree(dist->irq_spi_target);
        kfree(dist->irq_pending_on_cpu);
+       kfree(dist->irq_active_on_cpu);
        dist->irq_sgi_sources = NULL;
        dist->irq_spi_cpu = NULL;
        dist->irq_spi_target = NULL;
        dist->irq_pending_on_cpu = NULL;
+       dist->irq_active_on_cpu = NULL;
        dist->nr_cpus = 0;
 }
 
@@ -1502,6 +1703,7 @@ int vgic_init(struct kvm *kvm)
        ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
        ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
        ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+       ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
        ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
        ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
 
@@ -1514,10 +1716,13 @@ int vgic_init(struct kvm *kvm)
                                       GFP_KERNEL);
        dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
                                           GFP_KERNEL);
+       dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+                                          GFP_KERNEL);
        if (!dist->irq_sgi_sources ||
            !dist->irq_spi_cpu ||
            !dist->irq_spi_target ||
-           !dist->irq_pending_on_cpu) {
+           !dist->irq_pending_on_cpu ||
+           !dist->irq_active_on_cpu) {
                ret = -ENOMEM;
                goto out;
        }
@@ -1845,12 +2050,9 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
        return r;
 }
 
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset)
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
 {
-       struct kvm_exit_mmio dev_attr_mmio;
-
-       dev_attr_mmio.len = 4;
-       if (vgic_find_range(ranges, &dev_attr_mmio, offset))
+       if (vgic_find_range(ranges, 4, offset))
                return 0;
        else
                return -ENXIO;
@@ -1883,8 +2085,10 @@ static struct notifier_block vgic_cpu_nb = {
 };
 
 static const struct of_device_id vgic_ids[] = {
-       { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-       { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+       { .compatible = "arm,cortex-a15-gic",   .data = vgic_v2_probe, },
+       { .compatible = "arm,cortex-a7-gic",    .data = vgic_v2_probe, },
+       { .compatible = "arm,gic-400",          .data = vgic_v2_probe, },
+       { .compatible = "arm,gic-v3",           .data = vgic_v3_probe, },
        {},
 };
 
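
The wider match table only maps devicetree "compatible" strings to a probe
function; the Cortex-A7 GIC and GIC-400 are both GICv2 implementations, hence
the shared vgic_v2_probe. The lookup the OF core performs is essentially
(sketch):

    #include <string.h>

    struct model_of_id {
            const char *compatible;
            int (*probe)(void);
    };

    static int (*model_of_match(const struct model_of_id *ids,
                                const char *compat))(void)
    {
            for (; ids->compatible; ids++)      /* {} terminates the table */
                    if (!strcmp(ids->compatible, compat))
                            return ids->probe;
            return NULL;
    }
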
@@ -1932,3 +2136,38 @@ out_free_irq:
        free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
        return ret;
 }
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+                   struct kvm_kernel_irq_routing_entry *entries,
+                   int gsi)
+{
+       return gsi;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+       return pin;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id,
+               u32 irq, int level, bool line_status)
+{
+       unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+
+       trace_kvm_set_irq(irq, level, irq_source_id);
+
+       BUG_ON(!vgic_initialized(kvm));
+
+       if (spi > kvm->arch.vgic.nr_irqs)
+               return -EINVAL;
+       return kvm_vgic_inject_irq(kvm, 0, spi, level);
+
+}
+
+/* MSI not implemented yet */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+               struct kvm *kvm, int irq_source_id,
+               int level, bool line_status)
+{
+       return 0;
+}
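
kvm_set_irq() above treats the routing GSI as an offset into the SPI space:
GSI 0 is SPI 32, the first shared interrupt, since IDs 0..31 are the banked
private interrupts. Illustrative helper:

    #define MODEL_VGIC_NR_PRIVATE_IRQS 32

    /* gsi 0 -> SPI 32, gsi 1 -> SPI 33, ... */
    static unsigned int model_gsi_to_spi(unsigned int gsi)
    {
            return gsi + MODEL_VGIC_NR_PRIVATE_IRQS;
    }
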
index 1e83bdf5f499b24dd8ea19d5f456fcc740d87bad..0df74cbb6200686ab8cfbc853b11f27cc954b678 100644 (file)
@@ -20,6 +20,8 @@
 #ifndef __KVM_VGIC_H__
 #define __KVM_VGIC_H__
 
+#include <kvm/iodev.h>
+
 #define VGIC_ADDR_UNDEF                (-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
@@ -57,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
 
+struct kvm_exit_mmio {
+       phys_addr_t     phys_addr;
+       void            *data;
+       u32             len;
+       bool            is_write;
+       void            *private;
+};
+
 void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
                     phys_addr_t offset, int mode);
 bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
@@ -74,7 +84,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
        *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
 }
 
-struct kvm_mmio_range {
+struct vgic_io_range {
        phys_addr_t base;
        unsigned long len;
        int bits_per_irq;
@@ -82,6 +92,11 @@ struct kvm_mmio_range {
                            phys_addr_t offset);
 };
 
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+                            const struct vgic_io_range *ranges,
+                            int redist_id,
+                            struct vgic_io_device *iodev);
+
 static inline bool is_in_range(phys_addr_t addr, unsigned long len,
                               phys_addr_t baseaddr, unsigned long size)
 {
@@ -89,14 +104,8 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len,
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-                                      struct kvm_exit_mmio *mmio,
-                                      phys_addr_t offset);
-
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                           struct kvm_exit_mmio *mmio,
-                           const struct kvm_mmio_range *ranges,
-                           unsigned long mmio_base);
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+                                     int len, gpa_t offset);
 
 bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
                            phys_addr_t offset, int vcpu_id, int access);
@@ -107,12 +116,20 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
 bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
                                   phys_addr_t offset, int vcpu_id);
 
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+                               struct kvm_exit_mmio *mmio,
+                               phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+                                 struct kvm_exit_mmio *mmio,
+                                 phys_addr_t offset, int vcpu_id);
+
 bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
                         phys_addr_t offset);
 
 void vgic_kick_vcpus(struct kvm *kvm);
 
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset);
 int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 
index 00d86427af0f8bae911c2e41a33303c2d02a3428..571c1ce37d152f86c3690d9e2427f5eeca97cd2c 100644 (file)
@@ -8,7 +8,7 @@
  *
  */
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
@@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
        return 1;
 }
 
-static int coalesced_mmio_write(struct kvm_io_device *this,
-                               gpa_t addr, int len, const void *val)
+static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
+                               struct kvm_io_device *this, gpa_t addr,
+                               int len, const void *val)
 {
        struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
        struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
index 148b2392c762ba763a6ad09b314699c451b463d0..9ff4193dfa493c3e226c3fd554061b171ca7b9c5 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
 /*
@@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
        unsigned int events;
        int idx;
 
+       if (!kvm_arch_intc_initialized(kvm))
+               return -EAGAIN;
+
        irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
        if (!irqfd)
                return -ENOMEM;
@@ -712,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
 
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
-ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
-               const void *val)
+ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
+               int len, const void *val)
 {
        struct _ioeventfd *p = to_ioeventfd(this);
 
diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h
deleted file mode 100644 (file)
index 12fd3ca..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef __KVM_IODEV_H__
-#define __KVM_IODEV_H__
-
-#include <linux/kvm_types.h>
-#include <asm/errno.h>
-
-struct kvm_io_device;
-
-/**
- * kvm_io_device_ops are called under kvm slots_lock.
- * read and write handlers return 0 if the transaction has been handled,
- * or non-zero to have it passed to the next device.
- **/
-struct kvm_io_device_ops {
-       int (*read)(struct kvm_io_device *this,
-                   gpa_t addr,
-                   int len,
-                   void *val);
-       int (*write)(struct kvm_io_device *this,
-                    gpa_t addr,
-                    int len,
-                    const void *val);
-       void (*destructor)(struct kvm_io_device *this);
-};
-
-
-struct kvm_io_device {
-       const struct kvm_io_device_ops *ops;
-};
-
-static inline void kvm_iodevice_init(struct kvm_io_device *dev,
-                                    const struct kvm_io_device_ops *ops)
-{
-       dev->ops = ops;
-}
-
-static inline int kvm_iodevice_read(struct kvm_io_device *dev,
-                                   gpa_t addr, int l, void *v)
-{
-       return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
-}
-
-static inline int kvm_iodevice_write(struct kvm_io_device *dev,
-                                    gpa_t addr, int l, const void *v)
-{
-       return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
-}
-
-static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
-{
-       if (dev->ops->destructor)
-               dev->ops->destructor(dev);
-}
-
-#endif /* __KVM_IODEV_H__ */
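
The header is not gone but moved to include/kvm/iodev.h, and in this series
the read/write ops also grow a struct kvm_vcpu * first argument (visible in
the coalesced-MMIO, ioeventfd, vgic and io-bus hunks). Reconstructed from
those call sites, the ops table after the move reads:

    struct kvm_io_device_ops {
            int (*read)(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                        gpa_t addr, int len, void *val);
            int (*write)(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                         gpa_t addr, int len, const void *val);
            void (*destructor)(struct kvm_io_device *this);
    };
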
index 7f256f31df102e36da59a8ebed636f1c9615cb00..1d56a901e791788d9f2c855dcf3e96a9b650df77 100644 (file)
@@ -105,7 +105,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
        i = kvm_irq_map_gsi(kvm, irq_set, irq);
        srcu_read_unlock(&kvm->irq_srcu, idx);
 
-       while(i--) {
+       while (i--) {
                int r;
                r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
                                   line_status);
index cc6a25d95fbff532bf5b00b0c339bec91ddc5bcf..d3fc9399062a5034b99eaa3d12c855699fdbf608 100644 (file)
@@ -16,7 +16,7 @@
  *
  */
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-unsigned int halt_poll_ns = 0;
+static unsigned int halt_poll_ns;
 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
 
 /*
  * Ordering of locks:
  *
- *             kvm->lock --> kvm->slots_lock --> kvm->irq_lock
+ *     kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
 DEFINE_SPINLOCK(kvm_lock);
@@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
-static int kvm_usage_count = 0;
+static int kvm_usage_count;
 static atomic_t hardware_enable_failed;
 
 struct kmem_cache *kvm_vcpu_cache;
@@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size)
                return kzalloc(size, GFP_KERNEL);
 }
 
-void kvm_kvfree(const void *addr)
-{
-       if (is_vmalloc_addr(addr))
-               vfree(addr);
-       else
-               kfree(addr);
-}
-
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
        if (!memslot->dirty_bitmap)
                return;
 
-       kvm_kvfree(memslot->dirty_bitmap);
+       kvfree(memslot->dirty_bitmap);
        memslot->dirty_bitmap = NULL;
 }
 
@@ -888,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 * or moved, memslot will be created.
                 *
                 * validation of sp->gfn happens in:
-                *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
-                *      - kvm_is_visible_gfn (mmu_check_roots)
+                *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+                *      - kvm_is_visible_gfn (mmu_check_roots)
                 */
                kvm_arch_flush_shadow_memslot(kvm, slot);
 
@@ -1061,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
                mask = xchg(&dirty_bitmap[i], 0);
                dirty_bitmap_buffer[i] = mask;
 
-               offset = i * BITS_PER_LONG;
-               kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
-                                                               mask);
+               if (mask) {
+                       offset = i * BITS_PER_LONG;
+                       kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+                                                               offset, mask);
+               }
        }
 
        spin_unlock(&kvm->mmu_lock);
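
The added mask check matters because most words of a dirty bitmap are usually
zero: every word is still grabbed and cleared, but the relatively costly arch
write-protect hook now runs only for words that actually carry dirty bits.
Condensed (sketch; BITS_PER_LONG taken as 64):

    /* for each word: grab-and-clear, then act only on non-zero masks */
    static void model_scan_dirty(unsigned long *bitmap, int nwords,
                                 void (*protect)(int offset,
                                                 unsigned long mask))
    {
            int i;

            for (i = 0; i < nwords; i++) {
                    unsigned long mask = bitmap[i];

                    bitmap[i] = 0;              /* xchg() in the kernel */
                    if (mask)
                            protect(i * 64, mask);
            }
    }
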
@@ -1193,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
        return gfn_to_hva_memslot_prot(slot, gfn, writable);
 }
 
-static int kvm_read_hva(void *data, void __user *hva, int len)
-{
-       return __copy_from_user(data, hva, len);
-}
-
-static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
-{
-       return __copy_from_user_inatomic(data, hva, len);
-}
-
 static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int write, struct page **page)
 {
@@ -1481,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
        return kvm_pfn_to_page(pfn);
 }
-
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
@@ -1517,6 +1500,7 @@ void kvm_set_pfn_dirty(pfn_t pfn)
 {
        if (!kvm_is_reserved_pfn(pfn)) {
                struct page *page = pfn_to_page(pfn);
+
                if (!PageReserved(page))
                        SetPageDirty(page);
        }
@@ -1554,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
        addr = gfn_to_hva_prot(kvm, gfn, NULL);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
-       r = kvm_read_hva(data, (void __user *)addr + offset, len);
+       r = __copy_from_user(data, (void __user *)addr + offset, len);
        if (r)
                return -EFAULT;
        return 0;
@@ -1593,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
        if (kvm_is_error_hva(addr))
                return -EFAULT;
        pagefault_disable();
-       r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
+       r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
        pagefault_enable();
        if (r)
                return -EFAULT;
@@ -1653,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
        ghc->generation = slots->generation;
        ghc->len = len;
        ghc->memslot = gfn_to_memslot(kvm, start_gfn);
-       ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
-       if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
+       ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
+       if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
                ghc->hva += offset;
        } else {
                /*
@@ -1742,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
        int offset = offset_in_page(gpa);
        int ret;
 
-        while ((seg = next_segment(len, offset)) != 0) {
+       while ((seg = next_segment(len, offset)) != 0) {
                ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
                if (ret < 0)
                        return ret;
@@ -1800,6 +1784,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
        start = cur = ktime_get();
        if (halt_poll_ns) {
                ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+
                do {
                        /*
                         * This sets KVM_REQ_UNHALT if an interrupt
@@ -2118,7 +2103,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
         * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
         * so vcpu_load() would break it.
         */
-       if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+       if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
                return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 #endif
 
@@ -2135,6 +2120,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                        /* The thread running this VCPU changed. */
                        struct pid *oldpid = vcpu->pid;
                        struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+
                        rcu_assign_pointer(vcpu->pid, newpid);
                        if (oldpid)
                                synchronize_rcu();
@@ -2205,7 +2191,7 @@ out_free1:
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &mp_state, sizeof mp_state))
+               if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
                        goto out;
                r = 0;
                break;
@@ -2214,7 +2200,7 @@ out_free1:
                struct kvm_mp_state mp_state;
 
                r = -EFAULT;
-               if (copy_from_user(&mp_state, argp, sizeof mp_state))
+               if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
                        goto out;
                r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
                break;
@@ -2223,13 +2209,13 @@ out_free1:
                struct kvm_translation tr;
 
                r = -EFAULT;
-               if (copy_from_user(&tr, argp, sizeof tr))
+               if (copy_from_user(&tr, argp, sizeof(tr)))
                        goto out;
                r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &tr, sizeof tr))
+               if (copy_to_user(argp, &tr, sizeof(tr)))
                        goto out;
                r = 0;
                break;
@@ -2238,7 +2224,7 @@ out_free1:
                struct kvm_guest_debug dbg;
 
                r = -EFAULT;
-               if (copy_from_user(&dbg, argp, sizeof dbg))
+               if (copy_from_user(&dbg, argp, sizeof(dbg)))
                        goto out;
                r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
                break;
@@ -2252,14 +2238,14 @@ out_free1:
                if (argp) {
                        r = -EFAULT;
                        if (copy_from_user(&kvm_sigmask, argp,
-                                          sizeof kvm_sigmask))
+                                          sizeof(kvm_sigmask)))
                                goto out;
                        r = -EINVAL;
-                       if (kvm_sigmask.len != sizeof sigset)
+                       if (kvm_sigmask.len != sizeof(sigset))
                                goto out;
                        r = -EFAULT;
                        if (copy_from_user(&sigset, sigmask_arg->sigset,
-                                          sizeof sigset))
+                                          sizeof(sigset)))
                                goto out;
                        p = &sigset;
                }
@@ -2321,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
                if (argp) {
                        r = -EFAULT;
                        if (copy_from_user(&kvm_sigmask, argp,
-                                          sizeof kvm_sigmask))
+                                          sizeof(kvm_sigmask)))
                                goto out;
                        r = -EINVAL;
-                       if (kvm_sigmask.len != sizeof csigset)
+                       if (kvm_sigmask.len != sizeof(csigset))
                                goto out;
                        r = -EFAULT;
                        if (copy_from_user(&csigset, sigmask_arg->sigset,
-                                          sizeof csigset))
+                                          sizeof(csigset)))
                                goto out;
                        sigset_from_compat(&sigset, &csigset);
                        r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
@@ -2525,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp,
 
                r = -EFAULT;
                if (copy_from_user(&kvm_userspace_mem, argp,
-                                               sizeof kvm_userspace_mem))
+                                               sizeof(kvm_userspace_mem)))
                        goto out;
 
                r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
@@ -2535,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_dirty_log log;
 
                r = -EFAULT;
-               if (copy_from_user(&log, argp, sizeof log))
+               if (copy_from_user(&log, argp, sizeof(log)))
                        goto out;
                r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
                break;
@@ -2543,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp,
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
        case KVM_REGISTER_COALESCED_MMIO: {
                struct kvm_coalesced_mmio_zone zone;
+
                r = -EFAULT;
-               if (copy_from_user(&zone, argp, sizeof zone))
+               if (copy_from_user(&zone, argp, sizeof(zone)))
                        goto out;
                r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
                break;
        }
        case KVM_UNREGISTER_COALESCED_MMIO: {
                struct kvm_coalesced_mmio_zone zone;
+
                r = -EFAULT;
-               if (copy_from_user(&zone, argp, sizeof zone))
+               if (copy_from_user(&zone, argp, sizeof(zone)))
                        goto out;
                r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
                break;
@@ -2562,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_irqfd data;
 
                r = -EFAULT;
-               if (copy_from_user(&data, argp, sizeof data))
+               if (copy_from_user(&data, argp, sizeof(data)))
                        goto out;
                r = kvm_irqfd(kvm, &data);
                break;
@@ -2571,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_ioeventfd data;
 
                r = -EFAULT;
-               if (copy_from_user(&data, argp, sizeof data))
+               if (copy_from_user(&data, argp, sizeof(data)))
                        goto out;
                r = kvm_ioeventfd(kvm, &data);
                break;
@@ -2592,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_msi msi;
 
                r = -EFAULT;
-               if (copy_from_user(&msi, argp, sizeof msi))
+               if (copy_from_user(&msi, argp, sizeof(msi)))
                        goto out;
                r = kvm_send_userspace_msi(kvm, &msi);
                break;
@@ -2604,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_irq_level irq_event;
 
                r = -EFAULT;
-               if (copy_from_user(&irq_event, argp, sizeof irq_event))
+               if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
                        goto out;
 
                r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
@@ -2614,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp,
 
                r = -EFAULT;
                if (ioctl == KVM_IRQ_LINE_STATUS) {
-                       if (copy_to_user(argp, &irq_event, sizeof irq_event))
+                       if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
                                goto out;
                }
 
@@ -2647,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp,
                        goto out_free_irq_routing;
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
-       out_free_irq_routing:
+out_free_irq_routing:
                vfree(entries);
                break;
        }
@@ -2822,8 +2810,7 @@ static void hardware_enable_nolock(void *junk)
        if (r) {
                cpumask_clear_cpu(cpu, cpus_hardware_enabled);
                atomic_inc(&hardware_enable_failed);
-               printk(KERN_INFO "kvm: enabling virtualization on "
-                                "CPU%d failed\n", cpu);
+               pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
        }
 }
 
@@ -2899,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
-               printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+               pr_info("kvm: disabling virtualization on CPU%d\n",
                       cpu);
                hardware_disable();
                break;
        case CPU_STARTING:
-               printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+               pr_info("kvm: enabling virtualization on CPU%d\n",
                       cpu);
                hardware_enable();
                break;
@@ -2921,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
         *
         * And Intel TXT required VMX off for all cpu when system shutdown.
         */
-       printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+       pr_info("kvm: exiting hardware virtualization\n");
        kvm_rebooting = true;
        on_each_cpu(hardware_disable_nolock, NULL, 1);
        return NOTIFY_OK;
@@ -2945,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 }
 
 static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
-                                 const struct kvm_io_range *r2)
+                                const struct kvm_io_range *r2)
 {
        if (r1->addr < r2->addr)
                return -1;
@@ -2998,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
        return off;
 }
 
-static int __kvm_io_bus_write(struct kvm_io_bus *bus,
+static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
                              struct kvm_io_range *range, const void *val)
 {
        int idx;
@@ -3009,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
 
        while (idx < bus->dev_count &&
                kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
-               if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
+               if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
                                        range->len, val))
                        return idx;
                idx++;
@@ -3019,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                     int len, const void *val)
 {
        struct kvm_io_bus *bus;
@@ -3031,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                .len = len,
        };
 
-       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-       r = __kvm_io_bus_write(bus, &range, val);
+       bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       r = __kvm_io_bus_write(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
 
 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-                           int len, const void *val, long cookie)
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+                           gpa_t addr, int len, const void *val, long cookie)
 {
        struct kvm_io_bus *bus;
        struct kvm_io_range range;
@@ -3048,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                .len = len,
        };
 
-       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+       bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
 
        /* First try the device referenced by cookie. */
        if ((cookie >= 0) && (cookie < bus->dev_count) &&
            (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
-               if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
+               if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
                                        val))
                        return cookie;
 
@@ -3061,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
         * cookie contained garbage; fall back to search and return the
         * correct cookie value.
         */
-       return __kvm_io_bus_write(bus, &range, val);
+       return __kvm_io_bus_write(vcpu, bus, &range, val);
 }
 
-static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
-                            void *val)
+static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
+                            struct kvm_io_range *range, void *val)
 {
        int idx;
 
@@ -3075,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
 
        while (idx < bus->dev_count &&
                kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
-               if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
+               if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
                                       range->len, val))
                        return idx;
                idx++;
@@ -3086,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
 EXPORT_SYMBOL_GPL(kvm_io_bus_write);
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                    int len, void *val)
 {
        struct kvm_io_bus *bus;
@@ -3098,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                .len = len,
        };
 
-       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-       r = __kvm_io_bus_read(bus, &range, val);
+       bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       r = __kvm_io_bus_read(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
 
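
With the bus helpers taking a vcpu, callers stop passing kvm explicitly and
the kvm pointer is recovered as vcpu->kvm inside. An illustrative
(hypothetical) call-site conversion:

    -       ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, len, data);
    +       ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, len, data);
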
@@ -3269,6 +3256,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
        struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
        if (vcpu->preempted)
                vcpu->preempted = false;
 
@@ -3350,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
        r = misc_register(&kvm_dev);
        if (r) {
-               printk(KERN_ERR "kvm: misc device register failed\n");
+               pr_err("kvm: misc device register failed\n");
                goto out_unreg;
        }
 
@@ -3361,7 +3349,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
        r = kvm_init_debug();
        if (r) {
-               printk(KERN_ERR "kvm: create debugfs files failed\n");
+               pr_err("kvm: create debugfs files failed\n");
                goto out_undebugfs;
        }