Merge tag 'v4.5-rc1' into x86/asm, to refresh the branch before merging new changes
author     Ingo Molnar <mingo@kernel.org>
           Fri, 29 Jan 2016 08:41:18 +0000 (09:41 +0100)
committer  Ingo Molnar <mingo@kernel.org>
           Fri, 29 Jan 2016 08:41:18 +0000 (09:41 +0100)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
15 files changed:
arch/x86/entry/calling.h
arch/x86/entry/vdso/vdso2c.h
arch/x86/entry/vdso/vma.c
arch/x86/entry/vsyscall/vsyscall_gtod.c
arch/x86/include/asm/clocksource.h
arch/x86/include/asm/frame.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/vdso.h
arch/x86/include/asm/vgtod.h
arch/x86/kernel/head_64.S
arch/x86/kernel/signal.c
include/linux/mm.h
include/linux/mm_types.h
mm/memory.c
mm/mmap.c

index e32206e0986828390e769604ec6cd510b1f2296e..9a9e5884066c6581878b56cde27e75f542b21249 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -201,37 +201,6 @@ For 32-bit we have the following conventions - kernel is built with
        .byte 0xf1
        .endm
 
-#else /* CONFIG_X86_64 */
-
-/*
- * For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
- * are different from the entry_32.S versions in not changing the segment
- * registers. So only suitable for in kernel use, not when transitioning
- * from or to user space. The resulting stack frame is not a standard
- * pt_regs frame. The main use case is calling C code from assembler
- * when all the registers need to be preserved.
- */
-
-       .macro SAVE_ALL
-       pushl %eax
-       pushl %ebp
-       pushl %edi
-       pushl %esi
-       pushl %edx
-       pushl %ecx
-       pushl %ebx
-       .endm
-
-       .macro RESTORE_ALL
-       popl %ebx
-       popl %ecx
-       popl %edx
-       popl %esi
-       popl %edi
-       popl %ebp
-       popl %eax
-       .endm
-
 #endif /* CONFIG_X86_64 */
 
 /*
index 0224987556ce80bd606063b56ef124b0857a3f44..abe961c7c71c7464e9707afa01bfe14efc89ba5f 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -150,16 +150,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
        }
        fprintf(outfile, "\n};\n\n");
 
-       fprintf(outfile, "static struct page *pages[%lu];\n\n",
-               mapping_size / 4096);
-
        fprintf(outfile, "const struct vdso_image %s = {\n", name);
        fprintf(outfile, "\t.data = raw_data,\n");
        fprintf(outfile, "\t.size = %lu,\n", mapping_size);
-       fprintf(outfile, "\t.text_mapping = {\n");
-       fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
-       fprintf(outfile, "\t\t.pages = pages,\n");
-       fprintf(outfile, "\t},\n");
        if (alt_sec) {
                fprintf(outfile, "\t.alt = %lu,\n",
                        (unsigned long)GET_LE(&alt_sec->sh_offset));
index b8f69e264ac4148afbdaeedc69e24132980117a9..7c912fefe79b7017f57b05604ec166689d8a6642 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -27,13 +27,7 @@ unsigned int __read_mostly vdso64_enabled = 1;
 
 void __init init_vdso_image(const struct vdso_image *image)
 {
-       int i;
-       int npages = (image->size) / PAGE_SIZE;
-
        BUG_ON(image->size % PAGE_SIZE != 0);
-       for (i = 0; i < npages; i++)
-               image->text_mapping.pages[i] =
-                       virt_to_page(image->data + i*PAGE_SIZE);
 
        apply_alternatives((struct alt_instr *)(image->data + image->alt),
                           (struct alt_instr *)(image->data + image->alt +
@@ -90,18 +84,87 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
 #endif
 }
 
+static int vdso_fault(const struct vm_special_mapping *sm,
+                     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+
+       if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
+               return VM_FAULT_SIGBUS;
+
+       vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+       get_page(vmf->page);
+       return 0;
+}
+
+static const struct vm_special_mapping text_mapping = {
+       .name = "[vdso]",
+       .fault = vdso_fault,
+};
+
+static int vvar_fault(const struct vm_special_mapping *sm,
+                     struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+       long sym_offset;
+       int ret = -EFAULT;
+
+       if (!image)
+               return VM_FAULT_SIGBUS;
+
+       sym_offset = (long)(vmf->pgoff << PAGE_SHIFT) +
+               image->sym_vvar_start;
+
+       /*
+        * Sanity check: a symbol offset of zero means that the page
+        * does not exist for this vdso image, not that the page is at
+        * offset zero relative to the text mapping.  This should be
+        * impossible here, because sym_offset should only be zero for
+        * the page past the end of the vvar mapping.
+        */
+       if (sym_offset == 0)
+               return VM_FAULT_SIGBUS;
+
+       if (sym_offset == image->sym_vvar_page) {
+               ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address,
+                                   __pa_symbol(&__vvar_page) >> PAGE_SHIFT);
+       } else if (sym_offset == image->sym_hpet_page) {
+#ifdef CONFIG_HPET_TIMER
+               if (hpet_address && vclock_was_used(VCLOCK_HPET)) {
+                       ret = vm_insert_pfn_prot(
+                               vma,
+                               (unsigned long)vmf->virtual_address,
+                               hpet_address >> PAGE_SHIFT,
+                               pgprot_noncached(PAGE_READONLY));
+               }
+#endif
+       } else if (sym_offset == image->sym_pvclock_page) {
+               struct pvclock_vsyscall_time_info *pvti =
+                       pvclock_pvti_cpu0_va();
+               if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) {
+                       ret = vm_insert_pfn(
+                               vma,
+                               (unsigned long)vmf->virtual_address,
+                               __pa(pvti) >> PAGE_SHIFT);
+               }
+       }
+
+       if (ret == 0 || ret == -EBUSY)
+               return VM_FAULT_NOPAGE;
+
+       return VM_FAULT_SIGBUS;
+}
+
 static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr, text_start;
        int ret = 0;
-       static struct page *no_pages[] = {NULL};
-       static struct vm_special_mapping vvar_mapping = {
+       static const struct vm_special_mapping vvar_mapping = {
                .name = "[vvar]",
-               .pages = no_pages,
+               .fault = vvar_fault,
        };
-       struct pvclock_vsyscall_time_info *pvti;
 
        if (calculate_addr) {
                addr = vdso_addr(current->mm->start_stack,
@@ -121,6 +184,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 
        text_start = addr - image->sym_vvar_start;
        current->mm->context.vdso = (void __user *)text_start;
+       current->mm->context.vdso_image = image;
 
        /*
         * MAYWRITE to allow gdb to COW and set breakpoints
@@ -130,7 +194,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
                                       image->size,
                                       VM_READ|VM_EXEC|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-                                      &image->text_mapping);
+                                      &text_mapping);
 
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
@@ -140,7 +204,8 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
        vma = _install_special_mapping(mm,
                                       addr,
                                       -image->sym_vvar_start,
-                                      VM_READ|VM_MAYREAD,
+                                      VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+                                      VM_PFNMAP,
                                       &vvar_mapping);
 
        if (IS_ERR(vma)) {
@@ -148,41 +213,6 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
                goto up_fail;
        }
 
-       if (image->sym_vvar_page)
-               ret = remap_pfn_range(vma,
-                                     text_start + image->sym_vvar_page,
-                                     __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
-                                     PAGE_SIZE,
-                                     PAGE_READONLY);
-
-       if (ret)
-               goto up_fail;
-
-#ifdef CONFIG_HPET_TIMER
-       if (hpet_address && image->sym_hpet_page) {
-               ret = io_remap_pfn_range(vma,
-                       text_start + image->sym_hpet_page,
-                       hpet_address >> PAGE_SHIFT,
-                       PAGE_SIZE,
-                       pgprot_noncached(PAGE_READONLY));
-
-               if (ret)
-                       goto up_fail;
-       }
-#endif
-
-       pvti = pvclock_pvti_cpu0_va();
-       if (pvti && image->sym_pvclock_page) {
-               ret = remap_pfn_range(vma,
-                                     text_start + image->sym_pvclock_page,
-                                     __pa(pvti) >> PAGE_SHIFT,
-                                     PAGE_SIZE,
-                                     PAGE_READONLY);
-
-               if (ret)
-                       goto up_fail;
-       }
-
 up_fail:
        if (ret)
                current->mm->context.vdso = NULL;
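
A worked sketch of the vvar_fault() offset arithmetic above (the numbers are invented for illustration): suppose the vvar area is three pages and sits immediately before the vdso text, so

	image->sym_vvar_start = -12288;	/* -3 * PAGE_SIZE */
	image->sym_vvar_page  = -12288;
	image->sym_hpet_page  = -8192;

A fault at pgoff 1 of the [vvar] VMA then computes

	sym_offset = (1 << PAGE_SHIFT) + (-12288) = -8192

which matches sym_hpet_page, so the HPET PFN is inserted. pgoff 0 matches sym_vvar_page, and pgoff 3 (the page just past the vvar mapping) yields sym_offset == 0, the "no such page" sentinel rejected at the top of the handler.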
index 51e3304169951619362ea4a1494716e4f20696bf..0fb3a104ac626b07e0a4e604a8ebf36764bea2cf 100644
--- a/arch/x86/entry/vsyscall/vsyscall_gtod.c
+++ b/arch/x86/entry/vsyscall/vsyscall_gtod.c
@@ -16,6 +16,8 @@
 #include <asm/vgtod.h>
 #include <asm/vvar.h>
 
+int vclocks_used __read_mostly;
+
 DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
 void update_vsyscall_tz(void)
@@ -26,12 +28,17 @@ void update_vsyscall_tz(void)
 
 void update_vsyscall(struct timekeeper *tk)
 {
+       int vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
        struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
+       /* Mark the new vclock used. */
+       BUILD_BUG_ON(VCLOCK_MAX >= 32);
+       WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));
+
        gtod_write_begin(vdata);
 
        /* copy vsyscall data */
-       vdata->vclock_mode      = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->vclock_mode      = vclock_mode;
        vdata->cycle_last       = tk->tkr_mono.cycle_last;
        vdata->mask             = tk->tkr_mono.mask;
        vdata->mult             = tk->tkr_mono.mult;
index eda81dc0f4ae091c5ff085450ff277f68aa933a9..d194266acb28e52d237c19c21291633c15d99c9e 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,10 +3,11 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#define VCLOCK_NONE 0  /* No vDSO clock available.     */
-#define VCLOCK_TSC  1  /* vDSO should use vread_tsc.   */
-#define VCLOCK_HPET 2  /* vDSO should use vread_hpet.  */
-#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_NONE    0  /* No vDSO clock available.  */
+#define VCLOCK_TSC     1  /* vDSO should use vread_tsc.        */
+#define VCLOCK_HPET    2  /* vDSO should use vread_hpet.       */
+#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
+#define VCLOCK_MAX     3
 
 struct arch_clocksource_data {
        int vclock_mode;
index 793179cf8e21aa89636f869fc3a9e2fe0b4a29e0..6e4d170726b758a75a46777f416d07bc7ada01e7 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -1,23 +1,44 @@
-#ifdef __ASSEMBLY__
+#ifndef _ASM_X86_FRAME_H
+#define _ASM_X86_FRAME_H
 
 #include <asm/asm.h>
 
-/* The annotation hides the frame from the unwinder and makes it look
-   like a ordinary ebp save/restore. This avoids some special cases for
-   frame pointer later */
+/*
+ * These are stack frame creation macros.  They should be used by every
+ * callable non-leaf asm function to make kernel stack traces more reliable.
+ */
+
 #ifdef CONFIG_FRAME_POINTER
-       .macro FRAME
-       __ASM_SIZE(push,)       %__ASM_REG(bp)
-       __ASM_SIZE(mov)         %__ASM_REG(sp), %__ASM_REG(bp)
-       .endm
-       .macro ENDFRAME
-       __ASM_SIZE(pop,)        %__ASM_REG(bp)
-       .endm
-#else
-       .macro FRAME
-       .endm
-       .macro ENDFRAME
-       .endm
-#endif
-
-#endif  /*  __ASSEMBLY__  */
+
+#ifdef __ASSEMBLY__
+
+.macro FRAME_BEGIN
+       push %_ASM_BP
+       _ASM_MOV %_ASM_SP, %_ASM_BP
+.endm
+
+.macro FRAME_END
+       pop %_ASM_BP
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#define FRAME_BEGIN                            \
+       "push %" _ASM_BP "\n"                   \
+       _ASM_MOV "%" _ASM_SP ", %" _ASM_BP "\n"
+
+#define FRAME_END "pop %" _ASM_BP "\n"
+
+#endif /* __ASSEMBLY__ */
+
+#define FRAME_OFFSET __ASM_SEL(4, 8)
+
+#else /* !CONFIG_FRAME_POINTER */
+
+#define FRAME_BEGIN
+#define FRAME_END
+#define FRAME_OFFSET 0
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_FRAME_H */
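
On the C side, FRAME_BEGIN/FRAME_END are plain strings meant to be pasted into inline asm. A minimal sketch, assuming CONFIG_FRAME_POINTER=y; the helper is hypothetical, not from this commit:

	#include <asm/frame.h>

	static inline unsigned long read_flags_framed(void)
	{
		unsigned long flags;

		/* Set up and tear down a standard bp-based frame so an
		 * unwinder walking through the asm sees an ordinary frame. */
		asm volatile(FRAME_BEGIN
			     "pushf\n\t"
			     "pop %0\n\t"
			     FRAME_END
			     : "=r" (flags));
		return flags;
	}

The .macro flavor is used the same way in .S files: FRAME_BEGIN as the first instruction of a callable non-leaf function, FRAME_END just before ret.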
index 55234d5e7160db83bd9854a40c87b8e70593b3c7..1ea0baef1175c407e9097e125a84b226be28266c 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -19,7 +19,8 @@ typedef struct {
 #endif
 
        struct mutex lock;
-       void __user *vdso;
+       void __user *vdso;                      /* vdso base address */
+       const struct vdso_image *vdso_image;    /* vdso image in use */
 
        atomic_t perf_rdpmc_allowed;    /* nonzero if rdpmc is allowed */
 } mm_context_t;
index deabaf9759b640d5cd93f50f9db67ef2dc60a807..43dc55be524e7fdc8e5150f87b02a8639b20b3f3 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -13,9 +13,6 @@ struct vdso_image {
        void *data;
        unsigned long size;   /* Always a multiple of PAGE_SIZE */
 
-       /* text_mapping.pages is big enough for data/size page pointers */
-       struct vm_special_mapping text_mapping;
-
        unsigned long alt, alt_len;
 
        long sym_vvar_start;  /* Negative offset to the vvar area */
index f556c4843aa18af74359dfeb2a41d39d9a2c3bb9..e728699db7741f0282441a79635a88afd23259c8 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -37,6 +37,12 @@ struct vsyscall_gtod_data {
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+extern int vclocks_used;
+static inline bool vclock_was_used(int vclock)
+{
+       return READ_ONCE(vclocks_used) & (1 << vclock);
+}
+
 static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
 {
        unsigned ret;
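
Together with the update_vsyscall() hunk above, this forms a one-bit-per-vclock "ever used" mask. A recap sketch, built from lines in this commit:

	/* Writer (update_vsyscall): record that a mode was enabled. */
	WRITE_ONCE(vclocks_used, READ_ONCE(vclocks_used) | (1 << vclock_mode));

	/* Reader (vvar_fault): only expose the HPET page if the HPET
	 * vclock was ever used, so a stale mapping cannot appear. */
	if (hpet_address && vclock_was_used(VCLOCK_HPET))
		ret = vm_insert_pfn_prot(vma,
					 (unsigned long)vmf->virtual_address,
					 hpet_address >> PAGE_SHIFT,
					 pgprot_noncached(PAGE_READONLY));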
index ffdc0e8603902b12b0cd44fd175d806b046ccb4b..2e974680f5ad4acb514f50bc69833a5e0ecb6268 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,7 +38,6 @@
 #define pud_index(x)   (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
 L4_PAGE_OFFSET = pgd_index(__PAGE_OFFSET)
-L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
 L4_START_KERNEL = pgd_index(__START_KERNEL_map)
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
index cb6282c3638ffbd32bcb33663d8cefc17eac8a8e..c07ff5ddbd477eb44edb9ea960623d17b6d43924 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -692,12 +692,15 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-#if defined(CONFIG_X86_32) || !defined(CONFIG_X86_64)
+#ifdef CONFIG_X86_64
+       if (is_ia32_task())
+               return __NR_ia32_restart_syscall;
+#endif
+#ifdef CONFIG_X86_X32_ABI
+       return __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
+#else
        return __NR_restart_syscall;
-#else /* !CONFIG_X86_32 && CONFIG_X86_64 */
-       return test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall :
-               __NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT);
-#endif /* CONFIG_X86_32 || !CONFIG_X86_64 */
+#endif
 }
 
 /*
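
A worked example of the new selection logic (assuming __X32_SYSCALL_BIT is 0x40000000, its uapi value): an x32 task enters the kernel with that bit set in regs->orig_ax, so

	__NR_restart_syscall | (regs->orig_ax & __X32_SYSCALL_BIT)

keeps the x32 marker and the restart is dispatched through the x32 path; for a plain 64-bit task the masked term is zero and the expression collapses to __NR_restart_syscall. Compat (ia32) tasks are caught first by the is_ia32_task() branch.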
index f1cd22f2df1ac50438e7d70bb0df85c44580b8d3..a7fef5958730ef49798df55d16cc544c38ec4178 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2137,6 +2137,8 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                        unsigned long pfn);
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+                       unsigned long pfn, pgprot_t pgprot);
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                        pfn_t pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
index d3ebb9d21a5334d26e85bc865d318535f5864569..87577d37f97b95e499986acd1f37d92abff989dc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -566,10 +566,26 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
 }
 #endif
 
-struct vm_special_mapping
-{
-       const char *name;
+struct vm_fault;
+
+struct vm_special_mapping {
+       const char *name;       /* The name, e.g. "[vdso]". */
+
+       /*
+        * If .fault is not provided, this points to a
+        * NULL-terminated array of pages that back the special mapping.
+        *
+        * This must not be NULL unless .fault is provided.
+        */
        struct page **pages;
+
+       /*
+        * If non-NULL, then this is called to resolve page faults
+        * on the special mapping.  If used, .pages is not checked.
+        */
+       int (*fault)(const struct vm_special_mapping *sm,
+                    struct vm_area_struct *vma,
+                    struct vm_fault *vmf);
 };
 
 enum tlb_flush_reason {
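
A minimal sketch of a fault-based special mapping (hypothetical names; the vdso conversion above is the real in-tree user):

	static struct page *demo_page;	/* assumed allocated elsewhere */

	static int demo_fault(const struct vm_special_mapping *sm,
			      struct vm_area_struct *vma,
			      struct vm_fault *vmf)
	{
		if (vmf->pgoff != 0)	/* single-page mapping */
			return VM_FAULT_SIGBUS;
		vmf->page = demo_page;
		get_page(vmf->page);
		return 0;
	}

	static const struct vm_special_mapping demo_mapping = {
		.name  = "[demo]",
		.fault = demo_fault,	/* .pages is ignored when set */
	};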
index 30991f83d0bf54dc537f1927a10883aa5787dd97..5aa4f55eb7865323318fbdfa94bb44e73e0f1ae8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1550,9 +1550,30 @@ out:
  */
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                        unsigned long pfn)
+{
+       return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
+/**
+ * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vm_insert_pfn, except that it allows drivers
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * cow mappings.  In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ */
+int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+                       unsigned long pfn, pgprot_t pgprot)
 {
        int ret;
-       pgprot_t pgprot = vma->vm_page_prot;
        /*
         * Technically, architectures with pte_special can avoid all these
         * restrictions (same for remap_pfn_range).  However we would like
@@ -1574,7 +1595,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
        return ret;
 }
-EXPORT_SYMBOL(vm_insert_pfn);
+EXPORT_SYMBOL(vm_insert_pfn_prot);
 
 int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
                        pfn_t pfn)
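
The intended caller pattern mirrors the vvar_fault() hunk earlier in this commit; a sketch, with 'phys' standing in for a hypothetical device address:

	int ret = vm_insert_pfn_prot(vma,
				     (unsigned long)vmf->virtual_address,
				     phys >> PAGE_SHIFT,
				     pgprot_noncached(PAGE_READONLY));
	if (ret == 0 || ret == -EBUSY)
		return VM_FAULT_NOPAGE;	/* PTE installed, or lost a race */
	return VM_FAULT_SIGBUS;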
index 84b12624ceb01d83762172634179825b086961fd..407ab434d5ee6728613e155cc1f6d74b6b7a0a0d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3033,11 +3033,16 @@ static int special_mapping_fault(struct vm_area_struct *vma,
        pgoff_t pgoff;
        struct page **pages;
 
-       if (vma->vm_ops == &legacy_special_mapping_vmops)
+       if (vma->vm_ops == &legacy_special_mapping_vmops) {
                pages = vma->vm_private_data;
-       else
-               pages = ((struct vm_special_mapping *)vma->vm_private_data)->
-                       pages;
+       } else {
+               struct vm_special_mapping *sm = vma->vm_private_data;
+
+               if (sm->fault)
+                       return sm->fault(sm, vma, vmf);
+
+               pages = sm->pages;
+       }
 
        for (pgoff = vmf->pgoff; pgoff && *pages; ++pages)
                pgoff--;