]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Jun 2008 19:48:50 +0000 (12:48 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 23 Jun 2008 19:48:50 +0000 (12:48 -0700)
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: refactor wait_for_completion_timeout()
  sched: fix wait_for_completion_timeout() spurious failure under heavy load
  sched: rt: dont stop the period timer when there are tasks wanting to run

12 files changed:
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/xen-head.S
drivers/char/tty_ioctl.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/lguest/x86/core.c
drivers/xen/events.c
fs/select.c
include/linux/tty_driver.h
mm/memory.c
sound/isa/sb/sb_mixer.c
sound/pci/aw2/aw2-alsa.c

index c8a56e457d61a35c21040bbe91486f9ceb9bf6bf..c048de34d6a1dad0671fc966e1f332ec3db77219 100644 (file)
@@ -1228,6 +1228,11 @@ asmlinkage void __init xen_start_kernel(void)
        if (xen_feature(XENFEAT_supervisor_mode_kernel))
                pv_info.kernel_rpl = 0;
 
+       /* Prevent unwanted bits from being set in PTEs. */
+       __supported_pte_mask &= ~_PAGE_GLOBAL;
+       if (!is_initial_xendomain())
+               __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
        /* set the limit of our address space */
        xen_reserve_top();
 
index 3525ef523a74c6e102954a36ab7763a40f3de444..265601d5a6ae3d95887a33074b15b564d72a4c9b 100644 (file)
@@ -179,48 +179,54 @@ out:
                preempt_enable();
 }
 
-pteval_t xen_pte_val(pte_t pte)
+/* Assume pteval_t is equivalent to all the other *val_t types. */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
 {
-       pteval_t ret = pte.pte;
+       if (val & _PAGE_PRESENT) {
+               unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+               pteval_t flags = val & ~PTE_MASK;
+               val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+       }
+
+       return val;
+}
+
+static pteval_t pte_pfn_to_mfn(pteval_t val)
+{
+       if (val & _PAGE_PRESENT) {
+               unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+               pteval_t flags = val & ~PTE_MASK;
+               val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+       }
 
-       if (ret & _PAGE_PRESENT)
-               ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+       return val;
+}
 
-       return ret;
+pteval_t xen_pte_val(pte_t pte)
+{
+       return pte_mfn_to_pfn(pte.pte);
 }
 
 pgdval_t xen_pgd_val(pgd_t pgd)
 {
-       pgdval_t ret = pgd.pgd;
-       if (ret & _PAGE_PRESENT)
-               ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-       return ret;
+       return pte_mfn_to_pfn(pgd.pgd);
 }
 
 pte_t xen_make_pte(pteval_t pte)
 {
-       if (pte & _PAGE_PRESENT) {
-               pte = phys_to_machine(XPADDR(pte)).maddr;
-               pte &= ~(_PAGE_PCD | _PAGE_PWT);
-       }
-
-       return (pte_t){ .pte = pte };
+       pte = pte_pfn_to_mfn(pte);
+       return native_make_pte(pte);
 }
 
 pgd_t xen_make_pgd(pgdval_t pgd)
 {
-       if (pgd & _PAGE_PRESENT)
-               pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-       return (pgd_t){ pgd };
+       pgd = pte_pfn_to_mfn(pgd);
+       return native_make_pgd(pgd);
 }
 
 pmdval_t xen_pmd_val(pmd_t pmd)
 {
-       pmdval_t ret = native_pmd_val(pmd);
-       if (ret & _PAGE_PRESENT)
-               ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-       return ret;
+       return pte_mfn_to_pfn(pmd.pmd);
 }
 #ifdef CONFIG_X86_PAE
 void xen_set_pud(pud_t *ptr, pud_t val)
@@ -267,9 +273,7 @@ void xen_pmd_clear(pmd_t *pmdp)
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
-       if (pmd & _PAGE_PRESENT)
-               pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+       pmd = pte_pfn_to_mfn(pmd);
        return native_make_pmd(pmd);
 }
 #else  /* !PAE */
index 288d587ce73cef8562fbccf751dcd395c4131d25..3175e973fd0d9475ad2f1ce0724dadc44cf394a3 100644 (file)
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
 
        __FINIT
 
-.pushsection .bss.page_aligned
+.pushsection .text
        .align PAGE_SIZE_asm
 ENTRY(hypercall_page)
        .skip 0x1000
index b1a757a5ee271c9c4a4dc5a4b2fc18c35c7fb491..8f81139d61949b4f4300baaacb69e56e473a8a69 100644 (file)
@@ -981,16 +981,9 @@ EXPORT_SYMBOL_GPL(tty_perform_flush);
 int n_tty_ioctl(struct tty_struct *tty, struct file *file,
                       unsigned int cmd, unsigned long arg)
 {
-       struct tty_struct *real_tty;
        unsigned long flags;
        int retval;
 
-       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-           tty->driver->subtype == PTY_TYPE_MASTER)
-               real_tty = tty->link;
-       else
-               real_tty = tty;
-
        switch (cmd) {
        case TCXONC:
                retval = tty_check_change(tty);
index b224079d4e1fd8898944de6c083e1889eabadf6f..d5862e5d99a09ce89509976b9bdc5049e49e5b26 100644 (file)
@@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
 {
        struct page *page;
 
-       page = alloc_pages(gfp_mask, order);
+       /*
+        * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+        * cleared, and subtle failures are seen if they aren't.
+        */
+       page = alloc_pages(gfp_mask | __GFP_ZERO, order);
        if (!page)
                return -ENOMEM;
 
index 5126d5d9ea0e8d8d97fa0eaf47ed22216329328b..2e554a4ab33792dc7e92318cafe11a75e6c35246 100644 (file)
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
         * we set it now, so we can trap and pass that trap to the Guest if it
         * uses the FPU. */
        if (cpu->ts)
-               lguest_set_ts();
+               unlazy_fpu(current);
 
        /* SYSENTER is an optimized way of doing system calls.  We can't allow
         * it because it always jumps to privilege level 0.  A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
         * trap made the switcher code come back, and an error code which some
         * traps set.  */
 
+        /* Restore SYSENTER if it's supposed to be on. */
+        if (boot_cpu_has(X86_FEATURE_SEP))
+               wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+
        /* If the Guest page faulted, then the cr2 register will tell us the
         * bad virtual address.  We have to grab this now, because once we
         * re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
        if (cpu->regs->trapnum == 14)
                cpu->arch.last_pagefault = read_cr2();
        /* Similarly, if we took a trap because the Guest used the FPU,
-        * we have to restore the FPU it expects to see. */
+        * we have to restore the FPU it expects to see.
+        * math_state_restore() may sleep and we may even move off to
+        * a different CPU. So all the critical stuff should be done
+        * before this.  */
        else if (cpu->regs->trapnum == 7)
                math_state_restore();
-
-       /* Restore SYSENTER if it's supposed to be on. */
-       if (boot_cpu_has(X86_FEATURE_SEP))
-               wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 }
 
 /*H:130 Now we've examined the hypercall code; our Guest can make requests.
index 4f0f22b020ea4aa192dbab56cba76edd6076a837..76e5b7386af953d7a8ee7c1ff7b17ec28a0fe407 100644 (file)
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 
 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
                /* Clear master flag /before/ clearing selector flag. */
-               rmb();
+               wmb();
 #endif
                pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
                while (pending_words != 0) {
index 8dda969614a9163f602fa62dca53a411c3400136..da0e88201c3a03bfa7e44eea8805a62dcd540539 100644 (file)
@@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
                                                retval++;
                                        }
                                }
-                               cond_resched();
                        }
                        if (res_in)
                                *rinp = res_in;
@@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
                                *routp = res_out;
                        if (res_ex)
                                *rexp = res_ex;
+                       cond_resched();
                }
                wait = NULL;
                if (retval || !*timeout || signal_pending(current))
index 59f1c0bd8f9c6f8734a1320185a3eb28a9b29990..d2a003586761cbef8f5913ec49831c7cf41e7624 100644 (file)
@@ -27,8 +27,7 @@
  *     This routine is called by the kernel to write a series of
  *     characters to the tty device.  The characters may come from
  *     user space or kernel space.  This routine will return the
- *     number of characters actually accepted for writing.  This
- *     routine is mandatory.
+ *     number of characters actually accepted for writing.
  *
  *     Optional: Required for writable devices.
  *
  *     This routine notifies the tty driver that it should hangup the
  *     tty device.
  *
- *     Required:
+ *     Optional:
  *
  * void (*break_ctl)(struct tty_stuct *tty, int state);
  *
index 9aefaae4685812e12505551ade19ed6183220422..d14b251a25a638dec1b5327a6261a67660ba67d5 100644 (file)
@@ -1045,6 +1045,26 @@ no_page_table:
        return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+       /*
+        * We don't want to optimize FOLL_ANON for make_pages_present()
+        * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+        * we want to get the page from the page tables to make sure
+        * that we serialize and update with any other user of that
+        * mapping.
+        */
+       if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+               return 0;
+       /*
+        * And if we have a fault or a nopfn routine, it's not an
+        * anonymous region.
+        */
+       return !vma->vm_ops ||
+               (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, int len, int write, int force,
                struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                foll_flags = FOLL_TOUCH;
                if (pages)
                        foll_flags |= FOLL_GET;
-               if (!write && !(vma->vm_flags & VM_LOCKED) &&
-                   (!vma->vm_ops || !vma->vm_ops->fault))
+               if (!write && use_zero_page(vma))
                        foll_flags |= FOLL_ANON;
 
                do {
@@ -1766,7 +1785,6 @@ gotten:
        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
        if (likely(pte_same(*page_table, orig_pte))) {
                if (old_page) {
-                       page_remove_rmap(old_page, vma);
                        if (!PageAnon(old_page)) {
                                dec_mm_counter(mm, file_rss);
                                inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1806,32 @@ gotten:
                lru_cache_add_active(new_page);
                page_add_new_anon_rmap(new_page, vma, address);
 
+               if (old_page) {
+                       /*
+                        * Only after switching the pte to the new page may
+                        * we remove the mapcount here. Otherwise another
+                        * process may come and find the rmap count decremented
+                        * before the pte is switched to the new page, and
+                        * "reuse" the old page writing into it while our pte
+                        * here still points into it and can be read by other
+                        * threads.
+                        *
+                        * The critical issue is to order this
+                        * page_remove_rmap with the ptp_clear_flush above.
+                        * Those stores are ordered by (if nothing else,)
+                        * the barrier present in the atomic_add_negative
+                        * in page_remove_rmap.
+                        *
+                        * Then the TLB flush in ptep_clear_flush ensures that
+                        * no process can access the old page before the
+                        * decremented mapcount is visible. And the old page
+                        * cannot be reused until after the decremented
+                        * mapcount is visible. So transitively, TLBs to
+                        * old page will be flushed before it can be reused.
+                        */
+                       page_remove_rmap(old_page, vma);
+               }
+
                /* Free the old page.. */
                new_page = old_page;
                ret |= VM_FAULT_WRITE;
index 91d14224f6b332de1119d89bc07bff76c00f37cc..73d4572d136bb893da5435ecc97313a52ccb7ba0 100644 (file)
@@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = {
 static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 {
        unsigned char *val = chip->saved_regs;
-       snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+       snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
        for (; num_regs; num_regs--)
                *val++ = snd_sbmixer_read(chip, *regs++);
 }
@@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 {
        unsigned char *val = chip->saved_regs;
-       snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+       snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
        for (; num_regs; num_regs--)
                snd_sbmixer_write(chip, *regs++, *val++);
 }
index 56f87cd33c19fcef9f5afb08bc472042a04a89fa..3f00ddf450f81c287b198cdbdf4c8a17b180c4f8 100644 (file)
@@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card,
                return -ENOMEM;
        }
 
+       /* (2) initialization of the chip hardware */
+       snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
 
        if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
                        IRQF_SHARED, "Audiowerk2", chip)) {
@@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card,
        }
        chip->irq = pci->irq;
 
-       /* (2) initialization of the chip hardware */
-       snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
        err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
        if (err < 0) {
                free_irq(chip->irq, (void *)chip);