Merge branch 'sched/urgent' into sched/core
author Ingo Molnar <mingo@elte.hu>
Thu, 5 Aug 2010 07:46:29 +0000 (09:46 +0200)
committer Ingo Molnar <mingo@elte.hu>
Thu, 5 Aug 2010 07:46:29 +0000 (09:46 +0200)
Conflicts:
include/linux/sched.h

Merge reason: Add the leftover .35 urgent bits, fix the conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
172 files changed:
Documentation/00-INDEX
Documentation/IO-mapping.txt [deleted file]
Documentation/bus-virt-phys-mapping.txt [new file with mode: 0644]
MAINTAINERS
arch/parisc/kernel/ftrace.c
arch/powerpc/include/asm/cputable.h
arch/powerpc/kernel/process.c
arch/sparc/configs/sparc64_defconfig
arch/sparc/include/asm/cache.h
arch/sparc/include/asm/pgtable_32.h
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/sun4d_irq.c
arch/sparc/kernel/ttable.S
arch/sparc/mm/srmmu.c
arch/sparc/mm/sun4c.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/early-quirks.c
arch/x86/kernel/kprobes.c
arch/x86/kernel/quirks.c
arch/x86/kernel/setup_percpu.c
arch/x86/kvm/mmu.c
arch/x86/pci/i386.c
arch/x86/pci/mrst.c
drivers/clocksource/cs5535-clockevt.c
drivers/edac/Kconfig
drivers/edac/mpc85xx_edac.c
drivers/gpio/cs5535-gpio.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/nouveau/nouveau_bios.c
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/radeon/r100.c
drivers/gpu/drm/radeon/r300.c
drivers/gpu/drm/radeon/r520.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/r600_blit.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_legacy_tv.c
drivers/gpu/drm/radeon/rs400.c
drivers/gpu/drm/radeon/rs600.c
drivers/gpu/drm/radeon/rs690.c
drivers/gpu/drm/radeon/rv515.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/misc/cs5535-mfgpt.c
drivers/mmc/host/sdhci-s3c.c
drivers/net/ibmveth.c
drivers/net/pcmcia/axnet_cs.c
drivers/net/r8169.c
drivers/net/wireless/ath/ath9k/hif_usb.c
drivers/net/wireless/hostap/hostap_pci.c
drivers/net/wireless/iwlwifi/iwl-sta.h
drivers/net/wireless/rt2x00/rt2x00dev.c
drivers/pci/setup-res.c
drivers/pcmcia/pcmcia_resource.c
drivers/platform/x86/intel_scu_ipc.c
drivers/power/ds2782_battery.c
drivers/s390/block/dasd_devmap.c
drivers/s390/cio/chsc.c
drivers/sbus/char/openprom.c
drivers/serial/suncore.c
drivers/serial/sunsu.c
drivers/usb/gadget/f_fs.c
drivers/vhost/net.c
drivers/video/aty/radeon_pm.c
fs/btrfs/ctree.c
fs/btrfs/ioctl.c
fs/ceph/auth_x.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/messenger.c
fs/ceph/osdmap.c
fs/dcache.c
fs/gfs2/glock.c
fs/gfs2/quota.c
fs/gfs2/quota.h
fs/inode.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
fs/jffs2/xattr.c
fs/mbcache.c
fs/nfs/dir.c
fs/nfs/internal.h
fs/ocfs2/aops.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/file.c
fs/ocfs2/file.h
fs/ocfs2/journal.c
fs/ocfs2/localalloc.c
fs/ocfs2/quota_global.c
fs/ocfs2/quota_local.c
fs/ocfs2/refcounttree.c
fs/ocfs2/suballoc.c
fs/ocfs2/xattr.c
fs/partitions/ibm.c
fs/quota/dquot.c
fs/ubifs/shrinker.c
fs/ubifs/ubifs.h
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/linux-2.6/xfs_sync.h
fs/xfs/linux-2.6/xfs_trace.h
fs/xfs/quota/xfs_qm.c
fs/xfs/xfs_mount.h
include/linux/cpu.h
include/linux/cpuset.h
include/linux/fb.h
include/linux/fdtable.h
include/linux/jbd2.h
include/linux/mm.h
include/linux/pci.h
include/linux/perf_event.h
include/linux/sched.h
include/linux/topology.h
include/linux/vgaarb.h
include/math-emu/op-common.h
include/net/sock.h
ipc/sem.c
kernel/cpu.c
kernel/cpuset.c
kernel/early_res.c
kernel/fork.c
kernel/hrtimer.c
kernel/lockdep.c
kernel/perf_event.c
kernel/posix-cpu-timers.c
kernel/rcutorture.c
kernel/sched.c
kernel/sched_clock.c
kernel/sched_cpupri.c
kernel/sched_cpupri.h
kernel/sched_debug.c
kernel/sched_fair.c
kernel/sched_rt.c
kernel/sched_stats.h
kernel/time/tick-sched.c
kernel/timer.c
kernel/trace/trace_clock.c
kernel/workqueue_sched.h [new file with mode: 0644]
mm/bootmem.c
mm/page_alloc.c
mm/page_cgroup.c
mm/vmscan.c
net/bluetooth/hci_conn.c
net/bluetooth/hci_event.c
net/bluetooth/l2cap.c
net/bridge/br_device.c
net/bridge/br_forward.c
net/core/dev.c
net/core/neighbour.c
net/dsa/Kconfig
net/ipv4/ipmr.c
net/ipv4/tcp.c
net/ipv4/tcp_output.c
net/ipv6/mip6.c
net/phonet/pep.c
net/sched/act_nat.c
net/sunrpc/auth.c
net/xfrm/xfrm_policy.c
sound/soc/codecs/Kconfig
sound/soc/codecs/wm8727.c
sound/soc/codecs/wm8776.c
sound/soc/codecs/wm8988.c
sound/soc/sh/fsi.c
tools/perf/arch/sparc/Makefile [new file with mode: 0644]
tools/perf/arch/sparc/util/dwarf-regs.c [new file with mode: 0644]

index dd10b51b4e652570df9a6575ac2df654bbaca15d..5405f7aecefc334b7d16c0f2764a8757e845ad43 100644 (file)
@@ -32,8 +32,6 @@ DocBook/
        - directory with DocBook templates etc. for kernel documentation.
 HOWTO
        - the process and procedures of how to do Linux kernel development.
-IO-mapping.txt
-       - how to access I/O mapped memory from within device drivers.
 IPMI.txt
        - info on Linux Intelligent Platform Management Interface (IPMI) Driver.
 IRQ-affinity.txt
@@ -84,6 +82,8 @@ blockdev/
        - info on block devices & drivers
 btmrvl.txt
        - info on Marvell Bluetooth driver usage.
+bus-virt-phys-mapping.txt
+       - how to access I/O mapped memory from within device drivers.
 cachetlb.txt
        - describes the cache/TLB flushing interfaces Linux uses.
 cdrom/
@@ -168,6 +168,8 @@ initrd.txt
        - how to use the RAM disk as an initial/temporary root filesystem.
 input/
        - info on Linux input device support.
+io-mapping.txt
+       - description of io_mapping functions in linux/io-mapping.h
 io_ordering.txt
        - info on ordering I/O writes to memory-mapped addresses.
 ioctl/
diff --git a/Documentation/IO-mapping.txt b/Documentation/IO-mapping.txt
deleted file mode 100644 (file)
index 1b5aa10..0000000
+++ /dev/null
@@ -1,208 +0,0 @@
-[ NOTE: The virt_to_bus() and bus_to_virt() functions have been
-       superseded by the functionality provided by the PCI DMA interface
-       (see Documentation/PCI/PCI-DMA-mapping.txt).  They continue
-       to be documented below for historical purposes, but new code
-       must not use them. --davidm 00/12/12 ]
-
-[ This is a mail message in response to a query on IO mapping, thus the
-  strange format for a "document" ]
-
-The AHA-1542 is a bus-master device, and your patch makes the driver give the
-controller the physical address of the buffers, which is correct on x86
-(because all bus master devices see the physical memory mappings directly). 
-
-However, on many setups, there are actually _three_ different ways of looking
-at memory addresses, and in this case we actually want the third, the
-so-called "bus address". 
-
-Essentially, the three ways of addressing memory are (this is "real memory",
-that is, normal RAM--see later about other details): 
-
- - CPU untranslated.  This is the "physical" address.  Physical address 
-   0 is what the CPU sees when it drives zeroes on the memory bus.
-
- - CPU translated address. This is the "virtual" address, and is 
-   completely internal to the CPU itself with the CPU doing the appropriate
-   translations into "CPU untranslated". 
-
- - bus address. This is the address of memory as seen by OTHER devices, 
-   not the CPU. Now, in theory there could be many different bus 
-   addresses, with each device seeing memory in some device-specific way, but
-   happily most hardware designers aren't actually actively trying to make
-   things any more complex than necessary, so you can assume that all 
-   external hardware sees the memory the same way. 
-
-Now, on normal PCs the bus address is exactly the same as the physical
-address, and things are very simple indeed. However, they are that simple
-because the memory and the devices share the same address space, and that is
-not generally necessarily true on other PCI/ISA setups. 
-
-Now, just as an example, on the PReP (PowerPC Reference Platform), the 
-CPU sees a memory map something like this (this is from memory):
-
-       0-2 GB          "real memory"
-       2 GB-3 GB       "system IO" (inb/out and similar accesses on x86)
-       3 GB-4 GB       "IO memory" (shared memory over the IO bus)
-
-Now, that looks simple enough. However, when you look at the same thing from
-the viewpoint of the devices, you have the reverse, and the physical memory
-address 0 actually shows up as address 2 GB for any IO master.
-
-So when the CPU wants any bus master to write to physical memory 0, it 
-has to give the master address 0x80000000 as the memory address.
-
-So, for example, depending on how the kernel is actually mapped on the 
-PPC, you can end up with a setup like this:
-
- physical address:     0
- virtual address:      0xC0000000
- bus address:          0x80000000
-
-where all the addresses actually point to the same thing.  It's just seen 
-through different translations..
-
-Similarly, on the Alpha, the normal translation is
-
- physical address:     0
- virtual address:      0xfffffc0000000000
- bus address:          0x40000000
-
-(but there are also Alphas where the physical address and the bus address
-are the same). 
-
-Anyway, the way to look up all these translations, you do
-
-       #include <asm/io.h>
-
-       phys_addr = virt_to_phys(virt_addr);
-       virt_addr = phys_to_virt(phys_addr);
-        bus_addr = virt_to_bus(virt_addr);
-       virt_addr = bus_to_virt(bus_addr);
-
-Now, when do you need these?
-
-You want the _virtual_ address when you are actually going to access that 
-pointer from the kernel. So you can have something like this:
-
-       /*
-        * this is the hardware "mailbox" we use to communicate with
-        * the controller. The controller sees this directly.
-        */
-       struct mailbox {
-               __u32 status;
-               __u32 bufstart;
-               __u32 buflen;
-               ..
-       } mbox;
-
-               unsigned char * retbuffer;
-
-               /* get the address from the controller */
-               retbuffer = bus_to_virt(mbox.bufstart);
-               switch (retbuffer[0]) {
-                       case STATUS_OK:
-                               ...
-
-on the other hand, you want the bus address when you have a buffer that 
-you want to give to the controller:
-
-       /* ask the controller to read the sense status into "sense_buffer" */
-       mbox.bufstart = virt_to_bus(&sense_buffer);
-       mbox.buflen = sizeof(sense_buffer);
-       mbox.status = 0;
-       notify_controller(&mbox);
-
-And you generally _never_ want to use the physical address, because you can't
-use that from the CPU (the CPU only uses translated virtual addresses), and
-you can't use it from the bus master. 
-
-So why do we care about the physical address at all? We do need the physical
-address in some cases, it's just not very often in normal code.  The physical
-address is needed if you use memory mappings, for example, because the
-"remap_pfn_range()" mm function wants the physical address of the memory to
-be remapped as measured in units of pages, a.k.a. the pfn (the memory
-management layer doesn't know about devices outside the CPU, so it
-shouldn't need to know about "bus addresses" etc).
-
-NOTE NOTE NOTE! The above is only one part of the whole equation. The above
-only talks about "real memory", that is, CPU memory (RAM). 
-
-There is a completely different type of memory too, and that's the "shared
-memory" on the PCI or ISA bus. That's generally not RAM (although in the case
-of a video graphics card it can be normal DRAM that is just used for a frame
-buffer), but can be things like a packet buffer in a network card etc. 
-
-This memory is called "PCI memory" or "shared memory" or "IO memory" or
-whatever, and there is only one way to access it: the readb/writeb and
-related functions. You should never take the address of such memory, because
-there is really nothing you can do with such an address: it's not
-conceptually in the same memory space as "real memory" at all, so you cannot
-just dereference a pointer. (Sadly, on x86 it _is_ in the same memory space,
-so on x86 it actually works to just deference a pointer, but it's not
-portable). 
-
-For such memory, you can do things like
-
- - reading:
-       /*
-        * read first 32 bits from ISA memory at 0xC0000, aka
-        * C000:0000 in DOS terms
-        */
-       unsigned int signature = isa_readl(0xC0000);
-
- - remapping and writing:
-       /*
-        * remap framebuffer PCI memory area at 0xFC000000,
-        * size 1MB, so that we can access it: We can directly
-        * access only the 640k-1MB area, so anything else
-        * has to be remapped.
-        */
-       void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
-
-       /* write a 'A' to the offset 10 of the area */
-       writeb('A',baseptr+10);
-
-       /* unmap when we unload the driver */
-       iounmap(baseptr);
-
- - copying and clearing:
-       /* get the 6-byte Ethernet address at ISA address E000:0040 */
-       memcpy_fromio(kernel_buffer, 0xE0040, 6);
-       /* write a packet to the driver */
-       memcpy_toio(0xE1000, skb->data, skb->len);
-       /* clear the frame buffer */
-       memset_io(0xA0000, 0, 0x10000);
-
-OK, that just about covers the basics of accessing IO portably.  Questions?
-Comments? You may think that all the above is overly complex, but one day you
-might find yourself with a 500 MHz Alpha in front of you, and then you'll be
-happy that your driver works ;)
-
-Note that kernel versions 2.0.x (and earlier) mistakenly called the
-ioremap() function "vremap()".  ioremap() is the proper name, but I
-didn't think straight when I wrote it originally.  People who have to
-support both can do something like:
-       /* support old naming silliness */
-       #if LINUX_VERSION_CODE < 0x020100                                     
-       #define ioremap vremap
-       #define iounmap vfree                                                     
-       #endif
-at the top of their source files, and then they can use the right names
-even on 2.0.x systems. 
-
-And the above sounds worse than it really is.  Most real drivers really
-don't do all that complex things (or rather: the complexity is not so
-much in the actual IO accesses as in error handling and timeouts etc). 
-It's generally not hard to fix drivers, and in many cases the code
-actually looks better afterwards:
-
-       unsigned long signature = *(unsigned int *) 0xC0000;
-               vs
-       unsigned long signature = readl(0xC0000);
-
-I think the second version actually is more readable, no?
-
-               Linus
-
diff --git a/Documentation/bus-virt-phys-mapping.txt b/Documentation/bus-virt-phys-mapping.txt
new file mode 100644 (file)
index 0000000..1b5aa10
--- /dev/null
@@ -0,0 +1,208 @@
+[ NOTE: The virt_to_bus() and bus_to_virt() functions have been
+       superseded by the functionality provided by the PCI DMA interface
+       (see Documentation/PCI/PCI-DMA-mapping.txt).  They continue
+       to be documented below for historical purposes, but new code
+       must not use them. --davidm 00/12/12 ]
+
+[ This is a mail message in response to a query on IO mapping, thus the
+  strange format for a "document" ]
+
+The AHA-1542 is a bus-master device, and your patch makes the driver give the
+controller the physical address of the buffers, which is correct on x86
+(because all bus master devices see the physical memory mappings directly). 
+
+However, on many setups, there are actually _three_ different ways of looking
+at memory addresses, and in this case we actually want the third, the
+so-called "bus address". 
+
+Essentially, the three ways of addressing memory are (this is "real memory",
+that is, normal RAM--see later about other details): 
+
+ - CPU untranslated.  This is the "physical" address.  Physical address 
+   0 is what the CPU sees when it drives zeroes on the memory bus.
+
+ - CPU translated address. This is the "virtual" address, and is 
+   completely internal to the CPU itself with the CPU doing the appropriate
+   translations into "CPU untranslated". 
+
+ - bus address. This is the address of memory as seen by OTHER devices, 
+   not the CPU. Now, in theory there could be many different bus 
+   addresses, with each device seeing memory in some device-specific way, but
+   happily most hardware designers aren't actually actively trying to make
+   things any more complex than necessary, so you can assume that all 
+   external hardware sees the memory the same way. 
+
+Now, on normal PCs the bus address is exactly the same as the physical
+address, and things are very simple indeed. However, they are that simple
+because the memory and the devices share the same address space, and that is
+not generally necessarily true on other PCI/ISA setups. 
+
+Now, just as an example, on the PReP (PowerPC Reference Platform), the 
+CPU sees a memory map something like this (this is from memory):
+
+       0-2 GB          "real memory"
+       2 GB-3 GB       "system IO" (inb/out and similar accesses on x86)
+       3 GB-4 GB       "IO memory" (shared memory over the IO bus)
+
+Now, that looks simple enough. However, when you look at the same thing from
+the viewpoint of the devices, you have the reverse, and the physical memory
+address 0 actually shows up as address 2 GB for any IO master.
+
+So when the CPU wants any bus master to write to physical memory 0, it 
+has to give the master address 0x80000000 as the memory address.
+
+So, for example, depending on how the kernel is actually mapped on the 
+PPC, you can end up with a setup like this:
+
+ physical address:     0
+ virtual address:      0xC0000000
+ bus address:          0x80000000
+
+where all the addresses actually point to the same thing.  It's just seen 
+through different translations..
+
+Similarly, on the Alpha, the normal translation is
+
+ physical address:     0
+ virtual address:      0xfffffc0000000000
+ bus address:          0x40000000
+
+(but there are also Alphas where the physical address and the bus address
+are the same). 
+
+Anyway, the way to look up all these translations, you do
+
+       #include <asm/io.h>
+
+       phys_addr = virt_to_phys(virt_addr);
+       virt_addr = phys_to_virt(phys_addr);
+        bus_addr = virt_to_bus(virt_addr);
+       virt_addr = bus_to_virt(bus_addr);
+
+Now, when do you need these?
+
+You want the _virtual_ address when you are actually going to access that 
+pointer from the kernel. So you can have something like this:
+
+       /*
+        * this is the hardware "mailbox" we use to communicate with
+        * the controller. The controller sees this directly.
+        */
+       struct mailbox {
+               __u32 status;
+               __u32 bufstart;
+               __u32 buflen;
+               ..
+       } mbox;
+
+               unsigned char * retbuffer;
+
+               /* get the address from the controller */
+               retbuffer = bus_to_virt(mbox.bufstart);
+               switch (retbuffer[0]) {
+                       case STATUS_OK:
+                               ...
+
+on the other hand, you want the bus address when you have a buffer that 
+you want to give to the controller:
+
+       /* ask the controller to read the sense status into "sense_buffer" */
+       mbox.bufstart = virt_to_bus(&sense_buffer);
+       mbox.buflen = sizeof(sense_buffer);
+       mbox.status = 0;
+       notify_controller(&mbox);
+
+And you generally _never_ want to use the physical address, because you can't
+use that from the CPU (the CPU only uses translated virtual addresses), and
+you can't use it from the bus master. 
+
+So why do we care about the physical address at all? We do need the physical
+address in some cases, it's just not very often in normal code.  The physical
+address is needed if you use memory mappings, for example, because the
+"remap_pfn_range()" mm function wants the physical address of the memory to
+be remapped as measured in units of pages, a.k.a. the pfn (the memory
+management layer doesn't know about devices outside the CPU, so it
+shouldn't need to know about "bus addresses" etc).
+
+NOTE NOTE NOTE! The above is only one part of the whole equation. The above
+only talks about "real memory", that is, CPU memory (RAM). 
+
+There is a completely different type of memory too, and that's the "shared
+memory" on the PCI or ISA bus. That's generally not RAM (although in the case
+of a video graphics card it can be normal DRAM that is just used for a frame
+buffer), but can be things like a packet buffer in a network card etc. 
+
+This memory is called "PCI memory" or "shared memory" or "IO memory" or
+whatever, and there is only one way to access it: the readb/writeb and
+related functions. You should never take the address of such memory, because
+there is really nothing you can do with such an address: it's not
+conceptually in the same memory space as "real memory" at all, so you cannot
+just dereference a pointer. (Sadly, on x86 it _is_ in the same memory space,
+so on x86 it actually works to just deference a pointer, but it's not
+portable). 
+
+For such memory, you can do things like
+
+ - reading:
+       /*
+        * read first 32 bits from ISA memory at 0xC0000, aka
+        * C000:0000 in DOS terms
+        */
+       unsigned int signature = isa_readl(0xC0000);
+
+ - remapping and writing:
+       /*
+        * remap framebuffer PCI memory area at 0xFC000000,
+        * size 1MB, so that we can access it: We can directly
+        * access only the 640k-1MB area, so anything else
+        * has to be remapped.
+        */
+       void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
+
+       /* write a 'A' to the offset 10 of the area */
+       writeb('A',baseptr+10);
+
+       /* unmap when we unload the driver */
+       iounmap(baseptr);
+
+ - copying and clearing:
+       /* get the 6-byte Ethernet address at ISA address E000:0040 */
+       memcpy_fromio(kernel_buffer, 0xE0040, 6);
+       /* write a packet to the driver */
+       memcpy_toio(0xE1000, skb->data, skb->len);
+       /* clear the frame buffer */
+       memset_io(0xA0000, 0, 0x10000);
+
+OK, that just about covers the basics of accessing IO portably.  Questions?
+Comments? You may think that all the above is overly complex, but one day you
+might find yourself with a 500 MHz Alpha in front of you, and then you'll be
+happy that your driver works ;)
+
+Note that kernel versions 2.0.x (and earlier) mistakenly called the
+ioremap() function "vremap()".  ioremap() is the proper name, but I
+didn't think straight when I wrote it originally.  People who have to
+support both can do something like:
+       /* support old naming silliness */
+       #if LINUX_VERSION_CODE < 0x020100                                     
+       #define ioremap vremap
+       #define iounmap vfree                                                     
+       #endif
+at the top of their source files, and then they can use the right names
+even on 2.0.x systems. 
+
+And the above sounds worse than it really is.  Most real drivers really
+don't do all that complex things (or rather: the complexity is not so
+much in the actual IO accesses as in error handling and timeouts etc). 
+It's generally not hard to fix drivers, and in many cases the code
+actually looks better afterwards:
+
+       unsigned long signature = *(unsigned int *) 0xC0000;
+               vs
+       unsigned long signature = readl(0xC0000);
+
+I think the second version actually is more readable, no?
+
+               Linus
+
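[ Note: the relocated document above still describes the superseded
  virt_to_bus()/bus_to_virt() interface.  As a minimal sketch of the DMA
  API that replaced it (assuming a PCI driver with a struct pci_dev *pdev
  in scope; the buffer names are illustrative, not from this commit):

	#include <linux/dma-mapping.h>

	dma_addr_t bus_addr;	/* the address the device sees */
	void *cpu_addr;		/* the address the CPU dereferences */

	/* a coherent buffer: one call yields both views of the memory */
	cpu_addr = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
				      &bus_addr, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	mbox.bufstart = bus_addr;  /* hand the bus address to the device */

	dma_free_coherent(&pdev->dev, PAGE_SIZE, cpu_addr, bus_addr);

  See Documentation/PCI/PCI-DMA-mapping.txt for the full interface. ]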
index 58848125b8bfadcc70f9b37ac68d77d6aee0794b..db3d0f5061f9c42e301e420878b0dc5cbe31e6e1 100644 (file)
@@ -5336,6 +5336,7 @@ T:        git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next-2.6.git
 S:     Maintained
 F:     arch/sparc/
+F:     drivers/sbus
 
 SPARC SERIAL DRIVERS
 M:     "David S. Miller" <davem@davemloft.net>
index 9877372ffdba75b209d25e9be21210f19d5b91d9..5beb97bafbb1d5818e3ab83c058199353aaf2bb2 100644 (file)
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
        unsigned long ret;
 
        pop_return_trace(&trace, &ret);
-       trace.rettime = cpu_clock(raw_smp_processor_id());
+       trace.rettime = local_clock();
        ftrace_graph_return(&trace);
 
        if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
                return;
        }
 
-       calltime = cpu_clock(raw_smp_processor_id());
+       calltime = local_clock();
 
        if (push_return_trace(old, calltime,
                                self_addr, &trace.depth) == -EBUSY) {
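[ Note: both hunks above replace the open-coded pattern with the then-new
  local_clock() helper.  A sketch of the rough equivalence only; the real
  kernel/sched_clock.c implementation also disables preemption and copes
  with unstable sched clocks:

	static inline u64 local_clock_sketch(void)
	{
		return cpu_clock(raw_smp_processor_id());
	}
]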
index b0b21134f61a8f6fb30305b46d4f2cd07aae132f..4b611ca1a768df7f6b02777181a94ab9107ec366 100644 (file)
@@ -197,6 +197,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_SAO                    LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ           LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_UNALIGNED_LD_STD       LONG_ASM_CONST(0x0080000000000000)
+#define CPU_FTR_ASYM_SMT               LONG_ASM_CONST(0x0100000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -412,7 +413,7 @@ extern const char *powerpc_base_platform;
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
            CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-           CPU_FTR_DSCR | CPU_FTR_SAO)
+           CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT)
 #define CPU_FTRS_CELL  (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
index 773424df828a393d9a2860ded0b380978cfdd9cc..43855c9f84de366dbb530cf16056dc4869cf7bad 100644 (file)
@@ -1263,3 +1263,14 @@ unsigned long randomize_et_dyn(unsigned long base)
 
        return ret;
 }
+
+#ifdef CONFIG_SMP
+int arch_sd_sibling_asym_packing(void)
+{
+       if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+               printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+               return SD_ASYM_PACKING;
+       }
+       return 0;
+}
+#endif
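[ Note: this hunk overrides the scheduler's asymmetric-packing hook for
  CPUs with the new CPU_FTR_ASYM_SMT feature.  A sketch of the weak
  default the core scheduler is expected to fall back on when an
  architecture provides no override (assumption: mirrors kernel/sched.c
  in this merge):

	int __weak arch_sd_sibling_asym_packing(void)
	{
		return 0*SD_ASYM_PACKING;
	}
]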
index 259e3fd50993a95a2970f404a9c7a4a5224a1b5d..1dc07a0014c17b22bc2af459c581e3fc216d0948 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.34-rc3
-# Sat Apr  3 15:49:56 2010
+# Linux kernel version: 2.6.34
+# Wed May 26 21:14:01 2010
 #
 CONFIG_64BIT=y
 CONFIG_SPARC=y
@@ -107,10 +107,9 @@ CONFIG_PERF_COUNTERS=y
 # CONFIG_DEBUG_PERF_USE_VMALLOC is not set
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
-CONFIG_SLUB_DEBUG=y
 # CONFIG_COMPAT_BRK is not set
-# CONFIG_SLAB is not set
-CONFIG_SLUB=y
+CONFIG_SLAB=y
+# CONFIG_SLUB is not set
 # CONFIG_SLOB is not set
 CONFIG_PROFILING=y
 CONFIG_TRACEPOINTS=y
@@ -239,6 +238,7 @@ CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
 CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_PAGEFLAGS_EXTENDED=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
+# CONFIG_COMPACTION is not set
 CONFIG_MIGRATION=y
 CONFIG_PHYS_ADDR_T_64BIT=y
 CONFIG_ZONE_DMA_FLAG=0
@@ -351,6 +351,7 @@ CONFIG_IPV6_TUNNEL=m
 # CONFIG_RDS is not set
 # CONFIG_TIPC is not set
 # CONFIG_ATM is not set
+# CONFIG_L2TP is not set
 # CONFIG_BRIDGE is not set
 # CONFIG_NET_DSA is not set
 CONFIG_VLAN_8021Q=m
@@ -367,6 +368,7 @@ CONFIG_VLAN_8021Q=m
 # CONFIG_IEEE802154 is not set
 # CONFIG_NET_SCHED is not set
 # CONFIG_DCB is not set
+CONFIG_RPS=y
 
 #
 # Network testing
@@ -386,9 +388,14 @@ CONFIG_WIRELESS=y
 #
 # CFG80211 needs to be enabled for MAC80211
 #
+
+#
+# Some wireless drivers require a rate control algorithm
+#
 # CONFIG_WIMAX is not set
 # CONFIG_RFKILL is not set
 # CONFIG_NET_9P is not set
+# CONFIG_CAIF is not set
 
 #
 # Device Drivers
@@ -658,6 +665,7 @@ CONFIG_PHYLIB=m
 # CONFIG_NATIONAL_PHY is not set
 # CONFIG_STE10XP is not set
 # CONFIG_LSI_ET1011C_PHY is not set
+# CONFIG_MICREL_PHY is not set
 # CONFIG_MDIO_BITBANG is not set
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=m
@@ -734,6 +742,8 @@ CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
 CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_CHELSIO_T3 is not set
+CONFIG_CHELSIO_T4_DEPENDS=y
+# CONFIG_CHELSIO_T4 is not set
 # CONFIG_ENIC is not set
 # CONFIG_IXGBE is not set
 # CONFIG_IXGBEVF is not set
@@ -766,6 +776,7 @@ CONFIG_NIU=m
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_USBNET is not set
+# CONFIG_USB_IPHETH is not set
 # CONFIG_WAN is not set
 # CONFIG_FDDI is not set
 # CONFIG_HIPPI is not set
@@ -778,7 +789,6 @@ CONFIG_PPP_DEFLATE=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_MPPE=m
 CONFIG_PPPOE=m
-# CONFIG_PPPOL2TP is not set
 # CONFIG_SLIP is not set
 CONFIG_SLHC=m
 # CONFIG_NET_FC is not set
@@ -816,6 +826,7 @@ CONFIG_INPUT_KEYBOARD=y
 CONFIG_KEYBOARD_ATKBD=y
 # CONFIG_QT2160 is not set
 CONFIG_KEYBOARD_LKKBD=m
+# CONFIG_KEYBOARD_TCA6416 is not set
 # CONFIG_KEYBOARD_MAX7359 is not set
 # CONFIG_KEYBOARD_NEWTON is not set
 # CONFIG_KEYBOARD_OPENCORES is not set
@@ -840,6 +851,7 @@ CONFIG_MOUSE_SERIAL=y
 # CONFIG_INPUT_TABLET is not set
 # CONFIG_INPUT_TOUCHSCREEN is not set
 CONFIG_INPUT_MISC=y
+# CONFIG_INPUT_AD714X is not set
 CONFIG_INPUT_SPARCSPKR=y
 # CONFIG_INPUT_ATI_REMOTE is not set
 # CONFIG_INPUT_ATI_REMOTE2 is not set
@@ -848,6 +860,7 @@ CONFIG_INPUT_SPARCSPKR=y
 # CONFIG_INPUT_YEALINK is not set
 # CONFIG_INPUT_CM109 is not set
 # CONFIG_INPUT_UINPUT is not set
+# CONFIG_INPUT_PCF8574 is not set
 
 #
 # Hardware I/O ports
@@ -871,6 +884,7 @@ CONFIG_HW_CONSOLE=y
 # CONFIG_VT_HW_CONSOLE_BINDING is not set
 # CONFIG_DEVKMEM is not set
 # CONFIG_SERIAL_NONSTANDARD is not set
+# CONFIG_N_GSM is not set
 # CONFIG_NOZOMI is not set
 
 #
@@ -893,6 +907,8 @@ CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
 # CONFIG_SERIAL_TIMBERDALE is not set
 # CONFIG_SERIAL_GRLIB_GAISLER_APBUART is not set
+# CONFIG_SERIAL_ALTERA_JTAGUART is not set
+# CONFIG_SERIAL_ALTERA_UART is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
 # CONFIG_LEGACY_PTYS is not set
@@ -1306,11 +1322,14 @@ CONFIG_USB_HIDDEV=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
+# CONFIG_HID_CANDO is not set
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
+# CONFIG_HID_PRODIKEYS is not set
 CONFIG_HID_CYPRESS=y
 CONFIG_HID_DRAGONRISE=y
 # CONFIG_DRAGONRISE_FF is not set
+# CONFIG_HID_EGALAX is not set
 CONFIG_HID_EZKEY=y
 CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
@@ -1328,7 +1347,9 @@ CONFIG_HID_ORTEK=y
 CONFIG_HID_PANTHERLORD=y
 # CONFIG_PANTHERLORD_FF is not set
 CONFIG_HID_PETALYNX=y
+# CONFIG_HID_PICOLCD is not set
 # CONFIG_HID_QUANTA is not set
+# CONFIG_HID_ROCCAT_KONE is not set
 CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 # CONFIG_HID_STANTUM is not set
@@ -1342,6 +1363,7 @@ CONFIG_HID_THRUSTMASTER=y
 # CONFIG_THRUSTMASTER_FF is not set
 CONFIG_HID_ZEROPLUS=y
 # CONFIG_ZEROPLUS_FF is not set
+# CONFIG_HID_ZYDACRON is not set
 CONFIG_USB_SUPPORT=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
@@ -1356,7 +1378,6 @@ CONFIG_USB=y
 # CONFIG_USB_DEVICEFS is not set
 # CONFIG_USB_DEVICE_CLASS is not set
 # CONFIG_USB_DYNAMIC_MINORS is not set
-# CONFIG_USB_OTG is not set
 # CONFIG_USB_MON is not set
 # CONFIG_USB_WUSB is not set
 # CONFIG_USB_WUSB_CBAF is not set
@@ -1521,10 +1542,6 @@ CONFIG_RTC_DRV_STARFIRE=y
 # CONFIG_DMADEVICES is not set
 # CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
-
-#
-# TI VLYNQ
-#
 # CONFIG_STAGING is not set
 
 #
@@ -1706,8 +1723,8 @@ CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0
 CONFIG_SCHEDSTATS=y
 # CONFIG_TIMER_STATS is not set
 # CONFIG_DEBUG_OBJECTS is not set
-# CONFIG_SLUB_DEBUG_ON is not set
-# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_KMEMLEAK is not set
 # CONFIG_DEBUG_RT_MUTEXES is not set
 # CONFIG_RT_MUTEX_TESTER is not set
 # CONFIG_DEBUG_SPINLOCK is not set
@@ -1742,6 +1759,9 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y
 # CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
+CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST=y
+CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
@@ -1769,12 +1789,12 @@ CONFIG_BLK_DEV_IO_TRACE=y
 # CONFIG_RING_BUFFER_BENCHMARK is not set
 # CONFIG_DYNAMIC_DEBUG is not set
 # CONFIG_DMA_API_DEBUG is not set
+# CONFIG_ATOMIC64_SELFTEST is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_DEBUG_DCFLUSH is not set
-# CONFIG_STACK_DEBUG is not set
 # CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set
 
 #
@@ -1895,6 +1915,7 @@ CONFIG_CRYPTO_DEFLATE=y
 #
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_HW=y
+# CONFIG_CRYPTO_DEV_NIAGARA2 is not set
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_BINARY_PRINTF=y
 
index 0588b8c7faa26b5eb596a1c52a1bcbf77a53bc92..69358b590c9156953ee3c45deb471e4833c7d370 100644 (file)
@@ -11,7 +11,6 @@
 
 #define L1_CACHE_SHIFT 5
 #define L1_CACHE_BYTES 32
-#define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)))
 
 #ifdef CONFIG_SPARC32
 #define SMP_CACHE_BYTES_SHIFT 5
index 77f906d8cc21f3874ee079aa731edf49f02dea75..0ece77f477536b773a9bd310515008c3b152055c 100644 (file)
@@ -142,13 +142,12 @@ BTFIXUPDEF_CALL_CONST(unsigned long, pgd_page_vaddr, pgd_t)
 #define pmd_page(pmd) BTFIXUP_CALL(pmd_page)(pmd)
 #define pgd_page_vaddr(pgd) BTFIXUP_CALL(pgd_page_vaddr)(pgd)
 
-BTFIXUPDEF_SETHI(none_mask)
 BTFIXUPDEF_CALL_CONST(int, pte_present, pte_t)
 BTFIXUPDEF_CALL(void, pte_clear, pte_t *)
 
 static inline int pte_none(pte_t pte)
 {
-       return !(pte_val(pte) & ~BTFIXUP_SETHI(none_mask));
+       return !pte_val(pte);
 }
 
 #define pte_present(pte) BTFIXUP_CALL(pte_present)(pte)
@@ -160,7 +159,7 @@ BTFIXUPDEF_CALL(void, pmd_clear, pmd_t *)
 
 static inline int pmd_none(pmd_t pmd)
 {
-       return !(pmd_val(pmd) & ~BTFIXUP_SETHI(none_mask));
+       return !pmd_val(pmd);
 }
 
 #define pmd_bad(pmd) BTFIXUP_CALL(pmd_bad)(pmd)
index 0ec92c8861dd3a68fe0465b71ccf8007ad6e40a7..44faabc3c02c920bba1b9f12272d1a907895161e 100644 (file)
@@ -657,6 +657,7 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr)
                cpuc->current_idx[i] = idx;
 
                enc = perf_event_get_enc(cpuc->events[i]);
+               pcr &= ~mask_for_index(idx);
                pcr |= event_encoding(enc, idx);
        }
 out:
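[ Note: a classic read-modify-write repair.  Without clearing the field
  first, encoding bits from a previously scheduled event survive the OR
  and corrupt the new programming.  Schematically, using the hunk's own
  identifiers:

	pcr |= event_encoding(enc, idx);   /* buggy: stale bits survive  */

	pcr &= ~mask_for_index(idx);       /* fixed: clear the field...  */
	pcr |= event_encoding(enc, idx);   /* ...then install the new one */
]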
index ab036a72de5a16460cdb8b548dbcf15bb2e04db6..e11b4612dabb32df7cd0155ed6aed40515c038ad 100644 (file)
@@ -183,7 +183,7 @@ void sun4d_free_irq(unsigned int irq, void *dev_id)
                goto out_unlock;
        }
        
-       if (action && tmp)
+       if (tmp)
                tmp->next = action->next;
        else
                *actionp = action->next;
index 76d837fc47d3fdd498c0b2fdb276c9b25abe1c0b..c6dfdaa29e208994fab3b1942cd24878d0bd2d98 100644 (file)
@@ -64,7 +64,7 @@ tl0_irq6:     TRAP_IRQ(smp_call_function_single_client, 6)
 tl0_irq6:      BTRAP(0x46)
 #endif
 tl0_irq7:      TRAP_IRQ(deferred_pcr_work_irq, 7)
-#ifdef CONFIG_KGDB
+#if defined(CONFIG_KGDB) && defined(CONFIG_SMP)
 tl0_irq8:      TRAP_IRQ(smp_kgdb_capture_client, 8)
 #else
 tl0_irq8:      BTRAP(0x48)
index f5f75a58e0b3cc7077f78a610f6cc6107c75bcb8..b0b43aa5e45a483984b0acd7a4641c0765eb8859 100644 (file)
@@ -2215,8 +2215,6 @@ void __init ld_mmu_srmmu(void)
        BTFIXUPSET_CALL(pmd_page, srmmu_pmd_page, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(pgd_page_vaddr, srmmu_pgd_page, BTFIXUPCALL_NORM);
 
-       BTFIXUPSET_SETHI(none_mask, 0xF0000000);
-
        BTFIXUPSET_CALL(pte_present, srmmu_pte_present, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(pte_clear, srmmu_pte_clear, BTFIXUPCALL_SWAPO0G0);
 
index cf38846753dd55c3ece5d7be244b23d9ca3fbda3..4289f90f86972f6020318bf002162fccf5a968e8 100644 (file)
@@ -2087,9 +2087,6 @@ void __init ld_mmu_sun4c(void)
 
        BTFIXUPSET_CALL(set_pte, sun4c_set_pte, BTFIXUPCALL_STO1O0);
 
-       /* The 2.4.18 code does not set this on sun4c, how does it work? XXX */
-       /* BTFIXUPSET_SETHI(none_mask, 0x00000000); */  /* Defaults to zero? */
-
        BTFIXUPSET_CALL(pte_pfn, sun4c_pte_pfn, BTFIXUPCALL_NORM);
 #if 0 /* PAGE_SHIFT <= 12 */ /* Eek. Investigate. XXX */
        BTFIXUPSET_CALL(pmd_page, sun4c_pmd_page, BTFIXUPCALL_ANDNINT(PAGE_SIZE - 1));
index c02cc692985c13b902f1a9c914535170f9670d01..a96489ee6cabf04a53664300f87141c4bbffc293 100644 (file)
@@ -921,7 +921,7 @@ void disable_local_APIC(void)
        unsigned int value;
 
        /* APIC hasn't been mapped yet */
-       if (!apic_phys)
+       if (!x2apic_mode && !apic_phys)
                return;
 
        clear_local_APIC();
index ebdb85cf2686fa36702cd4d50b657f22de85b3bd..e5cc7e82e60ddbf1bd1ca2871fdb7d7fc7628e34 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/apic.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
+#include <asm/hpet.h>
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
@@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 }
 #endif
 
+/*
+ * Force the read back of the CMP register in hpet_next_event()
+ * to work around the problem that the CMP register write seems to be
+ * delayed. See hpet_next_event() for details.
+ *
+ * We do this on all SMBUS incarnations for now until we have more
+ * information about the affected chipsets.
+ */
+static void __init ati_hpet_bugs(int num, int slot, int func)
+{
+#ifdef CONFIG_HPET_TIMER
+       hpet_readback_cmp = 1;
+#endif
+}
+
 #define QFLAG_APPLY_ONCE       0x1
 #define QFLAG_APPLIED          0x2
 #define QFLAG_DONE             (QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = {
          PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
        { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
          PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
+       { PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+         PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
        {}
 };
 
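[ Note: the new table entry uses PCI_ANY_ID as the device ID, so the HPET
  read-back workaround now applies to every ATI SMBus incarnation.  A
  simplified sketch of how an early_qrk entry is matched (assumption:
  condensed from check_dev_quirk() in this same file; the real code also
  honours the QFLAG_APPLY_ONCE/QFLAG_APPLIED flags):

	if ((q->vendor == PCI_ANY_ID || q->vendor == vendor) &&
	    (q->device == PCI_ANY_ID || q->device == device) &&
	    (q->class == class))
		q->f(num, slot, func);
]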
index 345a4b1fe1446812d65e25fd424886d05aeb1fe4..675879b65ce666c91b868c96972ea35f107810f4 100644 (file)
@@ -640,8 +640,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
        /* Skip cs, ip, orig_ax and gs. */      \
        "       subl $16, %esp\n"       \
        "       pushl %fs\n"            \
-       "       pushl %ds\n"            \
        "       pushl %es\n"            \
+       "       pushl %ds\n"            \
        "       pushl %eax\n"           \
        "       pushl %ebp\n"           \
        "       pushl %edi\n"           \
index e72d3fc6547d6fe767785e869deb90ea683d8cdc..939b9e98245f733262cdee8198daa08498abf87e 100644 (file)
@@ -498,15 +498,10 @@ void force_hpet_resume(void)
  * See erratum #27 (Misinterpreted MSI Requests May Result in
  * Corrupted LPC DMA Data) in AMD Publication #46837,
  * "SB700 Family Product Errata", Rev. 1.0, March 2010.
- *
- * Also force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
  */
 static void force_disable_hpet_msi(struct pci_dev *unused)
 {
        hpet_msi_disable = 1;
-       hpet_readback_cmp = 1;
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
index de3b63ae3da26300297e315a6f7a9b5157d429ee..a60df9ae645440789181acd318e12bd4c450aeff 100644 (file)
@@ -238,6 +238,15 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_NUMA
                per_cpu(x86_cpu_to_node_map, cpu) =
                        early_per_cpu_map(x86_cpu_to_node_map, cpu);
+               /*
+                * Ensure that the boot cpu numa_node is correct when the boot
+                * cpu is on a node that doesn't have memory installed.
+                * Also cpu_up() will call cpu_to_node() for APs when
+                * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set
+                * up later with c_init aka intel_init/amd_init.
+                * So set them all (boot cpu and all APs).
+                */
+               set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
 #endif
 #endif
                /*
@@ -257,14 +266,6 @@ void __init setup_per_cpu_areas(void)
        early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
-       /*
-        * make sure boot cpu numa_node is right, when boot cpu is on the
-        * node that doesn't have mem installed
-        */
-       set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id));
-#endif
-
        /* Setup node to cpumask map */
        setup_node_to_cpumask_map();
 
index 3699613e88304d81cb4774c5ee69e1e4464d8a1f..b1ed0a1a591338c801d49268c2f784477fd4a0a1 100644 (file)
@@ -2926,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
        return kvm_mmu_zap_page(kvm, page) + 1;
 }
 
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        struct kvm *kvm;
        struct kvm *kvm_freed = NULL;
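[ Note: part of a tree-wide API change in this merge window: shrinker
  callbacks now receive the struct shrinker pointer, so one callback can
  serve several registrations.  A minimal registration sketch under the
  new signature (all names illustrative):

	static int example_object_count;

	static int example_shrink(struct shrinker *shrink, int nr_to_scan,
				  gfp_t gfp_mask)
	{
		if (nr_to_scan)
			; /* free up to nr_to_scan cached objects here */
		return example_object_count;	/* report what remains */
	}

	static struct shrinker example_shrinker = {
		.shrink	= example_shrink,
		.seeks	= DEFAULT_SEEKS,
	};

	/* at init time */
	register_shrinker(&example_shrinker);
]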
index 6fdb3ec30c3197e15fc54e18c91291f5eb403450..55253095be84c66d37c5dcaaba63bd4ebaee6df2 100644 (file)
@@ -184,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass)
                                        idx, r, disabled, pass);
                                if (pci_claim_resource(dev, idx) < 0) {
                                        /* We'll assign a new address later */
+                                       dev->fw_addr[idx] = r->start;
                                        r->end -= r->start;
                                        r->start = 0;
                                }
index 7ef3a2735df39f2fdfbd4624a440e22d3e169879..cb29191cee5877824391a33de0cea9c5255f2c55 100644 (file)
@@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
                                          devfn, pos, 4, &pcie_cap))
                        return 0;
 
-               if (pcie_cap == 0xffffffff)
-                       return 0;
+               if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 ||
+                       PCI_EXT_CAP_ID(pcie_cap) == 0xffff)
+                       break;
 
                if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
                        raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
@@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
                                return pos;
                }
 
-               pos = pcie_cap >> 20;
+               pos = PCI_EXT_CAP_NEXT(pcie_cap);
        }
 
        return 0;
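[ Note: the fix replaces an open-coded "pcie_cap >> 20" with the
  PCI_EXT_CAP_NEXT() accessor (which also masks the reserved low bits)
  and terminates on a 0x0000/0xffff capability ID rather than an all-ones
  header.  A hedged sketch of the generic extended-capability walk these
  macros support (wanted_id, domain, bus and devfn are illustrative):

	u32 header;
	int pos = 0x100;	/* extended config space starts at 0x100 */

	do {
		if (raw_pci_ext_ops->read(domain, bus, devfn, pos, 4, &header))
			break;
		if (PCI_EXT_CAP_ID(header) == wanted_id)
			return pos;		/* found it */
		pos = PCI_EXT_CAP_NEXT(header);	/* (header >> 20) & 0xffc */
	} while (pos);
]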
index d7be69f131546b02a1784ac750e4f3dc042e10d1..b7dab32ce63cd883974ac0e9b3646d301460cc7e 100644 (file)
@@ -194,6 +194,6 @@ err_timer:
 
 module_init(cs5535_mfgpt_init);
 
-MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
 MODULE_DESCRIPTION("CS5535/CS5536 MFGPT clock event driver");
 MODULE_LICENSE("GPL");
index aedef7941b22855f17c892f73d370ae10f17632c..0d2f9dbb47e4fa6d3d3a4440b8427fe544407af3 100644 (file)
@@ -209,7 +209,7 @@ config EDAC_I5100
 
 config EDAC_MPC85XX
        tristate "Freescale MPC83xx / MPC85xx"
-       depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || MPC85xx)
+       depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
        help
          Support for error detection and correction on the Freescale
          MPC8349, MPC8560, MPC8540, MPC8548
index 52ca09bf4726ae9cb5984d38c79d86705c5b51f8..f39b00a46eda36c1d6e6d265e7aa02245d012556 100644 (file)
@@ -1120,6 +1120,7 @@ static struct of_device_id mpc85xx_mc_err_of_match[] = {
        { .compatible = "fsl,mpc8555-memory-controller", },
        { .compatible = "fsl,mpc8560-memory-controller", },
        { .compatible = "fsl,mpc8568-memory-controller", },
+       { .compatible = "fsl,mpc8569-memory-controller", },
        { .compatible = "fsl,mpc8572-memory-controller", },
        { .compatible = "fsl,mpc8349-memory-controller", },
        { .compatible = "fsl,p2020-memory-controller", },
index f73a1555e49d7d92fad5da024c4face448a8e114..e23c06893d19f62195247220075aafce1cfa75c0 100644 (file)
@@ -352,6 +352,6 @@ static void __exit cs5535_gpio_exit(void)
 module_init(cs5535_gpio_init);
 module_exit(cs5535_gpio_exit);
 
-MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
 MODULE_DESCRIPTION("AMD CS5535/CS5536 GPIO driver");
 MODULE_LICENSE("GPL");
index 074385882ccfe721ff5630b9e825950c6479af7e..51bd301cf10d9002555177d95b28e86703145d67 100644 (file)
@@ -2241,6 +2241,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj,
                page = read_cache_page_gfp(mapping, i,
                                           GFP_HIGHUSER |
                                           __GFP_COLD |
+                                          __GFP_RECLAIMABLE |
                                           gfpmask);
                if (IS_ERR(page))
                        goto err_pages;
@@ -4741,6 +4742,16 @@ i915_gem_load(struct drm_device *dev)
        list_add(&dev_priv->mm.shrink_list, &shrink_list);
        spin_unlock(&shrink_list_lock);
 
+       /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
+       if (IS_GEN3(dev)) {
+               u32 tmp = I915_READ(MI_ARB_STATE);
+               if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
+                       /* arb state is a masked write, so set bit + bit in mask */
+                       tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
+                       I915_WRITE(MI_ARB_STATE, tmp);
+               }
+       }
+
        /* Old X drivers will take 0-2 for front, back, depth buffers */
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                dev_priv->fence_reg_start = 3;
@@ -4977,7 +4988,7 @@ i915_gpu_is_active(struct drm_device *dev)
 }
 
 static int
-i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
+i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        drm_i915_private_t *dev_priv, *next_dev;
        struct drm_i915_gem_object *obj_priv, *next_obj;
index 150400f405346de04d5693bfedc91d36f0a85a2c..6d9b0288272a7e0c4f424bd66de1964af60e9a22 100644 (file)
 #define LM_BURST_LENGTH     0x00000700
 #define LM_FIFO_WATERMARK   0x0000001F
 #define MI_ARB_STATE   0x020e4 /* 915+ only */
+#define   MI_ARB_MASK_SHIFT      16    /* shift for enable bits */
+
+/* Make render/texture TLB fetches lower priorty than associated data
+ *   fetches. This is not turned on by default
+ */
+#define   MI_ARB_RENDER_TLB_LOW_PRIORITY       (1 << 15)
+
+/* Isoch request wait on GTT enable (Display A/B/C streams).
+ * Make isoch requests stall on the TLB update. May cause
+ * display underruns (test mode only)
+ */
+#define   MI_ARB_ISOCH_WAIT_GTT                        (1 << 14)
+
+/* Block grant count for isoch requests when block count is
+ * set to a finite value.
+ */
+#define   MI_ARB_BLOCK_GRANT_MASK              (3 << 12)
+#define   MI_ARB_BLOCK_GRANT_8                 (0 << 12)       /* for 3 display planes */
+#define   MI_ARB_BLOCK_GRANT_4                 (1 << 12)       /* for 2 display planes */
+#define   MI_ARB_BLOCK_GRANT_2                 (2 << 12)       /* for 1 display plane */
+#define   MI_ARB_BLOCK_GRANT_0                 (3 << 12)       /* don't use */
+
+/* Enable render writes to complete in C2/C3/C4 power states.
+ * If this isn't enabled, render writes are prevented in low
+ * power states. That seems bad to me.
+ */
+#define   MI_ARB_C3_LP_WRITE_ENABLE            (1 << 11)
+
+/* This acknowledges an async flip immediately instead
+ * of waiting for 2TLB fetches.
+ */
+#define   MI_ARB_ASYNC_FLIP_ACK_IMMEDIATE      (1 << 10)
+
+/* Enables non-sequential data reads through arbiter
+ */
+#define   MI_ARB_DUAL_DATA_PHASE_DISABLE               (1 << 9)
+
+/* Disable FSB snooping of cacheable write cycles from binner/render
+ * command stream
+ */
+#define   MI_ARB_CACHE_SNOOP_DISABLE           (1 << 8)
+
+/* Arbiter time slice for non-isoch streams */
+#define   MI_ARB_TIME_SLICE_MASK               (7 << 5)
+#define   MI_ARB_TIME_SLICE_1                  (0 << 5)
+#define   MI_ARB_TIME_SLICE_2                  (1 << 5)
+#define   MI_ARB_TIME_SLICE_4                  (2 << 5)
+#define   MI_ARB_TIME_SLICE_6                  (3 << 5)
+#define   MI_ARB_TIME_SLICE_8                  (4 << 5)
+#define   MI_ARB_TIME_SLICE_10                 (5 << 5)
+#define   MI_ARB_TIME_SLICE_14                 (6 << 5)
+#define   MI_ARB_TIME_SLICE_16                 (7 << 5)
+
+/* Low priority grace period page size */
+#define   MI_ARB_LOW_PRIORITY_GRACE_4KB                (0 << 4)        /* default */
+#define   MI_ARB_LOW_PRIORITY_GRACE_8KB                (1 << 4)
+
+/* Disable display A/B trickle feed */
+#define   MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE  (1 << 2)
+
+/* Set display plane priority */
+#define   MI_ARB_DISPLAY_PRIORITY_A_B          (0 << 0)        /* display A > display B */
+#define   MI_ARB_DISPLAY_PRIORITY_B_A          (1 << 0)        /* display B > display A */
+
 #define CACHE_MODE_0   0x02120 /* 915+ only */
 #define   CM0_MASK_SHIFT          16
 #define   CM0_IZ_OPT_DISABLE      (1<<6)
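[ Note: MI_ARB_STATE is a masked register: the upper 16 bits select which
  of the lower 16 bits a write actually latches, which is why the
  i915_gem.c hunk above sets the bit together with its mask bit.  In
  general, for an illustrative bit N:

	/* set bit N */
	I915_WRITE(MI_ARB_STATE, (1 << (N + MI_ARB_MASK_SHIFT)) | (1 << N));

	/* clear bit N: assert only the mask bit, leave the value bit zero */
	I915_WRITE(MI_ARB_STATE, 1 << (N + MI_ARB_MASK_SHIFT));
]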
index fc924b64919529cfccc14a85c3eb28ae0ab8f50d..e492919faf44167b14b97da9537f4aa881eb7969 100644 (file)
@@ -203,36 +203,26 @@ struct methods {
        const bool rw;
 };
 
-static struct methods nv04_methods[] = {
-       { "PROM", load_vbios_prom, false },
-       { "PRAMIN", load_vbios_pramin, true },
-       { "PCIROM", load_vbios_pci, true },
-};
-
-static struct methods nv50_methods[] = {
-       { "ACPI", load_vbios_acpi, true },
+static struct methods shadow_methods[] = {
        { "PRAMIN", load_vbios_pramin, true },
        { "PROM", load_vbios_prom, false },
        { "PCIROM", load_vbios_pci, true },
+       { "ACPI", load_vbios_acpi, true },
 };
 
-#define METHODCNT 3
-
 static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
 {
-       struct drm_nouveau_private *dev_priv = dev->dev_private;
-       struct methods *methods;
-       int i;
+       const int nr_methods = ARRAY_SIZE(shadow_methods);
+       struct methods *methods = shadow_methods;
        int testscore = 3;
-       int scores[METHODCNT];
+       int scores[nr_methods], i;
 
        if (nouveau_vbios) {
-               methods = nv04_methods;
-               for (i = 0; i < METHODCNT; i++)
+               for (i = 0; i < nr_methods; i++)
                        if (!strcasecmp(nouveau_vbios, methods[i].desc))
                                break;
 
-               if (i < METHODCNT) {
+               if (i < nr_methods) {
                        NV_INFO(dev, "Attempting to use BIOS image from %s\n",
                                methods[i].desc);
 
@@ -244,12 +234,7 @@ static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
                NV_ERROR(dev, "VBIOS source \'%s\' invalid\n", nouveau_vbios);
        }
 
-       if (dev_priv->card_type < NV_50)
-               methods = nv04_methods;
-       else
-               methods = nv50_methods;
-
-       for (i = 0; i < METHODCNT; i++) {
+       for (i = 0; i < nr_methods; i++) {
                NV_TRACE(dev, "Attempting to load BIOS image from %s\n",
                         methods[i].desc);
                data[0] = data[1] = 0;  /* avoid reuse of previous image */
@@ -260,7 +245,7 @@ static bool NVShadowVBIOS(struct drm_device *dev, uint8_t *data)
        }
 
        while (--testscore > 0) {
-               for (i = 0; i < METHODCNT; i++) {
+               for (i = 0; i < nr_methods; i++) {
                        if (scores[i] == testscore) {
                                NV_TRACE(dev, "Using BIOS image from %s\n",
                                         methods[i].desc);
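[ Note: collapsing the two per-generation tables into one shadow_methods[]
  and sizing the loops with ARRAY_SIZE() removes the hand-maintained
  METHODCNT constant that had to be kept in sync with the tables.  For
  reference, ARRAY_SIZE() is essentially:

	/* the kernel version also adds a compile-time check, via
	 * __must_be_array(), that the argument really is an array */
	#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
]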
index c9a4a0d2a11593a0d2010bdd979352c4d5cd6003..257ea130ae13a3c7fa0c8d2ca5b60ab5ce79d706 100644 (file)
@@ -387,7 +387,8 @@ int nouveau_fbcon_init(struct drm_device *dev)
        dev_priv->nfbdev = nfbdev;
        nfbdev->helper.funcs = &nouveau_fbcon_helper_funcs;
 
-       ret = drm_fb_helper_init(dev, &nfbdev->helper, 2, 4);
+       ret = drm_fb_helper_init(dev, &nfbdev->helper,
+                                nv_two_heads(dev) ? 2 : 1, 4);
        if (ret) {
                kfree(nfbdev);
                return ret;
index 3970e62eaab8f75582e0838e5770a7346731bff6..aab5ba040bd6a1fbdf92a8dafe1b9078bc1f65fa 100644 (file)
@@ -2354,6 +2354,7 @@ void r100_mc_init(struct radeon_device *rdev)
        if (rdev->flags & RADEON_IS_IGP)
                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
        radeon_vram_location(rdev, &rdev->mc, base);
+       rdev->mc.gtt_base_align = 0;
        if (!(rdev->flags & RADEON_IS_AGP))
                radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
index 7e81db5eb8041b1c66afe1ade046724abf235805..19a7ef7ee3448f867ec11ab4a0f57f9dfa047013 100644 (file)
@@ -481,6 +481,7 @@ void r300_mc_init(struct radeon_device *rdev)
        if (rdev->flags & RADEON_IS_IGP)
                base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
        radeon_vram_location(rdev, &rdev->mc, base);
+       rdev->mc.gtt_base_align = 0;
        if (!(rdev->flags & RADEON_IS_AGP))
                radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
@@ -1176,6 +1177,8 @@ int r300_cs_parse(struct radeon_cs_parser *p)
        int r;
 
        track = kzalloc(sizeof(*track), GFP_KERNEL);
+       if (track == NULL)
+               return -ENOMEM;
        r100_cs_track_clear(p->rdev, track);
        p->track = track;
        do {
index 34330df2848329c178a9f2a659d44b0596b0bbaf..694af7cc23ac1b3ce91b8abb71ca1d3fdbb58ae2 100644 (file)
@@ -125,6 +125,7 @@ void r520_mc_init(struct radeon_device *rdev)
        r520_vram_get_type(rdev);
        r100_vram_init_sizes(rdev);
        radeon_vram_location(rdev, &rdev->mc, 0);
+       rdev->mc.gtt_base_align = 0;
        if (!(rdev->flags & RADEON_IS_AGP))
                radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
index 3d6645ce21518a640dd0981e38a67e223e86a49e..e100f69faeec80a0c447ee186774a14ea95620c4 100644 (file)
@@ -1179,6 +1179,7 @@ void r600_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
                if (rdev->flags & RADEON_IS_IGP)
                        base = (RREG32(MC_VM_FB_LOCATION) & 0xFFFF) << 24;
                radeon_vram_location(rdev, &rdev->mc, base);
+               rdev->mc.gtt_base_align = 0;
                radeon_gtt_location(rdev, mc);
        }
 }
index f4fb88ece2bbc0221e9a967518a94745fb545be6..ca5c29f707797425f8844c4f464c25a8ecb137cc 100644 (file)
@@ -538,9 +538,12 @@ int
 r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
 {
        drm_radeon_private_t *dev_priv = dev->dev_private;
+       int ret;
        DRM_DEBUG("\n");
 
-       r600_nomm_get_vb(dev);
+       ret = r600_nomm_get_vb(dev);
+       if (ret)
+               return ret;
 
        dev_priv->blit_vb->file_priv = file_priv;
 
index ab61aaa887bb449c486b59abccb89772b021f35a..2f94dc66c1836f4bf5d6814bb51b5730a13945ad 100644 (file)
@@ -351,6 +351,7 @@ struct radeon_mc {
        int                     vram_mtrr;
        bool                    vram_is_ddr;
        bool                    igp_sideport_enabled;
+       u64                     gtt_base_align;
 };
 
 bool radeon_combios_sideport_present(struct radeon_device *rdev);
index 99bd8a9c56b38f8431510bf3cde781345510feb1..10673ae59cfa10ee8e408de3a9f84e54e2cab7ed 100644 (file)
@@ -280,6 +280,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev,
                }
        }
 
+       /* ASUS HD 3600 board lists the DVI port as HDMI */
+       if ((dev->pdev->device == 0x9598) &&
+           (dev->pdev->subsystem_vendor == 0x1043) &&
+           (dev->pdev->subsystem_device == 0x01e4)) {
+               if (*connector_type == DRM_MODE_CONNECTOR_HDMIA) {
+                       *connector_type = DRM_MODE_CONNECTOR_DVII;
+               }
+       }
+
        /* ASUS HD 3450 board lists the DVI port as HDMI */
        if ((dev->pdev->device == 0x95C5) &&
            (dev->pdev->subsystem_vendor == 0x1043) &&
@@ -1029,8 +1038,15 @@ bool radeon_atombios_sideport_present(struct radeon_device *rdev)
                                      data_offset);
                switch (crev) {
                case 1:
-                       if (igp_info->info.ucMemoryType & 0xf0)
-                               return true;
+                       /* AMD IGPS */
+                       if ((rdev->family == CHIP_RS690) ||
+                           (rdev->family == CHIP_RS740)) {
+                               if (igp_info->info.ulBootUpMemoryClock)
+                                       return true;
+                       } else {
+                               if (igp_info->info.ucMemoryType & 0xf0)
+                                       return true;
+                       }
                        break;
                case 2:
                        if (igp_info->info_2.ucMemoryType & 0x0f)
index f58f8bd8f77b0c7f2fc984130d83cfbb3a40c591..adccbc2c202c6b01e22cca3f1ae27dc0334e351b 100644 (file)
@@ -771,14 +771,14 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect
                        } else
                                ret = connector_status_connected;
 
-                       /* multiple connectors on the same encoder with the same ddc line
-                        * This tends to be HDMI and DVI on the same encoder with the
-                        * same ddc line.  If the edid says HDMI, consider the HDMI port
-                        * connected and the DVI port disconnected.  If the edid doesn't
-                        * say HDMI, vice versa.
+                       /* This gets complicated.  We have boards with VGA + HDMI with a
+                        * shared DDC line and we have boards with DVI-D + HDMI with a shared
+                        * DDC line.  The latter is more complex because with DVI<->HDMI adapters
+                        * you don't really know what's connected to which port as both are digital.
                         */
                        if (radeon_connector->shared_ddc && (ret == connector_status_connected)) {
                                struct drm_device *dev = connector->dev;
+                               struct radeon_device *rdev = dev->dev_private;
                                struct drm_connector *list_connector;
                                struct radeon_connector *list_radeon_connector;
                                list_for_each_entry(list_connector, &dev->mode_config.connector_list, head) {
@@ -788,15 +788,10 @@ static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connect
                                        if (list_radeon_connector->shared_ddc &&
                                            (list_radeon_connector->ddc_bus->rec.i2c_id ==
                                             radeon_connector->ddc_bus->rec.i2c_id)) {
-                                               if (drm_detect_hdmi_monitor(radeon_connector->edid)) {
-                                                       if (connector->connector_type == DRM_MODE_CONNECTOR_DVID) {
-                                                               kfree(radeon_connector->edid);
-                                                               radeon_connector->edid = NULL;
-                                                               ret = connector_status_disconnected;
-                                                       }
-                                               } else {
-                                                       if ((connector->connector_type == DRM_MODE_CONNECTOR_HDMIA) ||
-                                                           (connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)) {
+                                               /* cases where both connectors are digital */
+                                               if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) {
+                                                       /* hpd is our only option in this case */
+                                                       if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
                                                                kfree(radeon_connector->edid);
                                                                radeon_connector->edid = NULL;
                                                                ret = connector_status_disconnected;
index 5f317317aba29dc972331f32994c0acae66e9840..dd279da90546ee03929b02b8bd7095683ef31caf 100644 (file)
@@ -226,20 +226,20 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 {
        u64 size_af, size_bf;
 
-       size_af = 0xFFFFFFFF - mc->vram_end;
-       size_bf = mc->vram_start;
+       size_af = ((0xFFFFFFFF - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
+       size_bf = mc->vram_start & ~mc->gtt_base_align;
        if (size_bf > size_af) {
                if (mc->gtt_size > size_bf) {
                        dev_warn(rdev->dev, "limiting GTT\n");
                        mc->gtt_size = size_bf;
                }
-               mc->gtt_start = mc->vram_start - mc->gtt_size;
+               mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
        } else {
                if (mc->gtt_size > size_af) {
                        dev_warn(rdev->dev, "limiting GTT\n");
                        mc->gtt_size = size_af;
                }
-               mc->gtt_start = mc->vram_end + 1;
+               mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
        }
        mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
        dev_info(rdev->dev, "GTT: %lluM 0x%08llX - 0x%08llX\n",
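
For reference (a sketch, not part of the commit): gtt_base_align is used here as a power-of-two mask, set to gtt_size - 1 on the IGPs whose GART base must be aligned to the GART size, and 0 everywhere else (where the masking degenerates to a no-op). The two expressions are the usual round-down/round-up idioms:

    /* Minimal sketch of the masking above; assumes gtt_size is a
     * power of two, so align = gtt_size - 1 is an all-ones mask.
     */
    #include <assert.h>
    #include <stdint.h>

    static uint64_t align_down(uint64_t addr, uint64_t align)
    {
            return addr & ~align;           /* round down to a boundary */
    }

    static uint64_t align_up(uint64_t addr, uint64_t align)
    {
            return (addr + align) & ~align; /* round up to a boundary */
    }

    int main(void)
    {
            uint64_t align = (512ull << 20) - 1;    /* e.g. a 512M GART */
            /* vram_end + 1 is pushed up to the next 512M boundary */
            assert(align_up(0x1C000000ull, align) == 0x20000000ull);
            assert(align_down(0x2FFFFFFFull, align) == 0x20000000ull);
            /* align = 0 (the non-IGP case) leaves addresses untouched */
            assert(align_up(0x12345678ull, 0) == 0x12345678ull);
            return 0;
    }
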
index f2ed27c8055bc90c0f9004471e063d0e89471801..03204039774308bc01eefdd69616f310b50af739 100644 (file)
@@ -642,8 +642,8 @@ void radeon_legacy_tv_mode_set(struct drm_encoder *encoder,
        }
        flicker_removal = (tmp + 500) / 1000;
 
-       if (flicker_removal < 2)
-               flicker_removal = 2;
+       if (flicker_removal < 3)
+               flicker_removal = 3;
        for (i = 0; i < ARRAY_SIZE(SLOPE_limit); ++i) {
                if (flicker_removal == SLOPE_limit[i])
                        break;
index 9e4240b3bf0bb1ff950b4c74a8e003fc51903e2b..f454c9a5e7f22269a9ffe0618d87f1a74e05f276 100644 (file)
@@ -57,7 +57,9 @@ void rs400_gart_adjust_size(struct radeon_device *rdev)
        }
        if (rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) {
                /* FIXME: RS400 & RS480 seems to have issue with GART size
-                * if 4G of system memory (needs more testing) */
+                * if 4G of system memory (needs more testing)
+                */
+               /* XXX is this still an issue with proper alignment? */
                rdev->mc.gtt_size = 32 * 1024 * 1024;
                DRM_ERROR("Forcing to 32M GART size (because of ASIC bug ?)\n");
        }
@@ -263,6 +265,7 @@ void rs400_mc_init(struct radeon_device *rdev)
        r100_vram_init_sizes(rdev);
        base = (RREG32(RADEON_NB_TOM) & 0xffff) << 16;
        radeon_vram_location(rdev, &rdev->mc, base);
+       rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
        radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
 }
index 7bb4c3e52f3b055ceafc27471e516477e4a3fa5b..6dc15ea8ba33ce088f273b17eea0f1fc42e537df 100644 (file)
@@ -698,6 +698,7 @@ void rs600_mc_init(struct radeon_device *rdev)
        base = G_000004_MC_FB_START(base) << 16;
        rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
        radeon_vram_location(rdev, &rdev->mc, base);
+       rdev->mc.gtt_base_align = 0;
        radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
 }
index f4f0a61bcdce3d188dd01a5737120c4e9a0d6432..ce4ecbe108163126c74fb14779eaf90dcf15ce41 100644 (file)
@@ -162,6 +162,7 @@ void rs690_mc_init(struct radeon_device *rdev)
        rs690_pm_info(rdev);
        rdev->mc.igp_sideport_enabled = radeon_atombios_sideport_present(rdev);
        radeon_vram_location(rdev, &rdev->mc, base);
+       rdev->mc.gtt_base_align = rdev->mc.gtt_size - 1;
        radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
 }
index 7d9a7b0a180ac7b735702221450e2a9e12be92fe..0c9c169a6852f453c6703901cd64dc2fb84699e9 100644 (file)
@@ -195,6 +195,7 @@ void rv515_mc_init(struct radeon_device *rdev)
        rv515_vram_get_type(rdev);
        r100_vram_init_sizes(rdev);
        radeon_vram_location(rdev, &rdev->mc, 0);
+       rdev->mc.gtt_base_align = 0;
        if (!(rdev->flags & RADEON_IS_AGP))
                radeon_gtt_location(rdev, &rdev->mc);
        radeon_update_bandwidth_info(rdev);
index b1d67dc973dce9b2f86581f3de139bf2ad34f80b..d233c65f3f7fbe4506b5e933a29ced7b323121a8 100644 (file)
@@ -40,7 +40,9 @@
 #include <linux/slab.h>
 
 #include <asm/atomic.h>
+#ifdef TTM_HAS_AGP
 #include <asm/agp.h>
+#endif
 
 #include "ttm/ttm_bo_driver.h"
 #include "ttm/ttm_page_alloc.h"
@@ -392,7 +394,7 @@ static int ttm_pool_get_num_unused_pages(void)
 /**
  * Callback for mm to request pool to reduce number of page held.
  */
-static int ttm_pool_mm_shrink(int shrink_pages, gfp_t gfp_mask)
+static int ttm_pool_mm_shrink(struct shrinker *shrink, int shrink_pages, gfp_t gfp_mask)
 {
        static atomic_t start_pool = ATOMIC_INIT(0);
        unsigned i;
index f1d62611241586f73115a0a8cd60f853837308f6..437ac786277a22520ec9fca32f1488ab6bcdcab4 100644 (file)
@@ -972,6 +972,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
        ret = copy_from_user(rects, user_rects, rects_size);
        if (unlikely(ret != 0)) {
                DRM_ERROR("Failed to get rects.\n");
+               ret = -EFAULT;
                goto out_free;
        }
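
A note on the fix above: copy_from_user() returns the number of bytes it failed to copy, not an errno, so without the added assignment a faulting user pointer would propagate a positive byte count to the ioctl caller. The conventional shape of the pattern:

    /* Conventional pattern: any nonzero copy_from_user() result
     * means a faulting user pointer, reported as -EFAULT.
     */
    if (copy_from_user(rects, user_rects, rects_size)) {
            ret = -EFAULT;
            goto out_free;
    }
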
 
index 9bec24db4d41385efe8a1c891f0a6e9daf072488..2d44b3300104eb183f902212bf281849dce00186 100644 (file)
@@ -366,6 +366,6 @@ static int __init cs5535_mfgpt_init(void)
 
 module_init(cs5535_mfgpt_init);
 
-MODULE_AUTHOR("Andres Salomon <dilinger@collabora.co.uk>");
+MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
 MODULE_DESCRIPTION("CS5535/CS5536 MFGPT timer driver");
 MODULE_LICENSE("GPL");
index af217924a76eb94511fddf7789d5d1a458a2c21b..ad30f074ee151eead0ebe435037fde70055f1d20 100644 (file)
@@ -365,6 +365,26 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev)
 
 static int __devexit sdhci_s3c_remove(struct platform_device *pdev)
 {
+       struct sdhci_host *host =  platform_get_drvdata(pdev);
+       struct sdhci_s3c *sc = sdhci_priv(host);
+       int ptr;
+
+       sdhci_remove_host(host, 1);
+
+       for (ptr = 0; ptr < 3; ptr++) {
+               clk_disable(sc->clk_bus[ptr]);
+               clk_put(sc->clk_bus[ptr]);
+       }
+       clk_disable(sc->clk_io);
+       clk_put(sc->clk_io);
+
+       iounmap(host->ioaddr);
+       release_resource(sc->ioarea);
+       kfree(sc->ioarea);
+
+       sdhci_free_host(host);
+       platform_set_drvdata(pdev, NULL);
+
        return 0;
 }
 
index 7acb3edc47ef88ba62a2de614213a85518b98274..2602852cc55a6037c5160575f3620632397fcd2c 100644 (file)
@@ -677,7 +677,7 @@ static int ibmveth_close(struct net_device *netdev)
        if (!adapter->pool_config)
                netif_stop_queue(netdev);
 
-       free_irq(netdev->irq, netdev);
+       h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
 
        do {
                lpar_rc = h_free_logical_lan(adapter->vdev->unit_address);
@@ -689,6 +689,8 @@ static int ibmveth_close(struct net_device *netdev)
                                     lpar_rc);
        }
 
+       free_irq(netdev->irq, netdev);
+
        adapter->rx_no_buffer = *(u64*)(((char*)adapter->buffer_list_addr) + 4096 - 8);
 
        ibmveth_cleanup(adapter);
index 5b3dfb4ab27985cd8823c08035dd066da36e4d1c..33525bf2a3d3e87f6769ac24c65861441c375dd3 100644 (file)
@@ -1168,6 +1168,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
        int interrupts, nr_serviced = 0, i;
        struct ei_device *ei_local;
        int handled = 0;
+       unsigned long flags;
 
        e8390_base = dev->base_addr;
        ei_local = netdev_priv(dev);
@@ -1176,7 +1177,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
         *      Protect the irq test too.
         */
         
-       spin_lock(&ei_local->page_lock);
+       spin_lock_irqsave(&ei_local->page_lock, flags);
 
        if (ei_local->irqlock) 
        {
@@ -1188,7 +1189,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
                           dev->name, inb_p(e8390_base + EN0_ISR),
                           inb_p(e8390_base + EN0_IMR));
 #endif
-               spin_unlock(&ei_local->page_lock);
+               spin_unlock_irqrestore(&ei_local->page_lock, flags);
                return IRQ_NONE;
        }
     
@@ -1261,7 +1262,7 @@ static irqreturn_t ax_interrupt(int irq, void *dev_id)
        ei_local->irqlock = 0;
        outb_p(ENISR_ALL, e8390_base + EN0_IMR);
 
-       spin_unlock(&ei_local->page_lock);
+       spin_unlock_irqrestore(&ei_local->page_lock, flags);
        return IRQ_RETVAL(handled);
 }
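
Context for the locking change above: the plain spin_lock() is only safe while ax_interrupt() runs exclusively in hard-IRQ context; once the handler can also be entered from another context (here the driver's watchdog path can apparently invoke it directly), the interrupt-disabling variant is required, or the real IRQ can deadlock on the held lock. The general pattern, as a sketch:

    /* Sketch: a lock shared between an IRQ handler and any other
     * context must disable local interrupts while held.
     */
    unsigned long flags;

    spin_lock_irqsave(&ei_local->page_lock, flags);
    /* ... access shared NIC state ... */
    spin_unlock_irqrestore(&ei_local->page_lock, flags);
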
 
index 96b6cfbf0a3a682b14216fa23a9e737a48973aaa..cdc6a5c2e70d81955efec9d3c633a593ef943b16 100644 (file)
@@ -1316,7 +1316,7 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp,
                { 0x7c800000, 0x28000000,       RTL_GIGA_MAC_VER_26 },
 
                /* 8168C family. */
-               { 0x7cf00000, 0x3ca00000,       RTL_GIGA_MAC_VER_24 },
+               { 0x7cf00000, 0x3cb00000,       RTL_GIGA_MAC_VER_24 },
                { 0x7cf00000, 0x3c900000,       RTL_GIGA_MAC_VER_23 },
                { 0x7cf00000, 0x3c800000,       RTL_GIGA_MAC_VER_18 },
                { 0x7c800000, 0x3c800000,       RTL_GIGA_MAC_VER_24 },
index 77b359162d6cad0a408754fff4cc8b9af64d3063..23c15aa9fbd5167c51e80601eb5213c0dd144283 100644 (file)
@@ -730,13 +730,17 @@ static int ath9k_hif_usb_alloc_urbs(struct hif_device_usb *hif_dev)
 
        /* RX */
        if (ath9k_hif_usb_alloc_rx_urbs(hif_dev) < 0)
-               goto err;
+               goto err_rx;
 
        /* Register Read */
        if (ath9k_hif_usb_alloc_reg_in_urb(hif_dev) < 0)
-               goto err;
+               goto err_reg;
 
        return 0;
+err_reg:
+       ath9k_hif_usb_dealloc_rx_urbs(hif_dev);
+err_rx:
+       ath9k_hif_usb_dealloc_tx_urbs(hif_dev);
 err:
        return -ENOMEM;
 }
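
The fix above converts a single bail-out label into the usual unwind ladder, so a failure in a later allocation releases everything set up before it instead of leaking. A generic sketch of the idiom (all names hypothetical):

    /* Each later failure point jumps to a label that unwinds
     * everything allocated before it, falling through the ladder.
     */
    static int setup_all(struct hif_ctx *ctx)   /* hypothetical type */
    {
            if (alloc_tx(ctx) < 0)
                    goto err;
            if (alloc_rx(ctx) < 0)
                    goto err_tx;
            if (alloc_reg(ctx) < 0)
                    goto err_rx;
            return 0;
    err_rx:
            free_rx(ctx);
    err_tx:
            free_tx(ctx);
    err:
            return -ENOMEM;
    }
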
index d24dc7dc072328fcecab649193ed4cbad0c17caa..972a9c3af39e5e7ad11dafa354b5f7314083a985 100644 (file)
@@ -330,6 +330,7 @@ static int prism2_pci_probe(struct pci_dev *pdev,
 
         dev->irq = pdev->irq;
         hw_priv->mem_start = mem;
+       dev->base_addr = (unsigned long) mem;
 
        prism2_pci_cor_sreset(local);
 
index c2a453a1a9917e4e6bb9124b4af0318416ff0f34..dc43ebd1f1fd2eb3012c900456ec30b35b9fbbb5 100644 (file)
@@ -97,6 +97,17 @@ static inline void iwl_clear_driver_stations(struct iwl_priv *priv)
        spin_lock_irqsave(&priv->sta_lock, flags);
        memset(priv->stations, 0, sizeof(priv->stations));
        priv->num_stations = 0;
+
+       /*
+        * Remove all key information that is not stored as part of station
+        * information since mac80211 may not have had a
+        * chance to remove all the keys. When the device is reconfigured
+        * by mac80211 after an error, all keys will be reconfigured.
+        */
+       priv->ucode_key_table = 0;
+       priv->key_mapping_key = 0;
+       memset(priv->wep_keys, 0, sizeof(priv->wep_keys));
+
        spin_unlock_irqrestore(&priv->sta_lock, flags);
 }
 
index 3ae468c4d7604b3b8e82eac570445bdbf098ab7a..f20d3eeeea7fe0ee4001bf3597255bab1ad9856e 100644 (file)
@@ -853,6 +853,11 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev)
                    BIT(NL80211_IFTYPE_MESH_POINT) |
                    BIT(NL80211_IFTYPE_WDS);
 
+       /*
+        * Initialize configuration work.
+        */
+       INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled);
+
        /*
         * Let the driver probe the device to detect the capabilities.
         */
@@ -862,11 +867,6 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev)
                goto exit;
        }
 
-       /*
-        * Initialize configuration work.
-        */
-       INIT_WORK(&rt2x00dev->intf_work, rt2x00lib_intf_scheduled);
-
        /*
         * Allocate queue array.
         */
index 92379e2d37e77ed4797dba23b7402f795dec80e2..2aaa13150de3ba40fd0a65cbb569f1fe96b9a49f 100644 (file)
@@ -156,6 +156,38 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev,
                                             pcibios_align_resource, dev);
        }
 
+       if (ret < 0 && dev->fw_addr[resno]) {
+               struct resource *root, *conflict;
+               resource_size_t start, end;
+
+               /*
+                * If we failed to assign anything, let's try the address
+                * where firmware left it.  That at least has a chance of
+                * working, which is better than just leaving it disabled.
+                */
+
+               if (res->flags & IORESOURCE_IO)
+                       root = &ioport_resource;
+               else
+                       root = &iomem_resource;
+
+               start = res->start;
+               end = res->end;
+               res->start = dev->fw_addr[resno];
+               res->end = res->start + size - 1;
+               dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n",
+                        resno, res);
+               conflict = request_resource_conflict(root, res);
+               if (conflict) {
+                       dev_info(&dev->dev,
+                                "BAR %d: %pR conflicts with %s %pR\n", resno,
+                                res, conflict->name, conflict);
+                       res->start = start;
+                       res->end = end;
+               } else
+                       ret = 0;
+       }
+
        if (!ret) {
                res->flags &= ~IORESOURCE_STARTALIGN;
                dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res);
index 29f91fac1dff81238616a076821191e8c273e01d..a4cd9adfcbc0a543462158b76a796a7f96aa811a 100644 (file)
@@ -857,8 +857,10 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev)
 {
        pcmcia_release_configuration(p_dev);
        pcmcia_release_io(p_dev, &p_dev->io);
-       if (p_dev->_irq)
+       if (p_dev->_irq) {
                free_irq(p_dev->irq, p_dev->priv);
+               p_dev->_irq = 0;
+       }
        if (p_dev->win)
                pcmcia_release_window(p_dev, p_dev->win);
 }
index 40658e3385b45346ddc91132816e28ccfee4dd84..bb2f1fba637b36ec041909a26dd176be6f472c0a 100644 (file)
@@ -489,7 +489,7 @@ int intel_scu_ipc_simple_command(int cmd, int sub)
                mutex_unlock(&ipclock);
                return -ENODEV;
        }
-       ipc_command(cmd << 12 | sub);
+       ipc_command(sub << 12 | cmd);
        err = busy_loop();
        mutex_unlock(&ipclock);
        return err;
@@ -501,9 +501,9 @@ EXPORT_SYMBOL(intel_scu_ipc_simple_command);
  *     @cmd: command
  *     @sub: sub type
  *     @in: input data
- *     @inlen: input length
+ *     @inlen: input length in dwords
  *     @out: output data
- *     @outlein: output length
+ *     @outlen: output length in dwords
  *
  *     Issue a command to the SCU which involves data transfers. Do the
  *     data copies under the lock but leave it for the caller to interpret
@@ -524,7 +524,7 @@ int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
        for (i = 0; i < inlen; i++)
                ipc_data_writel(*in++, 4 * i);
 
-       ipc_command((cmd << 12) | sub | (inlen << 18));
+       ipc_command((sub << 12) | cmd | (inlen << 18));
        err = busy_loop();
 
        for (i = 0; i < outlen; i++)
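
On the encoding fixed above: the SCU command word appears to carry the command in the low bits, the sub-command at bit 12, and the dword count at bit 18; the old code had cmd and sub transposed. A worked example using the corrected expression:

    /* cmd = 0x5, sub = 0x2, inlen = 3 dwords:
     *   (sub << 12) | cmd | (inlen << 18)
     * = 0x02000    | 0x5 | 0xC0000  = 0xC2005
     */
    u32 word = (sub << 12) | cmd | (inlen << 18);
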
@@ -556,6 +556,10 @@ int intel_scu_ipc_i2c_cntrl(u32 addr, u32 *data)
        u32 cmd = 0;
 
        mutex_lock(&ipclock);
+       if (ipcdev.pdev == NULL) {
+               mutex_unlock(&ipclock);
+               return -ENODEV;
+       }
        cmd = (addr >> 24) & 0xFF;
        if (cmd == IPC_I2C_READ) {
                writel(addr, ipcdev.i2c_base + IPC_I2C_CNTRL_ADDR);
index d762a0cbc6af40b2c4aa58bde75e18e514fcd658..2afbeec8b7913f504a04beed44fc9ddee703c51b 100644 (file)
@@ -163,7 +163,7 @@ static int ds2782_get_capacity(struct ds278x_info *info, int *capacity)
        if (err)
                return err;
        *capacity = raw;
-       return raw;
+       return 0;
 }
 
 static int ds2786_get_current(struct ds278x_info *info, int *current_uA)
index 34d51dd4c53902d79ef3c21434db61923ee79fc8..bed7b4634ccd4648f42c188d8e5e2937534b1f8e 100644 (file)
@@ -948,8 +948,10 @@ static ssize_t dasd_alias_show(struct device *dev,
        if (device->discipline && device->discipline->get_uid &&
            !device->discipline->get_uid(device, &uid)) {
                if (uid.type == UA_BASE_PAV_ALIAS ||
-                   uid.type == UA_HYPER_PAV_ALIAS)
+                   uid.type == UA_HYPER_PAV_ALIAS) {
+                       dasd_put_device(device);
                        return sprintf(buf, "1\n");
+               }
        }
        dasd_put_device(device);
 
index ce7cb87479fe3b8aed786eeea182549da7352b35..407d0e9adfaf96a0d4362f8e56c986f0c17700fa 100644 (file)
@@ -713,7 +713,7 @@ int chsc_determine_base_channel_path_desc(struct chp_id chpid,
        ret = chsc_determine_channel_path_desc(chpid, 0, 0, 0, 0, chsc_resp);
        if (ret)
                goto out_free;
-       memcpy(desc, &chsc_resp->data, chsc_resp->length);
+       memcpy(desc, &chsc_resp->data, sizeof(*desc));
 out_free:
        kfree(chsc_resp);
        return ret;
index d53e62ab09da46866e808affd0d1af11d16a3c18..aacbe14e2e7aa76c0180a4e269ec23aa04aae5a8 100644 (file)
@@ -554,7 +554,7 @@ static int opiocgetnext(unsigned int cmd, void __user *argp)
 static int openprom_bsd_ioctl(struct file * file,
                              unsigned int cmd, unsigned long arg)
 {
-       DATA *data = (DATA *) file->private_data;
+       DATA *data = file->private_data;
        void __user *argp = (void __user *)arg;
        int err;
 
@@ -601,7 +601,7 @@ static int openprom_bsd_ioctl(struct file * file,
 static long openprom_ioctl(struct file * file,
                           unsigned int cmd, unsigned long arg)
 {
-       DATA *data = (DATA *) file->private_data;
+       DATA *data = file->private_data;
 
        switch (cmd) {
        case OPROMGETOPT:
index ed7d958b0a01fa96a2f4a746189090cdce45bc6d..544f2e25d0e545f30f959019f4a176bdccb20af8 100644 (file)
@@ -71,7 +71,9 @@ int sunserial_console_match(struct console *con, struct device_node *dp,
 
        con->index = line;
        drv->cons = con;
-       add_preferred_console(con->name, line, NULL);
+
+       if (!console_set_on_cmdline)
+               add_preferred_console(con->name, line, NULL);
 
        return 1;
 }
index 234459c2f0122fc24e78f5b173f3fe4a3f83555d..ffbf4553f6651966269b1be1bc57b43b37b34b98 100644 (file)
@@ -1500,20 +1500,25 @@ out_unmap:
 static int __devexit su_remove(struct of_device *op)
 {
        struct uart_sunsu_port *up = dev_get_drvdata(&op->dev);
+       bool kbdms = false;
 
        if (up->su_type == SU_PORT_MS ||
-           up->su_type == SU_PORT_KBD) {
+           up->su_type == SU_PORT_KBD)
+               kbdms = true;
+
+       if (kbdms) {
 #ifdef CONFIG_SERIO
                serio_unregister_port(&up->serio);
 #endif
-               kfree(up);
-       } else if (up->port.type != PORT_UNKNOWN) {
+       } else if (up->port.type != PORT_UNKNOWN)
                uart_remove_one_port(&sunsu_reg, &up->port);
-       }
 
        if (up->port.membase)
                of_iounmap(&op->resource[0], up->port.membase, up->reg_size);
 
+       if (kbdms)
+               kfree(up);
+
        dev_set_drvdata(&op->dev, NULL);
 
        return 0;
index d69eccf5f197cb8bc890d78d847ca5637eab0a54..2aaa0f75c6cf79936f5f1e03bc1c002e42f5e59a 100644 (file)
@@ -136,7 +136,7 @@ struct ffs_data {
         * handling setup requests immidiatelly user space may be so
         * slow that another setup will be sent to the gadget but this
         * time not to us but another function and then there could be
-        * a race.  Is taht the case? Or maybe we can use cdev->req
+        * a race.  Is that the case? Or maybe we can use cdev->req
         * after all, maybe we just need some spinlock for that? */
        struct usb_request              *ep0req;                /* P: mutex */
        struct completion               ep0req_completion;      /* P: mutex */
index 57a593c58cf418769537c06d4602910203f8e5e0..d219070fed3da07b3944ac53363673f310435f33 100644 (file)
@@ -177,8 +177,8 @@ static void handle_tx(struct vhost_net *net)
                        break;
                }
                if (err != len)
-                       pr_err("Truncated TX packet: "
-                              " len %d != %zd\n", err, len);
+                       pr_debug("Truncated TX packet: "
+                                " len %d != %zd\n", err, len);
                vhost_add_used_and_signal(&net->dev, vq, head, 0);
                total_len += len;
                if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
@@ -275,8 +275,8 @@ static void handle_rx(struct vhost_net *net)
                }
                /* TODO: Should check and handle checksum. */
                if (err > len) {
-                       pr_err("Discarded truncated rx packet: "
-                              " len %d > %zd\n", err, len);
+                       pr_debug("Discarded truncated rx packet: "
+                                " len %d > %zd\n", err, len);
                        vhost_discard_vq_desc(vq);
                        continue;
                }
@@ -534,11 +534,16 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
        rcu_assign_pointer(vq->private_data, sock);
        vhost_net_enable_vq(n, vq);
 done:
+       mutex_unlock(&vq->mutex);
+
        if (oldsock) {
                vhost_net_flush_vq(n, index);
                fput(oldsock->file);
        }
 
+       mutex_unlock(&n->dev.mutex);
+       return 0;
+
 err_vq:
        mutex_unlock(&vq->mutex);
 err:
index 515cf1978d19ef21fc5062471e1357826e4da8c6..c4e17642d9c5b9e4a8bbbd61dad399311f0a58ee 100644 (file)
@@ -2872,7 +2872,7 @@ void radeonfb_pm_init(struct radeonfb_info *rinfo, int dynclk, int ignore_devlis
                }
 
 #if 0
-               /* Power down TV DAC, taht saves a significant amount of power,
+               /* Power down TV DAC, that saves a significant amount of power,
                 * we'll have something better once we actually have some TVOut
                 * support
                 */
index 0d1d966b0fe45d20e6ff75f44a6dea64f66eb54e..c3df14ce2cc2c00d232028d1238121ff9c37e35c 100644 (file)
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
        return ret;
 }
 
+/*
+ * min_slot controls the lowest index we're willing to push to the
+ * right.  We'll push up to and including min_slot, but no lower
+ */
 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                                      struct btrfs_root *root,
                                      struct btrfs_path *path,
                                      int data_size, int empty,
                                      struct extent_buffer *right,
-                                     int free_space, u32 left_nritems)
+                                     int free_space, u32 left_nritems,
+                                     u32 min_slot)
 {
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (empty)
                nr = 0;
        else
-               nr = 1;
+               nr = max_t(u32, 1, min_slot);
 
        if (path->slots[0] >= left_nritems)
                push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
  *
  * returns 1 if the push failed because the other node didn't have enough
  * room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf.  It won't
+ * push any slot lower than min_slot
  */
 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-                          *root, struct btrfs_path *path, int data_size,
-                          int empty)
+                          *root, struct btrfs_path *path,
+                          int min_data_size, int data_size,
+                          int empty, u32 min_slot)
 {
        struct extent_buffer *left = path->nodes[0];
        struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        if (left_nritems == 0)
                goto out_unlock;
 
-       return __push_leaf_right(trans, root, path, data_size, empty,
-                               right, free_space, left_nritems);
+       return __push_leaf_right(trans, root, path, min_data_size, empty,
+                               right, free_space, left_nritems, min_slot);
 out_unlock:
        btrfs_tree_unlock(right);
        free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
+ * items
  */
 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct btrfs_path *path, int data_size,
                                     int empty, struct extent_buffer *left,
-                                    int free_space, int right_nritems)
+                                    int free_space, u32 right_nritems,
+                                    u32 max_slot)
 {
        struct btrfs_disk_key disk_key;
        struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        slot = path->slots[1];
 
        if (empty)
-               nr = right_nritems;
+               nr = min(right_nritems, max_slot);
        else
-               nr = right_nritems - 1;
+               nr = min(right_nritems - 1, max_slot);
 
        for (i = 0; i < nr; i++) {
                item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
+ * items
  */
 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-                         *root, struct btrfs_path *path, int data_size,
-                         int empty)
+                         *root, struct btrfs_path *path, int min_data_size,
+                         int data_size, int empty, u32 max_slot)
 {
        struct extent_buffer *right = path->nodes[0];
        struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
                goto out;
        }
 
-       return __push_leaf_left(trans, root, path, data_size,
-                              empty, left, free_space, right_nritems);
+       return __push_leaf_left(trans, root, path, min_data_size,
+                              empty, left, free_space, right_nritems,
+                              max_slot);
 out:
        btrfs_tree_unlock(left);
        free_extent_buffer(left);
@@ -2854,6 +2873,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+/*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf.  A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ *          A                 B                 C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves.  If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+                                         struct btrfs_root *root,
+                                         struct btrfs_path *path,
+                                         int data_size)
+{
+       int ret;
+       int progress = 0;
+       int slot;
+       u32 nritems;
+
+       slot = path->slots[0];
+
+       /*
+        * try to push all the items after our slot into the
+        * right leaf
+        */
+       ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+       if (ret < 0)
+               return ret;
+
+       if (ret == 0)
+               progress++;
+
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       /*
+        * our goal is to get our slot at the start or end of a leaf.  If
+        * we've done so we're done
+        */
+       if (path->slots[0] == 0 || path->slots[0] == nritems)
+               return 0;
+
+       if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+               return 0;
+
+       /* try to push all the items before our slot into the next leaf */
+       slot = path->slots[0];
+       ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+       if (ret < 0)
+               return ret;
+
+       if (ret == 0)
+               progress++;
+
+       if (progress)
+               return 0;
+       return 1;
+}
+
 /*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int wret;
        int split;
        int num_doubles = 0;
+       int tried_avoid_double = 0;
 
        l = path->nodes[0];
        slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                return -EOVERFLOW;
 
        /* first try to make some room by pushing left and right */
-       if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
-               wret = push_leaf_right(trans, root, path, data_size, 0);
+       if (data_size) {
+               wret = push_leaf_right(trans, root, path, data_size,
+                                      data_size, 0, 0);
                if (wret < 0)
                        return wret;
                if (wret) {
-                       wret = push_leaf_left(trans, root, path, data_size, 0);
+                       wret = push_leaf_left(trans, root, path, data_size,
+                                             data_size, 0, (u32)-1);
                        if (wret < 0)
                                return wret;
                }
@@ -2923,6 +3003,8 @@ again:
                                if (mid != nritems &&
                                    leaf_space_used(l, mid, nritems - mid) +
                                    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       if (data_size && !tried_avoid_double)
+                                               goto push_for_double;
                                        split = 2;
                                }
                        }
@@ -2939,6 +3021,8 @@ again:
                                if (mid != nritems &&
                                    leaf_space_used(l, mid, nritems - mid) +
                                    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       if (data_size && !tried_avoid_double)
+                                               goto push_for_double;
                                        split = 2 ;
                                }
                        }
@@ -3019,6 +3103,13 @@ again:
        }
 
        return ret;
+
+push_for_double:
+       push_for_double_split(trans, root, path, data_size);
+       tried_avoid_double = 1;
+       if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+               return 0;
+       goto again;
 }
 
 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        extent_buffer_get(leaf);
 
                        btrfs_set_path_blocking(path);
-                       wret = push_leaf_left(trans, root, path, 1, 1);
+                       wret = push_leaf_left(trans, root, path, 1, 1,
+                                             1, (u32)-1);
                        if (wret < 0 && wret != -ENOSPC)
                                ret = wret;
 
                        if (path->nodes[0] == leaf &&
                            btrfs_header_nritems(leaf)) {
-                               wret = push_leaf_right(trans, root, path, 1, 1);
+                               wret = push_leaf_right(trans, root, path, 1,
+                                                      1, 1, 0);
                                if (wret < 0 && wret != -ENOSPC)
                                        ret = wret;
                        }
index 4dbaf89b1337632ac7800b84b183d6cd0ab14242..9254b3d58dbef22974af3c7c61dbc9a4af7a33b6 100644 (file)
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
         */
 
        /* the destination must be opened for writing */
-       if (!(file->f_mode & FMODE_WRITE))
+       if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
                return -EINVAL;
 
        ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
        /* determine range to clone */
        ret = -EINVAL;
-       if (off >= src->i_size || off + len > src->i_size)
+       if (off + len > src->i_size || off + len < off)
                goto out_unlock;
        if (len == 0)
                olen = len = src->i_size - off;
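
The reworked check above also closes a u64 wraparound hole: with unsigned arithmetic, off + len < off can only be true when the addition overflowed, so an absurdly large len can no longer wrap past the i_size comparison. For instance:

    /* off = 0x1000, len = 0xFFFFFFFFFFFFFFFF (both u64):
     * off + len wraps to 0xFFF, which would pass a naive
     * "off + len > i_size" test but fails "off + len < off".
     */
    if (off + len > src->i_size || off + len < off)
            goto out_unlock;
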
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        u64 disko = 0, diskl = 0;
                        u64 datao = 0, datal = 0;
                        u8 comp;
+                       u64 endoff;
 
                        size = btrfs_item_size_nr(leaf, slot);
                        read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        btrfs_release_path(root, path);
 
                        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-                       if (new_key.offset + datal > inode->i_size)
-                               btrfs_i_size_write(inode,
-                                                  new_key.offset + datal);
+
+                       /*
+                        * we round up to the block size at eof when
+                        * determining which extents to clone above,
+                        * but shouldn't round up the file size
+                        */
+                       endoff = new_key.offset + datal;
+                       if (endoff > off+olen)
+                               endoff = off+olen;
+                       if (endoff > inode->i_size)
+                               btrfs_i_size_write(inode, endoff);
+
                        BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
                        ret = btrfs_update_inode(trans, root, inode);
                        BUG_ON(ret);
index 3fe49042d8adaffe6b0f18de5d1b1fd63def729f..6d44053ecff1f08af7bee8546a0435ac55a0e1e2 100644 (file)
@@ -613,6 +613,9 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
                remove_ticket_handler(ac, th);
        }
 
+       if (xi->auth_authorizer.buf)
+               ceph_buffer_put(xi->auth_authorizer.buf);
+
        kfree(ac->private);
        ac->private = NULL;
 }
index 3ab79f6c4ce8808fa7c72a66adc23dcf0fcfc97d..416c08d315db52a409e85cc094588d3b5bfceed5 100644 (file)
@@ -1514,6 +1514,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        ceph_encode_filepath(&p, end, ino1, path1);
        ceph_encode_filepath(&p, end, ino2, path2);
 
+       /* make note of release offset, in case we need to replay */
+       req->r_request_release_offset = p - msg->front.iov_base;
+
        /* cap releases */
        releases = 0;
        if (req->r_inode_drop)
@@ -1580,6 +1583,32 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
        dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
             req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
 
+       if (req->r_got_unsafe) {
+               /*
+                * Replay.  Do not regenerate message (and rebuild
+                * paths, etc.); just use the original message.
+                * Rebuilding paths will break for renames because
+                * d_move mangles the src name.
+                */
+               msg = req->r_request;
+               rhead = msg->front.iov_base;
+
+               flags = le32_to_cpu(rhead->flags);
+               flags |= CEPH_MDS_FLAG_REPLAY;
+               rhead->flags = cpu_to_le32(flags);
+
+               if (req->r_target_inode)
+                       rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+
+               rhead->num_retry = req->r_attempts - 1;
+
+               /* remove cap/dentry releases from message */
+               rhead->num_releases = 0;
+               msg->hdr.front_len = cpu_to_le32(req->r_request_release_offset);
+               msg->front.iov_len = req->r_request_release_offset;
+               return 0;
+       }
+
        if (req->r_request) {
                ceph_msg_put(req->r_request);
                req->r_request = NULL;
@@ -1601,13 +1630,9 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
        rhead->flags = cpu_to_le32(flags);
        rhead->num_fwd = req->r_num_fwd;
        rhead->num_retry = req->r_attempts - 1;
+       rhead->ino = 0;
 
        dout(" r_locked_dir = %p\n", req->r_locked_dir);
-
-       if (req->r_target_inode && req->r_got_unsafe)
-               rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
-       else
-               rhead->ino = 0;
        return 0;
 }
 
index b292fa42a66d8026b49a0217ffc0b4e89f381497..952410c60d093e7ba5c39ed630c6283664d90c5a 100644 (file)
@@ -188,6 +188,7 @@ struct ceph_mds_request {
        int r_old_inode_drop, r_old_inode_unless;
 
        struct ceph_msg  *r_request;  /* original request */
+       int r_request_release_offset;
        struct ceph_msg  *r_reply;
        struct ceph_mds_reply_info_parsed r_reply_info;
        int r_err;
index 9ad43a310a415595e338b3a45187131d31fca8cd..15167b2daa5562c3a093ba03fa14932657718551 100644 (file)
@@ -43,7 +43,8 @@ static void ceph_fault(struct ceph_connection *con);
  * nicely render a sockaddr as a string.
  */
 #define MAX_ADDR_STR 20
-static char addr_str[MAX_ADDR_STR][40];
+#define MAX_ADDR_STR_LEN 60
+static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN];
 static DEFINE_SPINLOCK(addr_str_lock);
 static int last_addr_str;
 
@@ -52,7 +53,6 @@ const char *pr_addr(const struct sockaddr_storage *ss)
        int i;
        char *s;
        struct sockaddr_in *in4 = (void *)ss;
-       unsigned char *quad = (void *)&in4->sin_addr.s_addr;
        struct sockaddr_in6 *in6 = (void *)ss;
 
        spin_lock(&addr_str_lock);
@@ -64,25 +64,13 @@ const char *pr_addr(const struct sockaddr_storage *ss)
 
        switch (ss->ss_family) {
        case AF_INET:
-               sprintf(s, "%u.%u.%u.%u:%u",
-                       (unsigned int)quad[0],
-                       (unsigned int)quad[1],
-                       (unsigned int)quad[2],
-                       (unsigned int)quad[3],
-                       (unsigned int)ntohs(in4->sin_port));
+               snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr,
+                        (unsigned int)ntohs(in4->sin_port));
                break;
 
        case AF_INET6:
-               sprintf(s, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%u",
-                       in6->sin6_addr.s6_addr16[0],
-                       in6->sin6_addr.s6_addr16[1],
-                       in6->sin6_addr.s6_addr16[2],
-                       in6->sin6_addr.s6_addr16[3],
-                       in6->sin6_addr.s6_addr16[4],
-                       in6->sin6_addr.s6_addr16[5],
-                       in6->sin6_addr.s6_addr16[6],
-                       in6->sin6_addr.s6_addr16[7],
-                       (unsigned int)ntohs(in6->sin6_port));
+               snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr,
+                        (unsigned int)ntohs(in6->sin6_port));
                break;
 
        default:
@@ -215,12 +203,13 @@ static void set_sock_callbacks(struct socket *sock,
  */
 static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 {
-       struct sockaddr *paddr = (struct sockaddr *)&con->peer_addr.in_addr;
+       struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
        struct socket *sock;
        int ret;
 
        BUG_ON(con->sock);
-       ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+       ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
+                              IPPROTO_TCP, &sock);
        if (ret)
                return ERR_PTR(ret);
        con->sock = sock;
@@ -234,7 +223,8 @@ static struct socket *ceph_tcp_connect(struct ceph_connection *con)
 
        dout("connect %s\n", pr_addr(&con->peer_addr.in_addr));
 
-       ret = sock->ops->connect(sock, paddr, sizeof(*paddr), O_NONBLOCK);
+       ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
+                                O_NONBLOCK);
        if (ret == -EINPROGRESS) {
                dout("connect %s EINPROGRESS sk_state = %u\n",
                     pr_addr(&con->peer_addr.in_addr),
@@ -1009,19 +999,32 @@ int ceph_parse_ips(const char *c, const char *end,
                struct sockaddr_in *in4 = (void *)ss;
                struct sockaddr_in6 *in6 = (void *)ss;
                int port;
+               char delim = ',';
+
+               if (*p == '[') {
+                       delim = ']';
+                       p++;
+               }
 
                memset(ss, 0, sizeof(*ss));
                if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr,
-                            ',', &ipend)) {
+                            delim, &ipend))
                        ss->ss_family = AF_INET;
-               else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
-                                   ',', &ipend)) {
+               else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr,
+                                 delim, &ipend))
                        ss->ss_family = AF_INET6;
-               } else {
+               else
                        goto bad;
-               }
                p = ipend;
 
+               if (delim == ']') {
+                       if (*p != ']') {
+                               dout("missing matching ']'\n");
+                               goto bad;
+                       }
+                       p++;
+               }
+
                /* port? */
                if (p < end && *p == ':') {
                        port = 0;
@@ -1055,7 +1058,7 @@ int ceph_parse_ips(const char *c, const char *end,
        return 0;
 
 bad:
-       pr_err("parse_ips bad ip '%s'\n", c);
+       pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c);
        return -EINVAL;
 }
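
With the bracket handling added above, ceph_parse_ips() can take comma-separated addresses where IPv6 addresses are bracketed so their colons are not mistaken for the port delimiter. Illustrative inputs the parser now accepts:

    1.2.3.4:6789                 /* IPv4 with explicit port */
    10.0.0.1,10.0.0.2            /* comma-separated list */
    [2001:db8::1]:6789           /* bracketed IPv6 with port */
    [::1],1.2.3.4:6789           /* mixed v6/v4 list */
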
 
@@ -2015,20 +2018,20 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
 {
        mutex_lock(&con->mutex);
        if (!list_empty(&msg->list_head)) {
-               dout("con_revoke %p msg %p\n", con, msg);
+               dout("con_revoke %p msg %p - was on queue\n", con, msg);
                list_del_init(&msg->list_head);
                ceph_msg_put(msg);
                msg->hdr.seq = 0;
-               if (con->out_msg == msg) {
-                       ceph_msg_put(con->out_msg);
-                       con->out_msg = NULL;
-               }
+       }
+       if (con->out_msg == msg) {
+               dout("con_revoke %p msg %p - was sending\n", con, msg);
+               con->out_msg = NULL;
                if (con->out_kvec_is_msg) {
                        con->out_skip = con->out_kvec_bytes;
                        con->out_kvec_is_msg = false;
                }
-       } else {
-               dout("con_revoke %p msg %p - not queued (sent?)\n", con, msg);
+               ceph_msg_put(msg);
+               msg->hdr.seq = 0;
        }
        mutex_unlock(&con->mutex);
 }
index 50ce64ebd3301eb24cf358ca7d6c1cdb6c0fe444..277f8b33957757ef506f6ac18816699b6b8156da 100644 (file)
@@ -568,6 +568,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
                if (ev > CEPH_PG_POOL_VERSION) {
                        pr_warning("got unknown v %d > %d of ceph_pg_pool\n",
                                   ev, CEPH_PG_POOL_VERSION);
+                       kfree(pi);
                        goto bad;
                }
                __decode_pool(p, pi);
index c8c78ba078271163f567c26045afff6cf0b5cd71..86d4db15473e51b3b95fa4103eedf28227cc336a 100644 (file)
@@ -896,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
        if (nr) {
                if (!(gfp_mask & __GFP_FS))
index dbab3fdc258292ea027d5b2692e93cf1928a37fc..0898f3ec8212e5599df8c547d7d106aeb15d6c86 100644 (file)
@@ -1358,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 }
 
 
-static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask)
+static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
        struct gfs2_glock *gl;
        int may_demote;
index b256d6f24288d6507bef949804e57d3e69c8cce2..8f02d3db8f428fa345062fa50581f1a35c420a0e 100644 (file)
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
+int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
        struct gfs2_quota_data *qd;
        struct gfs2_sbd *sdp;
index 195f60c8bd144b6ae5040c878e4f14305cafccc4..e7d236ca48bd28df49f8555fbaa9e1b90460fbd0 100644 (file)
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
        return ret;
 }
 
-extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask);
+extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
index 2bee20ae3d65ba74f970129ec8d147ab7cd4fe50..722860b323a9b40e0dc9f8b2e27e68563eb293c1 100644 (file)
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(int nr, gfp_t gfp_mask)
+static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
        if (nr) {
                /*
index bc2ff5932769199f271db9055f7881b2e9c4bf54..036880895bfc8c2e99c42f6fd900819315bd508a 100644 (file)
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
        struct page *new_page;
        unsigned int new_offset;
        struct buffer_head *bh_in = jh2bh(jh_in);
-       struct jbd2_buffer_trigger_type *triggers;
        journal_t *journal = transaction->t_journal;
 
        /*
@@ -328,21 +327,21 @@ repeat:
                done_copy_out = 1;
                new_page = virt_to_page(jh_in->b_frozen_data);
                new_offset = offset_in_page(jh_in->b_frozen_data);
-               triggers = jh_in->b_frozen_triggers;
        } else {
                new_page = jh2bh(jh_in)->b_page;
                new_offset = offset_in_page(jh2bh(jh_in)->b_data);
-               triggers = jh_in->b_triggers;
        }
 
        mapped_data = kmap_atomic(new_page, KM_USER0);
        /*
-        * Fire any commit trigger.  Do this before checking for escaping,
-        * as the trigger may modify the magic offset.  If a copy-out
-        * happens afterwards, it will have the correct data in the buffer.
+        * Fire the data frozen trigger if the data wasn't already frozen.  Do this
+        * before checking for escaping, as the trigger may modify the magic
+        * offset.  If a copy-out happens afterwards, it will have the correct
+        * data in the buffer.
         */
-       jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset,
-                                  triggers);
+       if (!done_copy_out)
+               jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
+                                          jh_in->b_triggers);
 
        /*
         * Check for escaping
index e214d68620ac167fb5ddeb71775ead1b6063a571..b8e0806681bb0f4acf63964fa3f72ae00e1f8900 100644 (file)
@@ -725,6 +725,9 @@ done:
                page = jh2bh(jh)->b_page;
                offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
                source = kmap_atomic(page, KM_USER0);
+               /* Fire data frozen trigger just before we copy the data */
+               jbd2_buffer_frozen_trigger(jh, source + offset,
+                                          jh->b_triggers);
                memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
                kunmap_atomic(source, KM_USER0);
 
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
        jh->b_triggers = type;
 }
 
-void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data,
+void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
                                struct jbd2_buffer_trigger_type *triggers)
 {
        struct buffer_head *bh = jh2bh(jh);
 
-       if (!triggers || !triggers->t_commit)
+       if (!triggers || !triggers->t_frozen)
                return;
 
-       triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
+       triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
 }
 
 void jbd2_buffer_abort_trigger(struct journal_head *jh,
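
With the rename above, a client filesystem's trigger fires once, when the buffer contents are frozen for the journal, rather than on every commit of the transaction. A minimal sketch of a trigger against the renamed hook (the callback signature is inferred from the calls above; update_my_csum is hypothetical):

    /* Recompute a block checksum just before the frozen image is
     * written to the journal.
     */
    static void my_frozen(struct jbd2_buffer_trigger_type *type,
                          struct buffer_head *bh, void *mapped_data,
                          size_t size)
    {
            /* mapped_data is the buffer image that will hit the journal */
            update_my_csum(mapped_data, size);      /* hypothetical */
    }

    static struct jbd2_buffer_trigger_type my_triggers = {
            .t_frozen = my_frozen,
    };

    /* attach before dirtying the buffer in a transaction */
    jbd2_journal_set_triggers(bh, &my_triggers);
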
index a2d58c96f1b455e55429415d0fc1497dceb9fcaa..d258e261bdc76662b9620e8a14811aaf01b4306b 100644 (file)
@@ -626,7 +626,7 @@ void jffs2_xattr_free_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *i
 
 static int check_xattr_ref_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
 {
-       /* success of check_xattr_ref_inode() means taht inode (ic) dose not have
+       /* success of check_xattr_ref_inode() means that inode (ic) does not have
         * duplicate name/value pairs. If duplicate name/value pair would be found,
         * one will be removed.
         */
index ec88ff3d04a9194a4b595701e917ef784b8bdc2a..e28f21b95344378aa09ce95771481711870be902 100644 (file)
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
+static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 static struct shrinker mb_cache_shrinker = {
        .shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
  * This function is called by the kernel memory management when memory
  * gets low.
  *
+ * @shrink: (ignored)
  * @nr_to_scan: Number of objects to scan
  * @gfp_mask: (ignored)
  *
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
+mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        LIST_HEAD(free_list);
        struct list_head *l, *ltmp;
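
The signature change running through this merge adds a struct shrinker * argument to every ->shrink callback, so one callback can serve several registered shrinkers by recovering its container. A sketch against the new signature (my_cache and its helpers are hypothetical; registration via register_shrinker() is unchanged):

    static int my_shrink(struct shrinker *shrink, int nr_to_scan,
                         gfp_t gfp_mask)
    {
            struct my_cache *c =
                    container_of(shrink, struct my_cache, shrinker);

            if (nr_to_scan)
                    my_cache_evict(c, nr_to_scan, gfp_mask);
            return my_cache_count(c);       /* objects still cached */
    }

    /* struct my_cache { ... struct shrinker shrinker; }; */
    c->shrinker.shrink = my_shrink;
    c->shrinker.seeks = DEFAULT_SEEKS;
    register_shrinker(&c->shrinker);
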
index 782b431ef91c9f521f18d5eeba5a33fbceb5b612..e60416d3f8188b548ec1d55e3438ea7e6f2c926f 100644 (file)
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
        }
 }
 
-int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        LIST_HEAD(head);
        struct nfs_inode *nfsi;
index d8bd619e386c29a47224463b9a30a156bd76c6a8..e70f44b9b3f43ff5a837435e886fd73f2372a20c 100644 (file)
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 
 /* dir.c */
-extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+extern int nfs_access_cache_shrinker(struct shrinker *shrink,
+                                       int nr_to_scan, gfp_t gfp_mask);
 
 /* inode.c */
 extern struct workqueue_struct *nfsiod_workqueue;
index 3623ca20cc186046cfbf840f03ce9705146b17ba..356e976772bf1adb112aaf3c8b28ddb71b639afb 100644 (file)
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
                        dump_stack();
                        goto bail;
                }
-
-               past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
-               mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
-                    (unsigned long long)past_eof);
-
-               if (create && (iblock >= past_eof))
-                       set_buffer_new(bh_result);
        }
 
+       past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
+       mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
+            (unsigned long long)past_eof);
+       if (create && (iblock >= past_eof))
+               set_buffer_new(bh_result);
+
 bail:
        if (err < 0)
                err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers(     handle_t *handle,
        return ret;
 }
 
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
-                                                        struct page *page,
-                                                        unsigned from,
-                                                        unsigned to)
-{
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-       handle_t *handle;
-       int ret = 0;
-
-       handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
-       if (IS_ERR(handle)) {
-               ret = -ENOMEM;
-               mlog_errno(ret);
-               goto out;
-       }
-
-       if (ocfs2_should_order_data(inode)) {
-               ret = ocfs2_jbd2_file_inode(handle, inode);
-               if (ret < 0)
-                       mlog_errno(ret);
-       }
-out:
-       if (ret) {
-               if (!IS_ERR(handle))
-                       ocfs2_commit_trans(osb, handle);
-               handle = ERR_PTR(ret);
-       }
-       return handle;
-}
-
 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 {
        sector_t status;
@@ -1131,23 +1100,37 @@ out:
  */
 static int ocfs2_grab_pages_for_write(struct address_space *mapping,
                                      struct ocfs2_write_ctxt *wc,
-                                     u32 cpos, loff_t user_pos, int new,
+                                     u32 cpos, loff_t user_pos,
+                                     unsigned user_len, int new,
                                      struct page *mmap_page)
 {
        int ret = 0, i;
-       unsigned long start, target_index, index;
+       unsigned long start, target_index, end_index, index;
        struct inode *inode = mapping->host;
+       loff_t last_byte;
 
        target_index = user_pos >> PAGE_CACHE_SHIFT;
 
        /*
         * Figure out how many pages we'll be manipulating here. For
         * non allocating write, we just change the one
-        * page. Otherwise, we'll need a whole clusters worth.
+        * page. Otherwise, we'll need a whole cluster's worth.  If we're
+        * writing past i_size, we only need enough pages to cover the
+        * last page of the write.
         */
        if (new) {
                wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
                start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+               /*
+                * We need the index *past* the last page we could possibly
+                * touch.  This is the page past the end of the write or
+                * i_size, whichever is greater.
+                */
+               last_byte = max(user_pos + user_len, i_size_read(inode));
+               BUG_ON(last_byte < 1);
+               end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+               if ((start + wc->w_num_pages) > end_index)
+                       wc->w_num_pages = end_index - start;
        } else {
                wc->w_num_pages = 1;
                start = target_index;
@@ -1620,21 +1603,20 @@ out:
  * write path can treat it as an non-allocating write, which has no
  * special case code for sparse/nonsparse files.
  */
-static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
-                                       unsigned len,
+static int ocfs2_expand_nonsparse_inode(struct inode *inode,
+                                       struct buffer_head *di_bh,
+                                       loff_t pos, unsigned len,
                                        struct ocfs2_write_ctxt *wc)
 {
        int ret;
-       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        loff_t newsize = pos + len;
 
-       if (ocfs2_sparse_alloc(osb))
-               return 0;
+       BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
 
        if (newsize <= i_size_read(inode))
                return 0;
 
-       ret = ocfs2_extend_no_holes(inode, newsize, pos);
+       ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
        if (ret)
                mlog_errno(ret);
 
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
        return ret;
 }
 
+static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
+                          loff_t pos)
+{
+       int ret = 0;
+
+       BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
+       if (pos > i_size_read(inode))
+               ret = ocfs2_zero_extend(inode, di_bh, pos);
+
+       return ret;
+}
+
 int ocfs2_write_begin_nolock(struct address_space *mapping,
                             loff_t pos, unsigned len, unsigned flags,
                             struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
                }
        }
 
-       ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
+       if (ocfs2_sparse_alloc(osb))
+               ret = ocfs2_zero_tail(inode, di_bh, pos);
+       else
+               ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
+                                                  wc);
        if (ret) {
                mlog_errno(ret);
                goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
         * that we can zero and flush if we error after adding the
         * extent.
         */
-       ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
+       ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
                                         cluster_of_pages, mmap_page);
        if (ret) {
                mlog_errno(ret);
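
To see what passing user_len and trimming to end_index buys, a worked example (assuming 4 KB pages, PAGE_CACHE_SHIFT == 12, and a 32 KB cluster, i.e. eight pages per cluster): a write of user_len = 1000 at user_pos = 20480 into a file with i_size = 16384 gives last_byte = max(21480, 16384) = 21480 and end_index = ((21480 - 1) >> 12) + 1 = 6. With start = 0 for this cluster, start + w_num_pages = 8 exceeds end_index, so w_num_pages is trimmed to 6 and the two pages wholly past both the write and i_size are never grabbed or zeroed.
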
index 6b5a492e1749f8063097a1ebef4e3ef9ad53fdb5..153abb5abef024d2ca63f6d4a23ee1c126b89f13 100644 (file)
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
        struct dlm_ctxt *dlm = NULL;
        struct dlm_ctxt *new_ctxt = NULL;
 
-       if (strlen(domain) > O2NM_MAX_NAME_LEN) {
+       if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
                ret = -ENAMETOOLONG;
                mlog(ML_ERROR, "domain name length too long\n");
                goto leave;
@@ -1709,6 +1709,7 @@ retry:
                }
 
                if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
+                       spin_unlock(&dlm_domain_lock);
                        mlog(ML_ERROR,
                             "Requested locking protocol version is not "
                             "compatible with already registered domain "
index 4a7506a4e314c34014fd58ab739e836f9babe4e3..94b97fc6a88e62522b1958ed6a89de5e6803525b 100644 (file)
@@ -2808,14 +2808,8 @@ again:
                mlog(0, "trying again...\n");
                goto again;
        }
-       /* now that we are sure the MIGRATING state is there, drop
-        * the unneded state which blocked threads trying to DIRTY */
-       spin_lock(&res->spinlock);
-       BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
-       BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
-       res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
-       spin_unlock(&res->spinlock);
 
+       ret = 0;
        /* did the target go down or die? */
        spin_lock(&dlm->spinlock);
        if (!test_bit(target, dlm->domain_map)) {
@@ -2825,10 +2819,22 @@ again:
        }
        spin_unlock(&dlm->spinlock);
 
+       /*
+        * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
+        * another try; otherwise, we are sure the MIGRATING state is there,
+        * so drop the unneeded state which blocked threads trying to DIRTY
+        */
+       spin_lock(&res->spinlock);
+       BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
+       res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
+       if (!ret)
+               BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+       spin_unlock(&res->spinlock);
+
        /*
         * at this point:
         *
-        *   o the DLM_LOCK_RES_MIGRATING flag is set
+        *   o the DLM_LOCK_RES_MIGRATING flag is set if target not down
         *   o there are no pending asts on this lockres
         *   o all processes trying to reserve an ast on this
         *     lockres must wait for the MIGRATING flag to clear
index f8b75ce4be7019ab20d9c5f40d1567a4e520b8fd..9dfaac73b36da4350cc5a2f2dc7a34918a21e11e 100644 (file)
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
        if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
                int bit;
 
-               bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0);
+               bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
                if (bit >= O2NM_MAX_NODES || bit < 0)
                        dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
                else
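
The size argument of find_next_bit() is the number of valid bits: the scan covers bits [0, size) and returns a value >= size when nothing is set, so the old O2NM_MAX_NODES + 1 bound read one bit past the end of recovery_map. The corrected idiom, as used above:

        /* scans bits [0, O2NM_MAX_NODES); returns >= O2NM_MAX_NODES if none set */
        bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
        if (bit >= O2NM_MAX_NODES)
                dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
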
index 6a13ea64c44773fc239ad55a5237422ddee18e94..2b10b36d15772efcae056a62b4df1a9fe8aa2c53 100644 (file)
@@ -724,28 +724,55 @@ leave:
        return status;
 }
 
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       handle_t *handle = NULL;
+       int ret = 0;
+
+       if (!ocfs2_should_order_data(inode))
+               goto out;
+
+       handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+       if (IS_ERR(handle)) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
+
+       ret = ocfs2_jbd2_file_inode(handle, inode);
+       if (ret < 0)
+               mlog_errno(ret);
+
+out:
+       if (ret) {
+               if (!IS_ERR(handle))
+                       ocfs2_commit_trans(osb, handle);
+               handle = ERR_PTR(ret);
+       }
+       return handle;
+}
+
 /* Some parts of this taken from generic_cont_expand, which turned out
  * to be too fragile to do exactly what we need without us having to
  * worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
-                                u64 size)
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
+                                u64 abs_to)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
-       unsigned long index;
-       unsigned int offset;
+       unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
        handle_t *handle = NULL;
-       int ret;
+       int ret = 0;
+       unsigned zero_from, zero_to, block_start, block_end;
 
-       offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-       /* ugh.  in prepare/commit_write, if from==to==start of block, we
-       ** skip the prepare.  make sure we never send an offset for the start
-       ** of a block
-       */
-       if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-               offset++;
-       }
-       index = size >> PAGE_CACHE_SHIFT;
+       BUG_ON(abs_from >= abs_to);
+       BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+       BUG_ON(abs_from & (inode->i_blkbits - 1));
 
        page = grab_cache_page(mapping, index);
        if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
-       if (ret < 0) {
-               mlog_errno(ret);
-               goto out_unlock;
-       }
+       /* Get the offsets within the page that we want to zero */
+       zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
+       zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+       if (!zero_to)
+               zero_to = PAGE_CACHE_SIZE;
 
-       if (ocfs2_should_order_data(inode)) {
-               handle = ocfs2_start_walk_page_trans(inode, page, offset,
-                                                    offset);
-               if (IS_ERR(handle)) {
-                       ret = PTR_ERR(handle);
-                       handle = NULL;
+       mlog(0,
+            "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
+            (unsigned long long)abs_from, (unsigned long long)abs_to,
+            index, zero_from, zero_to);
+
+       /* We know that zero_from is block aligned */
+       for (block_start = zero_from; block_start < zero_to;
+            block_start = block_end) {
+               block_end = block_start + (1 << inode->i_blkbits);
+
+               /*
+                * block_start is block-aligned.  Bump it by one to
+                * force ocfs2_{prepare,commit}_write() to zero the
+                * whole block.
+                */
+               ret = ocfs2_prepare_write_nolock(inode, page,
+                                                block_start + 1,
+                                                block_start + 1);
+               if (ret < 0) {
+                       mlog_errno(ret);
                        goto out_unlock;
                }
-       }
 
-       /* must not update i_size! */
-       ret = block_commit_write(page, offset, offset);
-       if (ret < 0)
-               mlog_errno(ret);
-       else
-               ret = 0;
+               if (!handle) {
+                       handle = ocfs2_zero_start_ordered_transaction(inode);
+                       if (IS_ERR(handle)) {
+                               ret = PTR_ERR(handle);
+                               handle = NULL;
+                               break;
+                       }
+               }
+
+               /* must not update i_size! */
+               ret = block_commit_write(page, block_start + 1,
+                                        block_start + 1);
+               if (ret < 0)
+                       mlog_errno(ret);
+               else
+                       ret = 0;
+       }
 
        if (handle)
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+
 out_unlock:
        unlock_page(page);
        page_cache_release(page);
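
A worked pass through the new loop (assuming 4 KB pages and 1 KB blocks, i.e. i_blkbits == 10): zeroing abs_from = 6144 up to abs_to = 8192 gives index = 1 and zero_from = 2048; since 8192 & (PAGE_CACHE_SIZE - 1) == 0, zero_to becomes 4096. The loop then visits block_start = 2048 and 3072, and each iteration passes block_start + 1 so that prepare/commit_write sees an unaligned range and zeroes the whole block — the same trick the deleted offset++ played, now applied per block instead of once per call.
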
@@ -786,22 +838,114 @@ out:
        return ret;
 }
 
-static int ocfs2_zero_extend(struct inode *inode,
-                            u64 zero_to_size)
+/*
+ * Find the next range to zero.  We do this in terms of bytes because
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
+ * pagecache.  We may return multiple extents.
+ *
+ * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
+ * needs to be zeroed.  range_start and range_end return the next zeroing
+ * range.  A subsequent call should pass the previous range_end as its
+ * zero_start.  If range_end is 0, there's nothing to do.
+ *
+ * Unwritten extents are skipped over.  Refcounted extents are CoW'd.
+ */
+static int ocfs2_zero_extend_get_range(struct inode *inode,
+                                      struct buffer_head *di_bh,
+                                      u64 zero_start, u64 zero_end,
+                                      u64 *range_start, u64 *range_end)
 {
-       int ret = 0;
-       u64 start_off;
-       struct super_block *sb = inode->i_sb;
+       int rc = 0, needs_cow = 0;
+       u32 p_cpos, zero_clusters = 0;
+       u32 zero_cpos =
+               zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+       u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
+       unsigned int num_clusters = 0;
+       unsigned int ext_flags = 0;
 
-       start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
-       while (start_off < zero_to_size) {
-               ret = ocfs2_write_zero_page(inode, start_off);
-               if (ret < 0) {
-                       mlog_errno(ret);
+       while (zero_cpos < last_cpos) {
+               rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
+                                       &num_clusters, &ext_flags);
+               if (rc) {
+                       mlog_errno(rc);
+                       goto out;
+               }
+
+               if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+                       zero_clusters = num_clusters;
+                       if (ext_flags & OCFS2_EXT_REFCOUNTED)
+                               needs_cow = 1;
+                       break;
+               }
+
+               zero_cpos += num_clusters;
+       }
+       if (!zero_clusters) {
+               *range_end = 0;
+               goto out;
+       }
+
+       while ((zero_cpos + zero_clusters) < last_cpos) {
+               rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
+                                       &p_cpos, &num_clusters,
+                                       &ext_flags);
+               if (rc) {
+                       mlog_errno(rc);
                        goto out;
                }
 
-               start_off += sb->s_blocksize;
+               if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
+                       break;
+               if (ext_flags & OCFS2_EXT_REFCOUNTED)
+                       needs_cow = 1;
+               zero_clusters += num_clusters;
+       }
+       if ((zero_cpos + zero_clusters) > last_cpos)
+               zero_clusters = last_cpos - zero_cpos;
+
+       if (needs_cow) {
+               rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
+                                       UINT_MAX);
+               if (rc) {
+                       mlog_errno(rc);
+                       goto out;
+               }
+       }
+
+       *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
+       *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
+                                            zero_cpos + zero_clusters);
+
+out:
+       return rc;
+}
+
+/*
+ * Zero one range returned from ocfs2_zero_extend_get_range().  The caller
+ * has made sure that the entire range needs zeroing.
+ */
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
+                                  u64 range_end)
+{
+       int rc = 0;
+       u64 next_pos;
+       u64 zero_pos = range_start;
+
+       mlog(0, "range_start = %llu, range_end = %llu\n",
+            (unsigned long long)range_start,
+            (unsigned long long)range_end);
+       BUG_ON(range_start >= range_end);
+
+       while (zero_pos < range_end) {
+               next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+               if (next_pos > range_end)
+                       next_pos = range_end;
+               rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+               if (rc < 0) {
+                       mlog_errno(rc);
+                       break;
+               }
+               zero_pos = next_pos;
 
                /*
                 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
                cond_resched();
        }
 
-out:
+       return rc;
+}
+
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+                     loff_t zero_to_size)
+{
+       int ret = 0;
+       u64 zero_start, range_start = 0, range_end = 0;
+       struct super_block *sb = inode->i_sb;
+
+       zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+       mlog(0, "zero_start %llu for i_size %llu\n",
+            (unsigned long long)zero_start,
+            (unsigned long long)i_size_read(inode));
+       while (zero_start < zero_to_size) {
+               ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
+                                                 zero_to_size,
+                                                 &range_start,
+                                                 &range_end);
+               if (ret) {
+                       mlog_errno(ret);
+                       break;
+               }
+               if (!range_end)
+                       break;
+               /* Trim the ends */
+               if (range_start < zero_start)
+                       range_start = zero_start;
+               if (range_end > zero_to_size)
+                       range_end = zero_to_size;
+
+               ret = ocfs2_zero_extend_range(inode, range_start,
+                                             range_end);
+               if (ret) {
+                       mlog_errno(ret);
+                       break;
+               }
+               zero_start = range_end;
+       }
+
        return ret;
 }
 
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+                         u64 new_i_size, u64 zero_to)
 {
        int ret;
        u32 clusters_to_add;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
+       /*
+        * Only quota files call this without a bh, and they can't be
+        * refcounted.
+        */
+       BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+       BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
+
        clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
        if (clusters_to_add < oi->ip_clusters)
                clusters_to_add = 0;
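
A worked trace of the range walk above (assuming 4 KB clusters): suppose ocfs2_zero_extend() must zero bytes [8192, 32768), i.e. clusters 2-7, where clusters 2-3 are holes, 4-5 are allocated, and 6 is unwritten. The first loop in ocfs2_zero_extend_get_range() skips the holes (p_cpos == 0) and lands on cluster 4; the second loop extends the run through cluster 5 and stops at the unwritten cluster 6. The returned range is [16384, 24576): only allocated, written clusters get zeroed explicitly, since holes and unwritten extents already read back as zeros, and a refcounted run would have been CoW'd first.
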
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
         * still need to zero the area between the old i_size and the
         * new i_size.
         */
-       ret = ocfs2_zero_extend(inode, zero_to);
+       ret = ocfs2_zero_extend(inode, di_bh, zero_to);
        if (ret < 0)
                mlog_errno(ret);
 
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
                goto out;
 
        if (i_size_read(inode) == new_i_size)
-               goto out;
+               goto out;
        BUG_ON(new_i_size < i_size_read(inode));
 
-       /*
-        * Fall through for converting inline data, even if the fs
-        * supports sparse files.
-        *
-        * The check for inline data here is legal - nobody can add
-        * the feature since we have i_mutex. We must check it again
-        * after acquiring ip_alloc_sem though, as paths like mmap
-        * might have raced us to converting the inode to extents.
-        */
-       if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
-           && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
-               goto out_update_size;
-
        /*
         * The alloc sem blocks people in read/write from reading our
         * allocation until we're done changing it. We depend on
         * i_mutex to block other extend/truncate calls while we're
-        * here.
+        * here.  We even have to hold it for sparse files because there
+        * might be some tail zeroing.
         */
        down_write(&oi->ip_alloc_sem);
 
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
                ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
                if (ret) {
                        up_write(&oi->ip_alloc_sem);
-
                        mlog_errno(ret);
                        goto out;
                }
        }
 
-       if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
-               ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
+       if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+               ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
+       else
+               ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
+                                           new_i_size);
 
        up_write(&oi->ip_alloc_sem);
 
index d66cf4f7c70e34bbec1d63eccd25096eeac7e976..97bf761c9e7c7b8f1c1744bbada1d778095eb3d7 100644 (file)
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
 int ocfs2_simple_size_update(struct inode *inode,
                             struct buffer_head *di_bh,
                             u64 new_i_size);
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
-                         u64 zero_to);
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+                         u64 new_i_size, u64 zero_to);
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+                     loff_t zero_to);
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
 int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat);
index 47878cf164184cf910bcbbebc10da133aa2e0c64..625de9d7088cdf2c82008b2e875094ec2b43f1d0 100644 (file)
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
        return container_of(triggers, struct ocfs2_triggers, ot_triggers);
 }
 
-static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
                                 struct buffer_head *bh,
                                 void *data, size_t size)
 {
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
  * Quota blocks have their own trigger because the struct ocfs2_block_check
  * offset depends on the blocksize.
  */
-static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
                                 struct buffer_head *bh,
                                 void *data, size_t size)
 {
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
  * Directory blocks also have their own trigger because the
  * struct ocfs2_block_check offset depends on the blocksize.
  */
-static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
+static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
                                 struct buffer_head *bh,
                                 void *data, size_t size)
 {
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
 
 static struct ocfs2_triggers di_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
 
 static struct ocfs2_triggers eb_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
 
 static struct ocfs2_triggers rb_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
 
 static struct ocfs2_triggers gd_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
 
 static struct ocfs2_triggers db_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_db_commit_trigger,
+               .t_frozen = ocfs2_db_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
 };
 
 static struct ocfs2_triggers xb_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
 
 static struct ocfs2_triggers dq_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_dq_commit_trigger,
+               .t_frozen = ocfs2_dq_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
 };
 
 static struct ocfs2_triggers dr_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
 
 static struct ocfs2_triggers dl_triggers = {
        .ot_triggers = {
-               .t_commit = ocfs2_commit_trigger,
+               .t_frozen = ocfs2_frozen_trigger,
                .t_abort = ocfs2_abort_trigger,
        },
        .ot_offset      = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
        mutex_lock(&os->os_lock);
        ocfs2_queue_orphan_scan(osb);
        if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
-               schedule_delayed_work(&os->os_orphan_scan_work,
+               queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
                                      ocfs2_orphan_scan_timeout());
        mutex_unlock(&os->os_lock);
 }
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
                atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
        else {
                atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
-               schedule_delayed_work(&os->os_orphan_scan_work,
-                                     ocfs2_orphan_scan_timeout());
+               queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+                                  ocfs2_orphan_scan_timeout());
        }
 }
 
index 3d7419682dc069da151ae265367f214e15ae31af..ec6adbf8f5515afbaa7e8ffcb8a32b6a3c4e1ea8 100644 (file)
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
 {
        unsigned int la_mb;
        unsigned int gd_mb;
+       unsigned int la_max_mb;
        unsigned int megs_per_slot;
        struct super_block *sb = osb->sb;
 
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
        if (megs_per_slot < la_mb)
                la_mb = megs_per_slot;
 
+       /* We can't store more bits than we can in a block. */
+       la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
+                                               ocfs2_local_alloc_size(sb) * 8);
+       if (la_mb > la_max_mb)
+               la_mb = la_max_mb;
+
        return la_mb;
 }
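
The new clamp is straightforward bitmap arithmetic: the local alloc bitmap can describe at most ocfs2_local_alloc_size(sb) * 8 clusters, one bit per cluster. As an illustrative calculation (the numbers are assumptions, not values from this patch): with roughly 4,000 bytes of bitmap — 32,000 bits — and 4 KB clusters, la_max_mb comes out near 32,000 * 4 KB = 125 MB, so any larger computed default is clamped to what a single block's bitmap can actually track.
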
 
index 2bb35fe00511e98f41f23fa9a93d752d3a82b4fe..4607923eb24c192ff3642042161684a158300908 100644 (file)
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
                 * locking allocators ranks above a transaction start
                 */
                WARN_ON(journal_current_handle());
-               status = ocfs2_extend_no_holes(gqinode,
+               status = ocfs2_extend_no_holes(gqinode, NULL,
                        gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
                        gqinode->i_size);
                if (status < 0)
index 8bd70d4d184d5827fdb861b7a565fc26d364f0a1..dc78764ccc4c6211bacee0a409f23606e24b3997 100644 (file)
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
        u64 p_blkno;
 
        /* We are protected by dqio_sem so no locking needed */
-       status = ocfs2_extend_no_holes(lqinode,
+       status = ocfs2_extend_no_holes(lqinode, NULL,
                                       lqinode->i_size + 2 * sb->s_blocksize,
                                       lqinode->i_size);
        if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
                return ocfs2_local_quota_add_chunk(sb, type, offset);
 
        /* We are protected by dqio_sem so no locking needed */
-       status = ocfs2_extend_no_holes(lqinode,
+       status = ocfs2_extend_no_holes(lqinode, NULL,
                                       lqinode->i_size + sb->s_blocksize,
                                       lqinode->i_size);
        if (status < 0) {
index 4793f36f6518b25f312274d2fcdc84492de26340..3ac5aa733e9c8018090bc2493d819937593760a0 100644 (file)
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 
        offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
        end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
+       /*
+        * We only duplicate pages until we reach the page that contains i_size - 1.
+        * So trim 'end' to i_size.
+        */
+       if (end > i_size_read(context->inode))
+               end = i_size_read(context->inode);
 
        while (offset < end) {
                page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
        struct inode *inode = old_dentry->d_inode;
        struct buffer_head *new_bh = NULL;
 
+       if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+               ret = -EINVAL;
+               mlog_errno(ret);
+               goto out;
+       }
+
        ret = filemap_fdatawrite(inode->i_mapping);
        if (ret) {
                mlog_errno(ret);
index f4c2a9eb8c4d75a6354fb52c37ba93edb81bebd5..a8e6a95a353f03dcb8a34cf928ded84ff7e6d127 100644 (file)
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
                     le16_to_cpu(bg->bg_free_bits_count));
        le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
                     le16_to_cpu(bg->bg_bits));
-       cl->cl_recs[alloc_rec].c_blkno  = cpu_to_le64(bg->bg_blkno);
+       cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
        if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
                le16_add_cpu(&cl->cl_next_free_rec, 1);
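
The one-line suballoc.c change above is an endianness fix: bg->bg_blkno is already an on-disk __le64, so wrapping it in cpu_to_le64() byte-swapped it a second time on big-endian hosts; on little-endian machines cpu_to_le64() is a no-op, which is how the bug stayed hidden. Schematically (rec standing in for cl->cl_recs[alloc_rec]):

        __le64 disk_blkno = bg->bg_blkno;       /* already little-endian */
        rec.c_blkno = cpu_to_le64(disk_blkno);  /* WRONG: double-swaps on big-endian */
        rec.c_blkno = disk_blkno;               /* right: __le64 to __le64, no conversion */
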
 
index e97b34842cfea0d188f85476a56c4a0eb0af5ce0..d03469f618012ea4aff64b62f1f77e3e8b8ec47d 100644 (file)
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
                                         struct ocfs2_xattr_value_buf *vb,
                                         struct ocfs2_xattr_set_ctxt *ctxt)
 {
-       int status = 0;
+       int status = 0, credits;
        handle_t *handle = ctxt->handle;
        enum ocfs2_alloc_restarted why;
        u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
 
        ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
 
-       status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
-                             OCFS2_JOURNAL_ACCESS_WRITE);
-       if (status < 0) {
-               mlog_errno(status);
-               goto leave;
-       }
+       while (clusters_to_add) {
+               status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
+                                      OCFS2_JOURNAL_ACCESS_WRITE);
+               if (status < 0) {
+                       mlog_errno(status);
+                       break;
+               }
 
-       prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
-       status = ocfs2_add_clusters_in_btree(handle,
-                                            &et,
-                                            &logical_start,
-                                            clusters_to_add,
-                                            0,
-                                            ctxt->data_ac,
-                                            ctxt->meta_ac,
-                                            &why);
-       if (status < 0) {
-               mlog_errno(status);
-               goto leave;
-       }
+               prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
+               status = ocfs2_add_clusters_in_btree(handle,
+                                                    &et,
+                                                    &logical_start,
+                                                    clusters_to_add,
+                                                    0,
+                                                    ctxt->data_ac,
+                                                    ctxt->meta_ac,
+                                                    &why);
+               if ((status < 0) && (status != -EAGAIN)) {
+                       if (status != -ENOSPC)
+                               mlog_errno(status);
+                       break;
+               }
 
-       ocfs2_journal_dirty(handle, vb->vb_bh);
+               ocfs2_journal_dirty(handle, vb->vb_bh);
 
-       clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
+               clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
+                                        prev_clusters;
 
-       /*
-        * We should have already allocated enough space before the transaction,
-        * so no need to restart.
-        */
-       BUG_ON(why != RESTART_NONE || clusters_to_add);
-
-leave:
+               if (why != RESTART_NONE && clusters_to_add) {
+                       /*
+                        * We can only fail in case the alloc file doesn't give
+                        * We can only fail if the alloc file doesn't give
+                        */
+                       BUG_ON(why == RESTART_META);
+
+                       mlog(0, "restarting xattr value extension for %u"
+                            " clusters.\n", clusters_to_add);
+                       credits = ocfs2_calc_extend_credits(inode->i_sb,
+                                                           &vb->vb_xv->xr_list,
+                                                           clusters_to_add);
+                       status = ocfs2_extend_trans(handle, credits);
+                       if (status < 0) {
+                               status = -ENOMEM;
+                               mlog_errno(status);
+                               break;
+                       }
+               }
+       }
 
        return status;
 }
@@ -6788,16 +6804,15 @@ out:
        return ret;
 }
 
-static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+static int ocfs2_reflink_xattr_bucket(handle_t *handle,
                                u64 blkno, u64 new_blkno, u32 clusters,
+                               u32 *cpos, int num_buckets,
                                struct ocfs2_alloc_context *meta_ac,
                                struct ocfs2_alloc_context *data_ac,
                                struct ocfs2_reflink_xattr_tree_args *args)
 {
        int i, j, ret = 0;
        struct super_block *sb = args->reflink->old_inode->i_sb;
-       u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
-       u32 num_buckets = clusters * bpc;
        int bpb = args->old_bucket->bu_blocks;
        struct ocfs2_xattr_value_buf vb = {
                .vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                        break;
                }
 
-               /*
-                * The real bucket num in this series of blocks is stored
-                * in the 1st bucket.
-                */
-               if (i == 0)
-                       num_buckets = le16_to_cpu(
-                               bucket_xh(args->old_bucket)->xh_num_buckets);
-
                ret = ocfs2_xattr_bucket_journal_access(handle,
                                                args->new_bucket,
                                                OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                               bucket_block(args->old_bucket, j),
                               sb->s_blocksize);
 
+               /*
+                * Record the start cpos so that we can use it to initialize
+                * our xattr tree; we also set xh_num_buckets for the new
+                * bucket.
+                */
+               if (i == 0) {
+                       *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
+                                           xh_entries[0].xe_name_hash);
+                       bucket_xh(args->new_bucket)->xh_num_buckets =
+                               cpu_to_le16(num_buckets);
+               }
+
                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
 
                ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
                }
 
                ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
+
                ocfs2_xattr_bucket_relse(args->old_bucket);
                ocfs2_xattr_bucket_relse(args->new_bucket);
        }
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
        ocfs2_xattr_bucket_relse(args->new_bucket);
        return ret;
 }
+
+static int ocfs2_reflink_xattr_buckets(handle_t *handle,
+                               struct inode *inode,
+                               struct ocfs2_reflink_xattr_tree_args *args,
+                               struct ocfs2_extent_tree *et,
+                               struct ocfs2_alloc_context *meta_ac,
+                               struct ocfs2_alloc_context *data_ac,
+                               u64 blkno, u32 cpos, u32 len)
+{
+       int ret, first_inserted = 0;
+       u32 p_cluster, num_clusters, reflink_cpos = 0;
+       u64 new_blkno;
+       unsigned int num_buckets, reflink_buckets;
+       unsigned int bpc =
+               ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
+
+       ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+       num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
+       ocfs2_xattr_bucket_relse(args->old_bucket);
+
+       while (len && num_buckets) {
+               ret = ocfs2_claim_clusters(handle, data_ac,
+                                          1, &p_cluster, &num_clusters);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+
+               new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
+               reflink_buckets = min(num_buckets, bpc * num_clusters);
+
+               ret = ocfs2_reflink_xattr_bucket(handle, blkno,
+                                                new_blkno, num_clusters,
+                                                &reflink_cpos, reflink_buckets,
+                                                meta_ac, data_ac, args);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+
+               /*
+                * For the 1st allocated cluster, we make it use the same cpos
+                * so that the xattr tree looks the same as the original one
+                * in most cases.
+                */
+               if (!first_inserted) {
+                       reflink_cpos = cpos;
+                       first_inserted = 1;
+               }
+               ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
+                                         num_clusters, 0, meta_ac);
+               if (ret)
+                       mlog_errno(ret);
+
+               mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
+                    (unsigned long long)new_blkno, num_clusters, reflink_cpos);
+
+               len -= num_clusters;
+               blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
+               num_buckets -= reflink_buckets;
+       }
+out:
+       return ret;
+}
+
 /*
  * Create the same xattr extent record in the new inode's xattr tree.
  */
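
The split into ocfs2_reflink_xattr_bucket() and the new outer loop above lets the clusters for the copy be claimed piecemeal instead of requiring len clusters in one contiguous allocation. A worked example (assumed numbers): with bpc = 2 buckets per cluster and num_buckets = 10 to copy, a first ocfs2_claim_clusters() call returning num_clusters = 3 yields reflink_buckets = min(10, 2 * 3) = 6; the loop then claims again for the remaining 4 buckets, inserting one extent record per allocated run, with the first record reusing the original cpos so the new xattr tree mirrors the old layout.
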
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
                                   void *para)
 {
        int ret, credits = 0;
-       u32 p_cluster, num_clusters;
-       u64 new_blkno;
        handle_t *handle;
        struct ocfs2_reflink_xattr_tree_args *args =
                        (struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
        struct ocfs2_alloc_context *data_ac = NULL;
        struct ocfs2_extent_tree et;
 
+       mlog(0, "reflink xattr buckets %llu len %u\n",
+            (unsigned long long)blkno, len);
+
        ocfs2_init_xattr_tree_extent_tree(&et,
                                          INODE_CACHE(args->reflink->new_inode),
                                          args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
                goto out;
        }
 
-       ret = ocfs2_claim_clusters(handle, data_ac,
-                                  len, &p_cluster, &num_clusters);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_commit;
-       }
-
-       new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
-
-       mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
-            (unsigned long long)blkno, (unsigned long long)new_blkno, len);
-       ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
-                                         meta_ac, data_ac, args);
-       if (ret) {
-               mlog_errno(ret);
-               goto out_commit;
-       }
-
-       mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
-            (unsigned long long)new_blkno, len, cpos);
-       ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
-                                 len, 0, meta_ac);
+       ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
+                                         meta_ac, data_ac,
+                                         blkno, cpos, len);
        if (ret)
                mlog_errno(ret);
 
-out_commit:
        ocfs2_commit_trans(osb, handle);
 
 out:
index 3e73de5967ff94c9e5896aaf84cd0c6a1c7a4b96..fc8497643fd08f42d48b1bad50c4c30e5f43a782 100644 (file)
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
        } *label;
        unsigned char *data;
        Sector sect;
+       sector_t labelsect;
 
        res = 0;
        blocksize = bdev_logical_block_size(bdev);
@@ -97,11 +98,20 @@ int ibm_partition(struct parsed_partitions *state)
            ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
                goto out_freeall;
 
+       /*
+        * Special case for FBA disks: label sector does not depend on
+        * blocksize.
+        */
+       if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
+           (info->cu_type == 0x3880 && info->dev_type == 0x3370))
+               labelsect = info->label_block;
+       else
+               labelsect = info->label_block * (blocksize >> 9);
+
        /*
         * Get volume label, extract name and type.
         */
-       data = read_part_sector(state, info->label_block*(blocksize/512),
-                               &sect);
+       data = read_part_sector(state, labelsect, &sect);
        if (data == NULL)
                goto out_readerr;
 
index 12c233da1b6b77e6c06d80b1bd18dbca93b65c78..437d2ca2de973d1027109b038e895e31a68b3a63 100644 (file)
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
  * This is called from kswapd when we think we need some
  * more memory
  */
-static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
        if (nr) {
                spin_lock(&dq_list_lock);
index 02feb59cefcac356768d8efdf9e69c9c1ded811e..0b201114a5adf9cfc9c04b6730442748c079fa9d 100644 (file)
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
        return 0;
 }
 
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
        int freed, contention = 0;
        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
index 2eef553d50c817e6276e994f4aee80971cdf2191..04310878f449ac44db40c89325dd0f745f2b5cdd 100644 (file)
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int ubifs_tnc_end_commit(struct ubifs_info *c);
 
 /* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
 
 /* commit.c */
 int ubifs_bg_thread(void *info);
index 649ade8ef598693e2e57ac66475c1993b57903e3..2ee3f7a60163e899e971700aba008a243049726a 100644 (file)
@@ -45,7 +45,7 @@
 
 static kmem_zone_t *xfs_buf_zone;
 STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(int, gfp_t);
+STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
 STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
 static struct shrinker xfs_buf_shake = {
        .shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
                                        __func__, gfp_mask);
 
                        XFS_STATS_INC(xb_page_retries);
-                       xfsbufd_wakeup(0, gfp_mask);
+                       xfsbufd_wakeup(NULL, 0, gfp_mask);
                        congestion_wait(BLK_RW_ASYNC, HZ/50);
                        goto retry;
                }
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
 
 STATIC int
 xfsbufd_wakeup(
+       struct shrinker         *shrink,
        int                     priority,
        gfp_t                   mask)
 {
index f2d1718c9165f104befa17069c7b48cfd93a4b3a..80938c736c2769861327ea10368cd0c20d4ceb51 100644 (file)
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
                goto out_cleanup_procfs;
 
        vfs_initquota();
-       xfs_inode_shrinker_init();
 
        error = register_filesystem(&xfs_fs_type);
        if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
 {
        vfs_exitquota();
        unregister_filesystem(&xfs_fs_type);
-       xfs_inode_shrinker_destroy();
        xfs_sysctl_unregister();
        xfs_cleanup_procfs();
        xfs_buf_terminate();
index ef7f0218bccb45779157128139ab552e8b423832..a51a07c3a70cfa8b5514add3dae137402bbc56b6 100644 (file)
@@ -144,6 +144,41 @@ restart:
        return last_error;
 }
 
+/*
+ * Select the next per-ag structure to iterate during the walk. The reclaim
+ * walk is optimised only to walk AGs with reclaimable inodes in them.
+ */
+static struct xfs_perag *
+xfs_inode_ag_iter_next_pag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          *first,
+       int                     tag)
+{
+       struct xfs_perag        *pag = NULL;
+
+       if (tag == XFS_ICI_RECLAIM_TAG) {
+               int found;
+               int ref;
+
+               spin_lock(&mp->m_perag_lock);
+               found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+                               (void **)&pag, *first, 1, tag);
+               if (found <= 0) {
+                       spin_unlock(&mp->m_perag_lock);
+                       return NULL;
+               }
+               *first = pag->pag_agno + 1;
+               /* open coded pag reference increment */
+               ref = atomic_inc_return(&pag->pag_ref);
+               spin_unlock(&mp->m_perag_lock);
+               trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
+       } else {
+               pag = xfs_perag_get(mp, *first);
+               (*first)++;
+       }
+       return pag;
+}
+
 int
 xfs_inode_ag_iterator(
        struct xfs_mount        *mp,
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator(
        int                     exclusive,
        int                     *nr_to_scan)
 {
+       struct xfs_perag        *pag;
        int                     error = 0;
        int                     last_error = 0;
        xfs_agnumber_t          ag;
        int                     nr;
 
        nr = nr_to_scan ? *nr_to_scan : INT_MAX;
-       for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-               struct xfs_perag        *pag;
-
-               pag = xfs_perag_get(mp, ag);
+       ag = 0;
+       while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
                error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
                                                exclusive, &nr);
                xfs_perag_put(pag);
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
        radix_tree_tag_set(&pag->pag_ici_root,
                           XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
                           XFS_ICI_RECLAIM_TAG);
+
+       if (!pag->pag_ici_reclaimable) {
+               /* propagate the reclaim tag up into the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_set(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+               trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
        pag->pag_ici_reclaimable++;
 }
 
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
        radix_tree_tag_clear(&pag->pag_ici_root,
                        XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
        pag->pag_ici_reclaimable--;
+       if (!pag->pag_ici_reclaimable) {
+               /* clear the reclaim tag from the perag radix tree */
+               spin_lock(&ip->i_mount->m_perag_lock);
+               radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
+                               XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
+                               XFS_ICI_RECLAIM_TAG);
+               spin_unlock(&ip->i_mount->m_perag_lock);
+               trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
+                                                       -1, _RET_IP_);
+       }
 }
 
 /*
@@ -828,83 +883,52 @@ xfs_reclaim_inodes(
 
 /*
  * Shrinker infrastructure.
- *
- * This is all far more complex than it needs to be. It adds a global list of
- * mounts because the shrinkers can only call a global context. We need to make
- * the shrinkers pass a context to avoid the need for global state.
  */
-static LIST_HEAD(xfs_mount_list);
-static struct rw_semaphore xfs_mount_list_lock;
-
 static int
 xfs_reclaim_inode_shrink(
+       struct shrinker *shrink,
        int             nr_to_scan,
        gfp_t           gfp_mask)
 {
        struct xfs_mount *mp;
        struct xfs_perag *pag;
        xfs_agnumber_t  ag;
-       int             reclaimable = 0;
+       int             reclaimable;
 
+       mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
        if (nr_to_scan) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;
 
-               down_read(&xfs_mount_list_lock);
-               list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
-                       xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+               xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
                                        XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
-                       if (nr_to_scan <= 0)
-                               break;
-               }
-               up_read(&xfs_mount_list_lock);
-       }
+               /* if we don't exhaust the scan, don't bother coming back */
+               if (nr_to_scan > 0)
+                       return -1;
+       }
 
-       down_read(&xfs_mount_list_lock);
-       list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
-               for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-                       pag = xfs_perag_get(mp, ag);
-                       reclaimable += pag->pag_ici_reclaimable;
-                       xfs_perag_put(pag);
-               }
+       reclaimable = 0;
+       ag = 0;
+       while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
+                                       XFS_ICI_RECLAIM_TAG))) {
+               reclaimable += pag->pag_ici_reclaimable;
+               xfs_perag_put(pag);
        }
-       up_read(&xfs_mount_list_lock);
        return reclaimable;
 }
 
-static struct shrinker xfs_inode_shrinker = {
-       .shrink = xfs_reclaim_inode_shrink,
-       .seeks = DEFAULT_SEEKS,
-};
-
-void __init
-xfs_inode_shrinker_init(void)
-{
-       init_rwsem(&xfs_mount_list_lock);
-       register_shrinker(&xfs_inode_shrinker);
-}
-
-void
-xfs_inode_shrinker_destroy(void)
-{
-       ASSERT(list_empty(&xfs_mount_list));
-       unregister_shrinker(&xfs_inode_shrinker);
-}
-
 void
 xfs_inode_shrinker_register(
        struct xfs_mount        *mp)
 {
-       down_write(&xfs_mount_list_lock);
-       list_add_tail(&mp->m_mplist, &xfs_mount_list);
-       up_write(&xfs_mount_list_lock);
+       mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
+       mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
+       register_shrinker(&mp->m_inode_shrink);
 }
 
 void
 xfs_inode_shrinker_unregister(
        struct xfs_mount        *mp)
 {
-       down_write(&xfs_mount_list_lock);
-       list_del(&mp->m_mplist);
-       up_write(&xfs_mount_list_lock);
+       unregister_shrinker(&mp->m_inode_shrink);
 }
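
The hunks above replace the global xfs_mount_list with a shrinker embedded in each xfs_mount, recovered via container_of() in the callback. A minimal sketch of this embedded-shrinker pattern under the new shrink() signature; my_cache and its helpers are hypothetical names, not part of the patch:

/* Sketch only: illustrates the embedded-shrinker pattern used above.
 * my_cache and my_cache_shrink() are hypothetical, not kernel API. */
struct my_cache {
	int		nr_cached;	/* objects this instance can free */
	struct shrinker	shrink;		/* embedded, one shrinker per instance */
};

static int my_cache_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
	/* recover the owning instance from the callback argument */
	struct my_cache *c = container_of(shrink, struct my_cache, shrink);

	if (nr_to_scan) {
		if (!(gfp_mask & __GFP_FS))
			return -1;	/* must not recurse into the FS */
		/* ... free up to nr_to_scan objects belonging to c ... */
	}
	return c->nr_cached;	/* report remaining reclaimable objects */
}

static void my_cache_register(struct my_cache *c)
{
	c->shrink.shrink = my_cache_shrink;
	c->shrink.seeks = DEFAULT_SEEKS;
	register_shrinker(&c->shrink);
}

Embedding the shrinker gives the callback its context for free, which is exactly what the removed global mount list and its rwsem existed to work around.
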
index cdcbaaca9880d04dfa8f2146580b0ef216ed178f..e28139aaa4aa42c6085100856345e0156a33eef3 100644 (file)
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
        int flags, int tag, int write_lock, int *nr_to_scan);
 
-void xfs_inode_shrinker_init(void);
-void xfs_inode_shrinker_destroy(void);
 void xfs_inode_shrinker_register(struct xfs_mount *mp);
 void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
index 73d5aa117384bc1e8c626012dba881e02a5deb57..302820690904d1eff4de201ca97ca97a5fad4c9d 100644 (file)
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name,        \
                 unsigned long caller_ip),                                      \
        TP_ARGS(mp, agno, refcount, caller_ip))
 DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
 DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
 
 TRACE_EVENT(xfs_attr_list_node_descend,
        TP_PROTO(struct xfs_attr_list_context *ctx,
index 8c117ff2e3ab00d8939f7ec596063988faae283f..67c018392d62a8ecef3c93f35c2b942b0b52137d 100644 (file)
@@ -69,7 +69,7 @@ STATIC void   xfs_qm_list_destroy(xfs_dqlist_t *);
 
 STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int     xfs_qm_shake(int, gfp_t);
+STATIC int     xfs_qm_shake(struct shrinker *, int, gfp_t);
 
 static struct shrinker xfs_qm_shaker = {
        .shrink = xfs_qm_shake,
@@ -2117,7 +2117,10 @@ xfs_qm_shake_freelist(
  */
 /* ARGSUSED */
 STATIC int
-xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
+xfs_qm_shake(
+       struct shrinker *shrink,
+       int             nr_to_scan,
+       gfp_t           gfp_mask)
 {
        int     ndqused, nfree, n;
 
index 1d2c7eed4eda6e3fb545f6b3dd4e0fb30515ea5f..5761087ee8ea31be3e4c571630ceca807485e5e3 100644 (file)
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
        wait_queue_head_t       m_wait_single_sync_task;
        __int64_t               m_update_flags; /* sb flags we need to update
                                                   on the next remount,rw */
-       struct list_head        m_mplist;       /* inode shrinker mount list */
+       struct shrinker         m_inode_shrink; /* inode reclaim shrinker */
 } xfs_mount_t;
 
 /*
index e287863ac053d9788756ac79c9271ba562d455b1..de6b1722cdcab11e9443a027ebf5aeda7789589d 100644 (file)
@@ -48,6 +48,31 @@ extern ssize_t arch_cpu_release(const char *, size_t);
 #endif
 struct notifier_block;
 
+/*
+ * CPU notifier priorities.
+ */
+enum {
+       /*
+        * SCHED_ACTIVE marks a cpu which is coming up active during
+        * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
+        * notifier.  CPUSET_ACTIVE adjusts cpuset according to
+        * cpu_active mask right after SCHED_ACTIVE.  During
+        * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
+        * ordered in a similar way.
+        *
+        * This ordering guarantees consistent cpu_active mask and
+        * migration behavior to all cpu notifiers.
+        */
+       CPU_PRI_SCHED_ACTIVE    = INT_MAX,
+       CPU_PRI_CPUSET_ACTIVE   = INT_MAX - 1,
+       CPU_PRI_SCHED_INACTIVE  = INT_MIN + 1,
+       CPU_PRI_CPUSET_INACTIVE = INT_MIN,
+
+       /* migration should happen before other stuff but after perf */
+       CPU_PRI_PERF            = 20,
+       CPU_PRI_MIGRATION       = 10,
+};
+
 #ifdef CONFIG_SMP
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
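
Notifier callbacks run in descending priority order, which is what makes the explicit CPU_PRI_* values above meaningful. A hedged sketch of registering a callback at one of these priorities, modeled on the cpu_notifier() calls later in this merge; my_cpu_callback and its body are illustrative:

/* Sketch: a hotplug callback registered at perf's priority. */
static int __cpuinit my_cpu_callback(struct notifier_block *nfb,
				     unsigned long action, void *hcpu)
{
	switch (action & ~CPU_TASKS_FROZEN) {	/* treat frozen variants alike */
	case CPU_ONLINE:
		/* ... per-cpu setup; runs after SCHED/CPUSET_ACTIVE ... */
		return NOTIFY_OK;
	default:
		return NOTIFY_DONE;
	}
}

static int __init my_init(void)
{
	cpu_notifier(my_cpu_callback, CPU_PRI_PERF);
	return 0;
}
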
index 457ed765a116a4c06ecd192e0165cf5af096c818..f20eb8f16025d74534dd2b62fa22cf494404ef1c 100644 (file)
@@ -20,6 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
+extern void cpuset_update_active_cpus(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
+static inline void cpuset_update_active_cpus(void)
+{
+       partition_sched_domains(1, NULL, NULL);
+}
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
                                       struct cpumask *mask)
 {
index 8e5a9dfb76bf6425934a09427b8f16df82e937be..e7445df44d6c9324fd32a85ee1145415ecbd7960 100644 (file)
@@ -873,6 +873,8 @@ struct fb_info {
 static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
        struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct)
                        + max_num * sizeof(struct aperture), GFP_KERNEL);
+       if (!a)
+               return NULL;
        a->count = max_num;
        return a;
 }
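
Before this change alloc_apertures() dereferenced a NULL pointer in the a->count store when kzalloc() failed; with the added check the failure is returned to the caller, which must now handle it. A hedged caller-side sketch; the driver context and values are illustrative:

/* Sketch: callers of alloc_apertures() must handle allocation failure. */
struct apertures_struct *ap = alloc_apertures(1);

if (!ap)
	return -ENOMEM;			/* hypothetical driver error path */
ap->ranges[0].base = 0xd0000000;	/* illustrative aperture values */
ap->ranges[0].size = 0x01000000;
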
index 013dc529e95ffc43121fbf8cfbac9d432fa29bc6..d147461bc2712b9c9fb7aae84d65e600d6c60fd4 100644 (file)
@@ -61,7 +61,8 @@ struct files_struct {
        (rcu_dereference_check((fdtfd), \
                               rcu_read_lock_held() || \
                               lockdep_is_held(&(files)->file_lock) || \
-                              atomic_read(&(files)->count) == 1))
+                              atomic_read(&(files)->count) == 1 || \
+                              rcu_my_thread_group_empty()))
 
 #define files_fdtable(files) \
                (rcu_dereference_check_fdtable((files), (files)->fdt))
index a4d2e9f7088ada70d8b1357f418c32cd09a5bf1a..adf832dec3f37dd639e8aa24fe3cc29c7504a6a7 100644 (file)
@@ -1026,11 +1026,12 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 struct jbd2_buffer_trigger_type {
        /*
-        * Fired just before a buffer is written to the journal.
-        * mapped_data is a mapped buffer that is the frozen data for
-        * commit.
+        * Fired at the moment data to be written to the journal are known to be
+        * stable - so either at the moment b_frozen_data is created or just
+        * before a buffer is written to the journal.  mapped_data is a mapped
+        * buffer that is the frozen data for commit.
         */
-       void (*t_commit)(struct jbd2_buffer_trigger_type *type,
+       void (*t_frozen)(struct jbd2_buffer_trigger_type *type,
                         struct buffer_head *bh, void *mapped_data,
                         size_t size);
 
@@ -1042,7 +1043,7 @@ struct jbd2_buffer_trigger_type {
                        struct buffer_head *bh);
 };
 
-extern void jbd2_buffer_commit_trigger(struct journal_head *jh,
+extern void jbd2_buffer_frozen_trigger(struct journal_head *jh,
                                       void *mapped_data,
                                       struct jbd2_buffer_trigger_type *triggers);
 extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
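
The trigger now fires as soon as the data to be journalled is stable, hence the rename from t_commit to t_frozen. A hedged sketch of a trigger table using the renamed hook; the callbacks are illustrative, and t_abort is assumed to be the second member of the structure as declared in jbd2.h:

/* Sketch: a jbd2 trigger using the renamed t_frozen hook. */
static void my_frozen(struct jbd2_buffer_trigger_type *type,
		      struct buffer_head *bh, void *mapped_data, size_t size)
{
	/* mapped_data is stable here: recompute checksums etc. before commit */
}

static void my_abort(struct jbd2_buffer_trigger_type *type,
		     struct buffer_head *bh)
{
	/* journalling of the buffer was aborted */
}

static struct jbd2_buffer_trigger_type my_triggers = {
	.t_frozen = my_frozen,
	.t_abort  = my_abort,
};
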
index b969efb03787ee69546995ad378ac54b6bc37e3b..a2b48041b91084865a71c74b3f13342bf02fc6be 100644 (file)
@@ -999,7 +999,7 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
  * querying the cache size, so a fastpath for that case is appropriate.
  */
 struct shrinker {
-       int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+       int (*shrink)(struct shrinker *, int nr_to_scan, gfp_t gfp_mask);
        int seeks;      /* seeks to recreate an obj */
 
        /* These are for internal use */
index 7cb00845f150ca185b73a47931c6a1fbcd4c1cef..f26fda76b87fc66816f2fa286e97d760c545f165 100644 (file)
@@ -288,6 +288,7 @@ struct pci_dev {
         */
        unsigned int    irq;
        struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
+       resource_size_t fw_addr[DEVICE_COUNT_RESOURCE]; /* FW-assigned addr */
 
        /* These fields are used by common fixups */
        unsigned int    transparent:1;  /* Transparent PCI bridge */
index 5d0266d94985c65acbd8b13a41961964cdde4a72..469e03e96fe722c8b465a026835b04e4ace2899a 100644 (file)
@@ -1068,7 +1068,7 @@ static inline void perf_event_disable(struct perf_event *event)           { }
 #define perf_cpu_notifier(fn)                                  \
 do {                                                           \
        static struct notifier_block fn##_nb __cpuinitdata =    \
-               { .notifier_call = fn, .priority = 20 };        \
+               { .notifier_call = fn, .priority = CPU_PRI_PERF }; \
        fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,             \
                (void *)(unsigned long)smp_processor_id());     \
        fn(&fn##_nb, (unsigned long)CPU_STARTING,               \
index 6e0bb86de9905b63dd7139bd438f292e3d5ef60c..2091ea2a2c5cbb0b0ad2dca9a534aeac682abf11 100644 (file)
@@ -271,13 +271,10 @@ extern int runqueue_is_locked(int cpu);
 
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-extern int select_nohz_load_balancer(int cpu);
-extern int get_nohz_load_balancer(void);
+extern void select_nohz_load_balancer(int stop_tick);
+extern int get_nohz_timer_target(void);
 #else
-static inline int select_nohz_load_balancer(int cpu)
-{
-       return 0;
-}
+static inline void select_nohz_load_balancer(int stop_tick) { }
 #endif
 
 /*
@@ -798,7 +795,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE        0x0100  /* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES 0x0200  /* Domain members share cpu pkg resources */
 #define SD_SERIALIZE           0x0400  /* Only a single load balancing instance */
-
+#define SD_ASYM_PACKING                0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING      0x1000  /* Prefer to place tasks in a sibling domain */
 
 enum powersavings_balance_level {
@@ -833,6 +830,8 @@ static inline int sd_balance_for_package_power(void)
        return SD_PREFER_SIBLING;
 }
 
+extern int __weak arch_sd_sibling_asym_packing(void);
+
 /*
  * Optimise SD flags for power savings:
  * SD_BALANCE_NEWIDLE helps aggressive task consolidation and power savings.
@@ -854,7 +853,7 @@ struct sched_group {
         * CPU power of this group, SCHED_LOAD_SCALE being max power for a
         * single CPU.
         */
-       unsigned int cpu_power;
+       unsigned int cpu_power, cpu_power_orig;
 
        /*
         * The CPUs this group covers.
@@ -1690,6 +1689,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_EXITING     0x00000004      /* getting shut down */
 #define PF_EXITPIDONE  0x00000008      /* pi exit done on shut down */
 #define PF_VCPU                0x00000010      /* I'm a virtual CPU */
+#define PF_WQ_WORKER   0x00000020      /* I'm a workqueue worker */
 #define PF_FORKNOEXEC  0x00000040      /* forked but didn't exec */
 #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV   0x00000100      /* used super-user privileges */
@@ -1784,20 +1784,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 #endif
 
 /*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs; using it directly (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
  */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-extern int sched_clock_stable;
-#endif
-
-/* ftrace calls sched_clock() directly */
 extern unsigned long long notrace sched_clock(void);
+/*
+ * See the comment in kernel/sched_clock.c
+ */
+extern u64 cpu_clock(int cpu);
+extern u64 local_clock(void);
+extern u64 sched_clock_cpu(int cpu);
+
 
 extern void sched_clock_init(void);
-extern u64 sched_clock_cpu(int cpu);
 
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 static inline void sched_clock_tick(void)
@@ -1812,17 +1815,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
 #else
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable;
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
 
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-extern unsigned long long cpu_clock(int cpu);
-
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
index c44df50a05ab5a81e42b1a37299fb9e790298fc3..b572e432d2f361344a7de671d31035994e5c5d6d 100644 (file)
@@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
                                | 1*SD_SHARE_PKG_RESOURCES              \
                                | 0*SD_SERIALIZE                        \
                                | 0*SD_PREFER_SIBLING                   \
+                               | arch_sd_sibling_asym_packing()        \
                                ,                                       \
        .last_balance           = jiffies,                              \
        .balance_interval       = 1,                                    \
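
SD_ASYM_PACKING only ends up in the flags when the weak hook returns it; the powerpc part of this merge provides such an override for asymmetric SMT. A hedged sketch of what an arch override could look like; arch_has_asym_smt() is a placeholder, not a real interface:

/* Sketch: an architecture opting in to asymmetric SMT packing by
 * overriding the __weak hook declared in linux/sched.h. */
extern int arch_has_asym_smt(void);	/* placeholder feature test */

int arch_sd_sibling_asym_packing(void)
{
	if (arch_has_asym_smt())
		return SD_ASYM_PACKING;
	return 0;
}
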
index c9a97597699522db33b2f309bd58368970eb9651..814f294d4cd07b0ccd5e26870f681e0733a2b671 100644 (file)
@@ -29,6 +29,7 @@
  */
 
 #ifndef LINUX_VGA_H
+#define LINUX_VGA_H
 
 #include <asm/vga.h>
 
index fd882261225e793c9c08718faf9467d15b3bfea8..9696a5e2c437f3e33dc8f4d2bd50f8ff8bff6a53 100644 (file)
@@ -799,7 +799,7 @@ do {                                                                        \
                X##_e -= (_FP_W_TYPE_SIZE - rsize);                     \
        X##_e = rsize - X##_e - 1;                                      \
                                                                        \
-       if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e)    \
+       if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs <= X##_e)   \
          __FP_FRAC_SRS_1(ur_, (X##_e - _FP_WFRACBITS_##fs + 1), rsize);\
        _FP_FRAC_DISASSEMBLE_##wc(X, ur_, rsize);                       \
        if ((_FP_WFRACBITS_##fs - X##_e - 1) > 0)                       \
index 731150d52799ecc5492b3590c3b7b5b34de09072..0a691ea7654aefb403e25039f08641691d3de74c 100644 (file)
@@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-       return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
-       return (sk && sk->sk_tx_queue_mapping >= 0);
+       return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
index 506c8491a8d131761432aa306e51eef441863887..40a8f462a8224b298690cb07892f93afe8c15214 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1256,6 +1256,33 @@ out:
        return un;
 }
 
+
+/**
+ * get_queue_result - Retrieve the result code from sem_queue
+ * @q: Pointer to queue structure
+ *
+ * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
+ * q->status, then we must loop until the value is replaced with the final
+ * value: This may happen if a task is woken up by an unrelated event (e.g.
+ * signal) and in parallel the task is woken up by another task because it got
+ * the requested semaphores.
+ *
+ * The function can be called with or without holding the semaphore spinlock.
+ */
+static int get_queue_result(struct sem_queue *q)
+{
+       int error;
+
+       error = q->status;
+       while (unlikely(error == IN_WAKEUP)) {
+               cpu_relax();
+               error = q->status;
+       }
+
+       return error;
+}
+
+
 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
                unsigned, nsops, const struct timespec __user *, timeout)
 {
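
get_queue_result() is the sleeper's half of a two-phase handoff: the waker publishes IN_WAKEUP first, wakes the task, and only then stores the final result, so a sleeper woken early by an unrelated event must spin past the transient value. A simplified, conceptual sketch of the waker's side; the real code in this file carries more state:

/* Conceptual waker-side sketch of the IN_WAKEUP protocol, simplified. */
static void wake_up_sem_queue(struct sem_queue *q, int error)
{
	q->status = IN_WAKEUP;		/* transient: "wakeup in progress" */
	wake_up_process(q->sleeper);	/* sleeper may run and spin now */
	smp_wmb();			/* order the wakeup before the result */
	q->status = error;		/* final value ends the sleeper's loop */
}
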
@@ -1409,15 +1436,18 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
        else
                schedule();
 
-       error = queue.status;
-       while(unlikely(error == IN_WAKEUP)) {
-               cpu_relax();
-               error = queue.status;
-       }
+       error = get_queue_result(&queue);
 
        if (error != -EINTR) {
                /* fast path: update_queue already obtained all requested
-                * resources */
+                * resources.
+                * Perform a smp_mb(): User space could assume that semop()
+                * is a memory barrier: Without the mb(), the cpu could
+                * speculatively read stale user-space data that was
+                * overwritten by the previous owner of the semaphore.
+                */
+               smp_mb();
+
                goto out_free;
        }
 
@@ -1427,10 +1457,12 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
                goto out_free;
        }
 
+       error = get_queue_result(&queue);
+
        /*
         * If queue.status != -EINTR we are woken up by another process
         */
-       error = queue.status;
+
        if (error != -EINTR) {
                goto out_unlock_free;
        }
index 97d1b426a4ac39bd49b41b9393ed9b565f8f7f33..f6e726f184916029e2d1cfdbcd4acb2b26f14e69 100644 (file)
@@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
                return -EINVAL;
 
        cpu_hotplug_begin();
-       set_cpu_active(cpu, false);
        err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
        if (err) {
-               set_cpu_active(cpu, true);
-
                nr_calls--;
                __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
                printk("%s: attempt to take down CPU %u failed\n",
@@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
        err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
        if (err) {
-               set_cpu_active(cpu, true);
                /* CPU didn't die: tell everyone.  Can't complain. */
                cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
 
@@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
                goto out_notify;
        BUG_ON(!cpu_online(cpu));
 
-       set_cpu_active(cpu, true);
-
        /* Now call notifier in preparation. */
        cpu_notify(CPU_ONLINE | mod, hcpu);
 
index 02b9611eadde3ebe638b9c24ffbbb5ec8ada5c06..7146793b5c114929a7b680f5b68206e3a7a8a127 100644 (file)
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * but making no active use of cpusets.
  *
  * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus().  Needs to call cgroup_lock()
  * before calling generate_sched_domains().
  */
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
-                               unsigned long phase, void *unused_cpu)
+void cpuset_update_active_cpus(void)
 {
        struct sched_domain_attr *attr;
        cpumask_var_t *doms;
        int ndoms;
 
-       switch (phase) {
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
-       case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               break;
-
-       default:
-               return NOTIFY_DONE;
-       }
-
        cgroup_lock();
        mutex_lock(&callback_mutex);
        cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
 
        /* Have scheduler rebuild the domains */
        partition_sched_domains(ndoms, doms, attr);
-
-       return NOTIFY_OK;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
        cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
        top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
 
-       hotcpu_notifier(cpuset_track_online_cpus, 0);
        hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
        cpuset_wq = create_singlethread_workqueue("cpuset");
index 31aa9332ef3f8d4f5447f72a3989cbfd0dc7c6df..7bfae887f211556961515b5d057cdcf2880a71c9 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/bootmem.h>
 #include <linux/mm.h>
 #include <linux/early_res.h>
+#include <linux/slab.h>
+#include <linux/kmemleak.h>
 
 /*
  * Early reserved memory areas.
@@ -319,6 +321,8 @@ void __init free_early(u64 start, u64 end)
        struct early_res *r;
        int i;
 
+       kmemleak_free_part(__va(start), end - start);
+
        i = find_overlapped_early(start, end);
        r = &early_res[i];
        if (i >= max_early_res || r->end != end || r->start != start)
@@ -333,6 +337,8 @@ void __init free_early_partial(u64 start, u64 end)
        struct early_res *r;
        int i;
 
+       kmemleak_free_part(__va(start), end - start);
+
        if (start == end)
                return;
 
index b6cce14ba0470e641bd45c68ca2067c4ee70d38c..a82a65cef741935adc51082640f63c9a5049b5ea 100644 (file)
@@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
 
-       new_flags &= ~PF_SUPERPRIV;
+       new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
        new_flags |= PF_FORKNOEXEC;
        new_flags |= PF_STARTING;
        p->flags = new_flags;
index 5c69e996bd0f0fcba57586800924b8d2989cda1b..e934339fbbef1cb6acdfb17899e50e539059d0b4 100644 (file)
@@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 static int hrtimer_get_target(int this_cpu, int pinned)
 {
 #ifdef CONFIG_NO_HZ
-       if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
-               int preferred_cpu = get_nohz_load_balancer();
-
-               if (preferred_cpu >= 0)
-                       return preferred_cpu;
-       }
+       if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
+               return get_nohz_timer_target();
 #endif
        return this_cpu;
 }
index 54286798c37b8e060078adbd7444c3f2c8da9847..f2852a5102327f74c39531a517e488c0d1e673e5 100644 (file)
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
 
 static inline u64 lockstat_clock(void)
 {
-       return cpu_clock(smp_processor_id());
+       return local_clock();
 }
 
 static int lock_point(unsigned long points[], unsigned long ip)
index ff86c558af4c28dd4c9a7ea92cc592b70bee6d7d..7e32b51ff043b9a7fb47ef17aed2aef295d3de66 100644 (file)
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 
 static inline u64 perf_clock(void)
 {
-       return cpu_clock(raw_smp_processor_id());
+       return local_clock();
 }
 
 /*
index 9829646d399c5bf0304aa863f78f6f43ae1e4d5e..f66bdd33a6c61f58f033732d24b9a3a7d19ae989 100644 (file)
@@ -232,31 +232,24 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
-       struct sighand_struct *sighand;
-       struct signal_struct *sig;
+       struct signal_struct *sig = tsk->signal;
        struct task_struct *t;
 
-       *times = INIT_CPUTIME;
+       times->utime = sig->utime;
+       times->stime = sig->stime;
+       times->sum_exec_runtime = sig->sum_sched_runtime;
 
        rcu_read_lock();
-       sighand = rcu_dereference(tsk->sighand);
-       if (!sighand)
+       /* make sure we can trust tsk->thread_group list */
+       if (!likely(pid_alive(tsk)))
                goto out;
 
-       sig = tsk->signal;
-
        t = tsk;
        do {
                times->utime = cputime_add(times->utime, t->utime);
                times->stime = cputime_add(times->stime, t->stime);
                times->sum_exec_runtime += t->se.sum_exec_runtime;
-
-               t = next_thread(t);
-       } while (t != tsk);
-
-       times->utime = cputime_add(times->utime, sig->utime);
-       times->stime = cputime_add(times->stime, sig->stime);
-       times->sum_exec_runtime += sig->sum_sched_runtime;
+       } while_each_thread(tsk, t);
 out:
        rcu_read_unlock();
 }
@@ -1279,10 +1272,6 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 {
        struct signal_struct *sig;
 
-       /* tsk == current, ensure it is safe to use ->signal/sighand */
-       if (unlikely(tsk->exit_state))
-               return 0;
-
        if (!task_cputime_zero(&tsk->cputime_expires)) {
                struct task_cputime task_sample = {
                        .utime = tsk->utime,
@@ -1298,7 +1287,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
        if (sig->cputimer.running) {
                struct task_cputime group_sample;
 
-               thread_group_cputimer(tsk, &group_sample);
+               spin_lock(&sig->cputimer.lock);
+               group_sample = sig->cputimer.cputime;
+               spin_unlock(&sig->cputimer.lock);
+
                if (task_cputime_expired(&group_sample, &sig->cputime_expires))
                        return 1;
        }
@@ -1315,6 +1307,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
        LIST_HEAD(firing);
        struct k_itimer *timer, *next;
+       unsigned long flags;
 
        BUG_ON(!irqs_disabled());
 
@@ -1325,7 +1318,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
        if (!fastpath_timer_check(tsk))
                return;
 
-       spin_lock(&tsk->sighand->siglock);
+       if (!lock_task_sighand(tsk, &flags))
+               return;
        /*
         * Here we take off tsk->signal->cpu_timers[N] and
         * tsk->cpu_timers[N] all the timers that are firing, and
@@ -1347,7 +1341,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
         * that gets the timer lock before we do will give it up and
         * spin until we've taken care of that timer below.
         */
-       spin_unlock(&tsk->sighand->siglock);
+       unlock_task_sighand(tsk, &flags);
 
        /*
         * Now that all the timers on our list have the firing flag,
index 6535ac8bc6a5935ceebd05ea24390e25b13fba41..2e2726d790b98eff18d88d71dbeba8caf8ec7f7a 100644 (file)
@@ -239,8 +239,7 @@ static unsigned long
 rcu_random(struct rcu_random_state *rrsp)
 {
        if (--rrsp->rrs_count < 0) {
-               rrsp->rrs_state +=
-                       (unsigned long)cpu_clock(raw_smp_processor_id());
+               rrsp->rrs_state += (unsigned long)local_clock();
                rrsp->rrs_count = RCU_RANDOM_REFRESH;
        }
        rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
index 63b4a14682faa3193830ec4dfc0ba26c9bada4d0..f6c9bb6ac70b382a29a907833f13bca9bc2bb61e 100644 (file)
@@ -77,6 +77,7 @@
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
+#include "workqueue_sched.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
@@ -456,9 +457,10 @@ struct rq {
        unsigned long nr_running;
        #define CPU_LOAD_IDX_MAX 5
        unsigned long cpu_load[CPU_LOAD_IDX_MAX];
+       unsigned long last_load_update_tick;
 #ifdef CONFIG_NO_HZ
        u64 nohz_stamp;
-       unsigned char in_nohz_recently;
+       unsigned char nohz_balance_kick;
 #endif
        unsigned int skip_clock_update;
 
@@ -1192,6 +1194,27 @@ static void resched_cpu(int cpu)
 }
 
 #ifdef CONFIG_NO_HZ
+/*
+ * In the semi-idle case, use the nearest busy cpu for migrating timers
+ * from an idle cpu.  This is good for power savings.
+ *
+ * We don't do a similar optimization for a completely idle system, as
+ * selecting an idle cpu will add more delays to the timers than intended
+ * (as that cpu's timer base may not be up to date wrt jiffies etc).
+ */
+int get_nohz_timer_target(void)
+{
+       int cpu = smp_processor_id();
+       int i;
+       struct sched_domain *sd;
+
+       for_each_domain(cpu, sd) {
+               for_each_cpu(i, sched_domain_span(sd))
+                       if (!idle_cpu(i))
+                               return i;
+       }
+       return cpu;
+}
 /*
  * When add_timer_on() enqueues a timer into the timer wheel of an
  * idle CPU then this timer might expire before the next timer event
@@ -1642,7 +1665,7 @@ static void update_shares(struct sched_domain *sd)
        if (root_task_group_empty())
                return;
 
-       now = cpu_clock(raw_smp_processor_id());
+       now = local_clock();
        elapsed = now - sd->last_update;
 
        if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
@@ -1795,6 +1818,7 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 static void calc_load_account_idle(struct rq *this_rq);
 static void update_sysctl(void);
 static int get_update_sysctl_factor(void);
+static void update_cpu_load(struct rq *this_rq);
 
 static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 {
@@ -2257,11 +2281,55 @@ static void update_avg(u64 *avg, u64 sample)
 }
 #endif
 
-/***
+static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
+                                bool is_sync, bool is_migrate, bool is_local,
+                                unsigned long en_flags)
+{
+       schedstat_inc(p, se.statistics.nr_wakeups);
+       if (is_sync)
+               schedstat_inc(p, se.statistics.nr_wakeups_sync);
+       if (is_migrate)
+               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
+       if (is_local)
+               schedstat_inc(p, se.statistics.nr_wakeups_local);
+       else
+               schedstat_inc(p, se.statistics.nr_wakeups_remote);
+
+       activate_task(rq, p, en_flags);
+}
+
+static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
+                                       int wake_flags, bool success)
+{
+       trace_sched_wakeup(p, success);
+       check_preempt_curr(rq, p, wake_flags);
+
+       p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+       if (p->sched_class->task_woken)
+               p->sched_class->task_woken(rq, p);
+
+       if (unlikely(rq->idle_stamp)) {
+               u64 delta = rq->clock - rq->idle_stamp;
+               u64 max = 2*sysctl_sched_migration_cost;
+
+               if (delta > max)
+                       rq->avg_idle = max;
+               else
+                       update_avg(&rq->avg_idle, delta);
+               rq->idle_stamp = 0;
+       }
+#endif
+       /* if a worker is waking up, notify workqueue */
+       if ((p->flags & PF_WQ_WORKER) && success)
+               wq_worker_waking_up(p, cpu_of(rq));
+}
+
+/**
  * try_to_wake_up - wake up a thread
- * @p: the to-be-woken-up thread
+ * @p: the thread to be awakened
  * @state: the mask of task states that can be woken
- * @sync: do a synchronous wakeup?
+ * @wake_flags: wake modifier flags (WF_*)
  *
  * Put it on the run-queue if it's not already there. The "current"
  * thread is always on the run-queue (except when the actual
@@ -2269,7 +2337,8 @@ static void update_avg(u64 *avg, u64 sample)
  * the simpler "current->state = TASK_RUNNING" to mark yourself
  * runnable without the overhead of this.
  *
- * returns failure only if the task is already active.
+ * Returns %true if @p was woken up, %false if it was already running
+ * or @state didn't match @p's state.
  */
 static int try_to_wake_up(struct task_struct *p, unsigned int state,
                          int wake_flags)
@@ -2349,38 +2418,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 
 out_activate:
 #endif /* CONFIG_SMP */
-       schedstat_inc(p, se.statistics.nr_wakeups);
-       if (wake_flags & WF_SYNC)
-               schedstat_inc(p, se.statistics.nr_wakeups_sync);
-       if (orig_cpu != cpu)
-               schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-       if (cpu == this_cpu)
-               schedstat_inc(p, se.statistics.nr_wakeups_local);
-       else
-               schedstat_inc(p, se.statistics.nr_wakeups_remote);
-       activate_task(rq, p, en_flags);
+       ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
+                     cpu == this_cpu, en_flags);
        success = 1;
-
 out_running:
-       trace_sched_wakeup(p, success);
-       check_preempt_curr(rq, p, wake_flags);
-
-       p->state = TASK_RUNNING;
-#ifdef CONFIG_SMP
-       if (p->sched_class->task_woken)
-               p->sched_class->task_woken(rq, p);
-
-       if (unlikely(rq->idle_stamp)) {
-               u64 delta = rq->clock - rq->idle_stamp;
-               u64 max = 2*sysctl_sched_migration_cost;
-
-               if (delta > max)
-                       rq->avg_idle = max;
-               else
-                       update_avg(&rq->avg_idle, delta);
-               rq->idle_stamp = 0;
-       }
-#endif
+       ttwu_post_activation(p, rq, wake_flags, success);
 out:
        task_rq_unlock(rq, &flags);
        put_cpu();
@@ -2388,6 +2430,37 @@ out:
        return success;
 }
 
+/**
+ * try_to_wake_up_local - try to wake up a local task with rq lock held
+ * @p: the thread to be awakened
+ *
+ * Put @p on the run-queue if it's not already there.  The caller must
+ * ensure that this_rq() is locked, @p is bound to this_rq() and not
+ * the current task.  this_rq() stays locked over invocation.
+ */
+static void try_to_wake_up_local(struct task_struct *p)
+{
+       struct rq *rq = task_rq(p);
+       bool success = false;
+
+       BUG_ON(rq != this_rq());
+       BUG_ON(p == current);
+       lockdep_assert_held(&rq->lock);
+
+       if (!(p->state & TASK_NORMAL))
+               return;
+
+       if (!p->se.on_rq) {
+               if (likely(!task_running(rq, p))) {
+                       schedstat_inc(rq, ttwu_count);
+                       schedstat_inc(rq, ttwu_local);
+               }
+               ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
+               success = true;
+       }
+       ttwu_post_activation(p, rq, 0, success);
+}
+
 /**
  * wake_up_process - Wake up a specific process
  * @p: The process to be woken up.
@@ -3001,24 +3074,103 @@ static void calc_load_account_active(struct rq *this_rq)
        this_rq->calc_load_update += LOAD_FREQ;
 }
 
+/*
+ * The exact cpuload at various idx values, calculated at every tick would be
+ * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load
+ *
+ * If a cpu misses updates for n-1 ticks (as it was idle) and update gets
+ * called on the nth tick when the cpu may be busy, then we have:
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * load = ((2^idx - 1) / 2^idx) * load + 1 / 2^idx * cur_load
+ *
+ * decay_load_missed() below does efficient calculation of
+ * load = ((2^idx - 1) / 2^idx)^(n-1) * load
+ * avoiding 0..n-1 loop doing load = ((2^idx - 1) / 2^idx) * load
+ *
+ * The calculation is approximated on a 128 point scale.
+ * degrade_zero_ticks is the number of ticks after which load at any
+ * particular idx is approximated to be zero.
+ * degrade_factor is a precomputed table, a row for each load idx.
+ * Each column corresponds to degradation factor for a power of two ticks,
+ * based on 128 point scale.
+ * Example:
+ * row 2, col 3 (=12) says that the degradation at load idx 2 after
+ * 8 ticks is 12/128 (which is an approximation of exact factor 3^8/4^8).
+ *
+ * With these power-of-2 load factors, we can degrade the load n times
+ * by looking at the 1 bits in n and doing as many mult/shifts instead of
+ * the n mult/shifts needed by the exact degradation.
+ */
+#define DEGRADE_SHIFT          7
+static const unsigned char
+               degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
+static const unsigned char
+               degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
+                                       {0, 0, 0, 0, 0, 0, 0, 0},
+                                       {64, 32, 8, 0, 0, 0, 0, 0},
+                                       {96, 72, 40, 12, 1, 0, 0},
+                                       {112, 98, 75, 43, 15, 1, 0},
+                                       {120, 112, 98, 76, 45, 16, 2} };
+
+/*
+ * Update cpu_load for any missed ticks due to tickless idle. The backlog
+ * can only come from idle time, so we just decay the old load without
+ * adding any new load.
+ */
+static unsigned long
+decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
+{
+       int j = 0;
+
+       if (!missed_updates)
+               return load;
+
+       if (missed_updates >= degrade_zero_ticks[idx])
+               return 0;
+
+       if (idx == 1)
+               return load >> missed_updates;
+
+       while (missed_updates) {
+               if (missed_updates % 2)
+                       load = (load * degrade_factor[idx][j]) >> DEGRADE_SHIFT;
+
+               missed_updates >>= 1;
+               j++;
+       }
+       return load;
+}
+
 /*
  * Update rq->cpu_load[] statistics. This function is usually called every
- * scheduler tick (TICK_NSEC).
+ * scheduler tick (TICK_NSEC). With tickless idle this will not be called
+ * every tick. We fix it up based on jiffies.
  */
 static void update_cpu_load(struct rq *this_rq)
 {
        unsigned long this_load = this_rq->load.weight;
+       unsigned long curr_jiffies = jiffies;
+       unsigned long pending_updates;
        int i, scale;
 
        this_rq->nr_load_updates++;
 
+       /* Avoid repeated calls on the same jiffy when moving in and out of idle */
+       if (curr_jiffies == this_rq->last_load_update_tick)
+               return;
+
+       pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+       this_rq->last_load_update_tick = curr_jiffies;
+
        /* Update our load: */
-       for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
+       this_rq->cpu_load[0] = this_load; /* Fasttrack for idx 0 */
+       for (i = 1, scale = 2; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
                unsigned long old_load, new_load;
 
                /* scale is effectively 1 << i now, and >> i divides by scale */
 
                old_load = this_rq->cpu_load[i];
+               old_load = decay_load_missed(old_load, pending_updates - 1, i);
                new_load = this_load;
                /*
                 * Round up the averaging division if load is increasing. This
@@ -3026,9 +3178,15 @@ static void update_cpu_load(struct rq *this_rq)
                 * example.
                 */
                if (new_load > old_load)
-                       new_load += scale-1;
-               this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
+                       new_load += scale - 1;
+
+               this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }
+}
+
+static void update_cpu_load_active(struct rq *this_rq)
+{
+       update_cpu_load(this_rq);
 
        calc_load_account_active(this_rq);
 }
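
A quick numeric check of decay_load_missed() against the table above: for idx = 2 and missed_updates = 5 (binary 101), the loop multiplies by degrade_factor[2][0] = 96 and degrade_factor[2][2] = 40, approximating (3/4)^5:

/* Worked example, not part of the patch: decay a load of 1024 for
 * 5 missed ticks at idx 2 using the binary decomposition (5 = 0b101). */
static unsigned long decay_example(void)
{
	unsigned long load = 1024;

	load = (load * 96) >> DEGRADE_SHIFT;	/* 1 tick:  (3/4)^1 ~= 96/128 -> 768 */
	load = (load * 40) >> DEGRADE_SHIFT;	/* 4 ticks: (3/4)^4 ~= 40/128 -> 240 */
	return load;	/* exact: 1024 * (3/4)^5 ~= 243, so 240 is close */
}
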
@@ -3416,7 +3574,7 @@ void scheduler_tick(void)
 
        raw_spin_lock(&rq->lock);
        update_rq_clock(rq);
-       update_cpu_load(rq);
+       update_cpu_load_active(rq);
        curr->sched_class->task_tick(rq, curr, 0);
        raw_spin_unlock(&rq->lock);
 
@@ -3588,7 +3746,6 @@ need_resched:
        rq = cpu_rq(cpu);
        rcu_note_context_switch(cpu);
        prev = rq->curr;
-       switch_count = &prev->nivcsw;
 
        release_kernel_lock(prev);
 need_resched_nonpreemptible:
@@ -3601,11 +3758,26 @@ need_resched_nonpreemptible:
        raw_spin_lock_irq(&rq->lock);
        clear_tsk_need_resched(prev);
 
+       switch_count = &prev->nivcsw;
        if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
-               if (unlikely(signal_pending_state(prev->state, prev)))
+               if (unlikely(signal_pending_state(prev->state, prev))) {
                        prev->state = TASK_RUNNING;
-               else
+               } else {
+                       /*
+                        * If a worker is going to sleep, notify and
+                        * ask workqueue whether it wants to wake up a
+                        * task to maintain concurrency.  If so, wake
+                        * up the task.
+                        */
+                       if (prev->flags & PF_WQ_WORKER) {
+                               struct task_struct *to_wakeup;
+
+                               to_wakeup = wq_worker_sleeping(prev, cpu);
+                               if (to_wakeup)
+                                       try_to_wake_up_local(to_wakeup);
+                       }
                        deactivate_task(rq, prev, DEQUEUE_SLEEP);
+               }
                switch_count = &prev->nvcsw;
        }
 
@@ -3627,8 +3799,10 @@ need_resched_nonpreemptible:
 
                context_switch(rq, prev, next); /* unlocks the rq */
                /*
-                * the context switch might have flipped the stack from under
-                * us, hence refresh the local variables.
+                * The context switch has flipped the stack from under us
+                * and restored the local variables which were saved when
+                * this task called schedule() in the past. prev == current
+                * is still correct, but it can be moved to another cpu/rq.
                 */
                cpu = smp_processor_id();
                rq = cpu_rq(cpu);
@@ -3637,11 +3811,8 @@ need_resched_nonpreemptible:
 
        post_schedule(rq);
 
-       if (unlikely(reacquire_kernel_lock(current) < 0)) {
-               prev = rq->curr;
-               switch_count = &prev->nivcsw;
+       if (unlikely(reacquire_kernel_lock(prev)))
                goto need_resched_nonpreemptible;
-       }
 
        preempt_enable_no_resched();
        if (need_resched())
@@ -4431,12 +4602,8 @@ recheck:
         */
        if (user && !capable(CAP_SYS_NICE)) {
                if (rt_policy(policy)) {
-                       unsigned long rlim_rtprio;
-
-                       if (!lock_task_sighand(p, &flags))
-                               return -ESRCH;
-                       rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO);
-                       unlock_task_sighand(p, &flags);
+                       unsigned long rlim_rtprio =
+                                       task_rlimit(p, RLIMIT_RTPRIO);
 
                        /* can't set/change the rt policy */
                        if (policy != p->policy && !rlim_rtprio)
@@ -5806,20 +5973,49 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
        .notifier_call = migration_call,
-       .priority = 10
+       .priority = CPU_PRI_MIGRATION,
 };
 
+static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
+                                     unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+               set_cpu_active((long)hcpu, true);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
+static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
+                                       unsigned long action, void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               set_cpu_active((long)hcpu, false);
+               return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
+
 static int __init migration_init(void)
 {
        void *cpu = (void *)(long)smp_processor_id();
        int err;
 
-       /* Start one for the boot CPU: */
+       /* Initialize migration for the boot CPU */
        err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
        BUG_ON(err == NOTIFY_BAD);
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
 
+       /* Register cpu active notifiers */
+       cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
+       cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
+
        return 0;
 }
 early_initcall(migration_init);
@@ -6054,23 +6250,18 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
                free_rootdomain(old_rd);
 }
 
-static int init_rootdomain(struct root_domain *rd, bool bootmem)
+static int init_rootdomain(struct root_domain *rd)
 {
-       gfp_t gfp = GFP_KERNEL;
-
        memset(rd, 0, sizeof(*rd));
 
-       if (bootmem)
-               gfp = GFP_NOWAIT;
-
-       if (!alloc_cpumask_var(&rd->span, gfp))
+       if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
                goto out;
-       if (!alloc_cpumask_var(&rd->online, gfp))
+       if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
                goto free_span;
-       if (!alloc_cpumask_var(&rd->rto_mask, gfp))
+       if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
                goto free_online;
 
-       if (cpupri_init(&rd->cpupri, bootmem) != 0)
+       if (cpupri_init(&rd->cpupri) != 0)
                goto free_rto_mask;
        return 0;
 
@@ -6086,7 +6277,7 @@ out:
 
 static void init_defrootdomain(void)
 {
-       init_rootdomain(&def_root_domain, true);
+       init_rootdomain(&def_root_domain);
 
        atomic_set(&def_root_domain.refcount, 1);
 }
@@ -6099,7 +6290,7 @@ static struct root_domain *alloc_rootdomain(void)
        if (!rd)
                return NULL;
 
-       if (init_rootdomain(rd, false) != 0) {
+       if (init_rootdomain(rd) != 0) {
                kfree(rd);
                return NULL;
        }
@@ -7278,29 +7469,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-#ifndef CONFIG_CPUSETS
 /*
- * Add online and remove offline CPUs from the scheduler domains.
- * When cpusets are enabled they take over this function.
+ * Update cpusets according to cpu_active mask.  If cpusets are
+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper
+ * around partition_sched_domains().
  */
-static int update_sched_domains(struct notifier_block *nfb,
-                               unsigned long action, void *hcpu)
+static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
+                            void *hcpu)
 {
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-       case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
        case CPU_DOWN_FAILED:
-       case CPU_DOWN_FAILED_FROZEN:
-               partition_sched_domains(1, NULL, NULL);
+               cpuset_update_active_cpus();
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
+       }
+}
 
+static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
+                              void *hcpu)
+{
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_DOWN_PREPARE:
+               cpuset_update_active_cpus();
+               return NOTIFY_OK;
        default:
                return NOTIFY_DONE;
        }
 }
-#endif
 
 static int update_runtime(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
@@ -7346,10 +7543,8 @@ void __init sched_init_smp(void)
        mutex_unlock(&sched_domains_mutex);
        put_online_cpus();
 
-#ifndef CONFIG_CPUSETS
-       /* XXX: Theoretical race here - CPU may be hotplugged now */
-       hotcpu_notifier(update_sched_domains, 0);
-#endif
+       hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
+       hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
 
        /* RT runtime code needs to handle some hotplug events */
        hotcpu_notifier(update_runtime, 0);
@@ -7594,6 +7789,9 @@ void __init sched_init(void)
 
                for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
                        rq->cpu_load[j] = 0;
+
+               rq->last_load_update_tick = jiffies;
+
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
@@ -7607,6 +7805,10 @@ void __init sched_init(void)
                rq->idle_stamp = 0;
                rq->avg_idle = 2*sysctl_sched_migration_cost;
                rq_attach_root(rq, &def_root_domain);
+#ifdef CONFIG_NO_HZ
+               rq->nohz_balance_kick = 0;
+               init_sched_softirq_csd(&per_cpu(remote_sched_softirq_cb, i));
+#endif
 #endif
                init_rq_hrtick(rq);
                atomic_set(&rq->nr_iowait, 0);
@@ -7651,8 +7853,11 @@ void __init sched_init(void)
        zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
-       zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
-       alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
+       zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
+       alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT);
+       atomic_set(&nohz.load_balancer, nr_cpu_ids);
+       atomic_set(&nohz.first_pick_cpu, nr_cpu_ids);
+       atomic_set(&nohz.second_pick_cpu, nr_cpu_ids);
 #endif
        /* May be allocated at isolcpus cmdline parse time */
        if (cpu_isolated_map == NULL)
index 906a0f718cb32c16797b83df68a4eb23d3e15a16..52f1a149bfb15a871a362255498fadf90e357c57 100644 (file)
  *   Ingo Molnar <mingo@redhat.com>
  *   Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i)       -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock()      -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation uses sched_clock() directly when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, in which case sched_clock() is assumed
+ * to provide these properties (mostly it means the architecture provides a
+ * globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotonic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and to keep it within an
+ * expected window.
  *
  * Furthermore, explicit sleep and wakeup hooks allow us to account for time
  * that is otherwise invisible (TSC gets stopped).
  *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts, we cannot rely on it
+ * in general, since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
  */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
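
Given the contract documented above, a short usage sketch: local_clock() for intervals measured on one CPU, usable from any context. do_something() is a placeholder, and the caller is assumed to stay on one CPU (pinned or with preemption disabled) so the two timestamps are comparable:

/* Sketch: measuring a short interval with the interfaces documented above. */
extern void do_something(void);		/* placeholder */

static void time_something(void)
{
	u64 t0, t1;

	t0 = local_clock();	/* NMI-safe, monotonic on this CPU */
	do_something();
	t1 = local_clock();	/* only comparable to t0 on the same CPU */

	pr_info("took %llu ns\n", (unsigned long long)(t1 - t0));
}
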
@@ -170,6 +206,11 @@ again:
        return val;
 }
 
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
        struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-unsigned long long cpu_clock(int cpu)
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
 {
-       unsigned long long clock;
+       u64 clock;
        unsigned long flags;
 
        local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
        return clock;
 }
 
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestamps taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+       u64 clock;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       clock = sched_clock_cpu(smp_processor_id());
+       local_irq_restore(flags);
+
+       return clock;
+}
+
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
        return sched_clock();
 }
 
-
-unsigned long long cpu_clock(int cpu)
+u64 cpu_clock(int cpu)
 {
        return sched_clock_cpu(cpu);
 }
 
+u64 local_clock(void)
+{
+       return sched_clock_cpu(0);
+}
+
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);
index e6871cb3fc83dde1901b62c6355edc30ea55a64e..2722dc1b41383fa6f8108802315cbc82c3c1e814 100644 (file)
@@ -166,14 +166,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
  *
  * Returns: -ENOMEM if memory fails.
  */
-int cpupri_init(struct cpupri *cp, bool bootmem)
+int cpupri_init(struct cpupri *cp)
 {
-       gfp_t gfp = GFP_KERNEL;
        int i;
 
-       if (bootmem)
-               gfp = GFP_NOWAIT;
-
        memset(cp, 0, sizeof(*cp));
 
        for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
@@ -181,7 +177,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
 
                raw_spin_lock_init(&vec->lock);
                vec->count = 0;
-               if (!zalloc_cpumask_var(&vec->mask, gfp))
+               if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
                        goto cleanup;
        }
 
index 7cb5bb6b95be5a4be62c7a7f937388a0fe8ae1e1..9fc7d386fea4b845f5e3be3183825d00d182cda9 100644 (file)
@@ -27,7 +27,7 @@ struct cpupri {
 int  cpupri_find(struct cpupri *cp,
                 struct task_struct *p, struct cpumask *lowest_mask);
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
-int cpupri_init(struct cpupri *cp, bool bootmem);
+int cpupri_init(struct cpupri *cp);
 void cpupri_cleanup(struct cpupri *cp);
 #else
 #define cpupri_set(cp, cpu, pri) do { } while (0)
index 35565395d00d32c7c295d3c36fa722ecdadda097..2e1b0d17dd9b6a8b4ac48891a988c025a8b07ed5 100644 (file)
@@ -332,7 +332,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
        PN(sysctl_sched_latency);
        PN(sysctl_sched_min_granularity);
        PN(sysctl_sched_wakeup_granularity);
-       PN(sysctl_sched_child_runs_first);
+       P(sysctl_sched_child_runs_first);
        P(sysctl_sched_features);
 #undef PN
 #undef P
index a878b5332daad5d7db16625f298a4e963edac909..806d1b227a21060aac100994a8992266c00b59b5 100644 (file)
@@ -2287,13 +2287,6 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
        unsigned long power = SCHED_LOAD_SCALE;
        struct sched_group *sdg = sd->groups;
 
-       if (sched_feat(ARCH_POWER))
-               power *= arch_scale_freq_power(sd, cpu);
-       else
-               power *= default_scale_freq_power(sd, cpu);
-
-       power >>= SCHED_LOAD_SHIFT;
-
        if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
                if (sched_feat(ARCH_POWER))
                        power *= arch_scale_smt_power(sd, cpu);
@@ -2303,6 +2296,15 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                power >>= SCHED_LOAD_SHIFT;
        }
 
+       sdg->cpu_power_orig = power;
+
+       if (sched_feat(ARCH_POWER))
+               power *= arch_scale_freq_power(sd, cpu);
+       else
+               power *= default_scale_freq_power(sd, cpu);
+
+       power >>= SCHED_LOAD_SHIFT;
+
        power *= scale_rt_power(cpu);
        power >>= SCHED_LOAD_SHIFT;
 
@@ -2335,6 +2337,31 @@ static void update_group_power(struct sched_domain *sd, int cpu)
        sdg->cpu_power = power;
 }
 
+/*
+ * Try and fix up capacity for tiny siblings; this is needed when
+ * things like SD_ASYM_PACKING need f_b_g to select another sibling
+ * which on its own isn't powerful enough.
+ *
+ * See update_sd_pick_busiest() and check_asym_packing().
+ */
+static inline int
+fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+{
+       /*
+        * Only siblings can have significantly less than SCHED_LOAD_SCALE
+        */
+       if (sd->level != SD_LV_SIBLING)
+               return 0;
+
+       /*
+        * If ~90% of the cpu_power is still there, we're good.
+        */
+       if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+               return 1;
+
+       return 0;
+}
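As a worked example of the ~90% test above (numbers are illustrative):
the check accepts a sibling that kept at least 29/32 ~= 90.6% of its
original power, so with cpu_power_orig = 1024 (SCHED_LOAD_SCALE) a group
at cpu_power = 940 passes (940 * 32 = 30080 > 1024 * 29 = 29696), while
one at 920 fails (920 * 32 = 29440).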
+
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @sd: The sched_domain whose statistics are to be updated.
@@ -2400,14 +2427,14 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
         * domains. In the newly idle case, we will allow all the cpu's
         * to do the newly idle load balance.
         */
-       if (idle != CPU_NEWLY_IDLE && local_group &&
-           balance_cpu != this_cpu) {
-               *balance = 0;
-               return;
+       if (idle != CPU_NEWLY_IDLE && local_group) {
+               if (balance_cpu != this_cpu) {
+                       *balance = 0;
+                       return;
+               }
+               update_group_power(sd, this_cpu);
        }
 
-       update_group_power(sd, this_cpu);
-
        /* Adjust by relative CPU power of the group */
        sgs->avg_load = (sgs->group_load * SCHED_LOAD_SCALE) / group->cpu_power;
 
@@ -2428,6 +2455,51 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
 
        sgs->group_capacity =
                DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE);
+       if (!sgs->group_capacity)
+               sgs->group_capacity = fix_small_capacity(sd, group);
+}
+
+/**
+ * update_sd_pick_busiest - return 1 on busiest group
+ * @sd: sched_domain whose statistics are to be checked
+ * @sds: sched_domain statistics
+ * @sg: sched_group candidate to be checked for being the busiest
+ * @sgs: sched_group statistics
+ * @this_cpu: the current cpu
+ *
+ * Determine if @sg is a busier group than the previously selected
+ * busiest group.
+ */
+static bool update_sd_pick_busiest(struct sched_domain *sd,
+                                  struct sd_lb_stats *sds,
+                                  struct sched_group *sg,
+                                  struct sg_lb_stats *sgs,
+                                  int this_cpu)
+{
+       if (sgs->avg_load <= sds->max_load)
+               return false;
+
+       if (sgs->sum_nr_running > sgs->group_capacity)
+               return true;
+
+       if (sgs->group_imb)
+               return true;
+
+       /*
+        * ASYM_PACKING needs to move all the work to the lowest
+        * numbered CPUs in the group, therefore mark all groups
+        * higher than ourself as busy.
+        */
+       if ((sd->flags & SD_ASYM_PACKING) && sgs->sum_nr_running &&
+           this_cpu < group_first_cpu(sg)) {
+               if (!sds->busiest)
+                       return true;
+
+               if (group_first_cpu(sds->busiest) > group_first_cpu(sg))
+                       return true;
+       }
+
+       return false;
 }
 
 /**
@@ -2435,7 +2507,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
  * @sd: sched_domain whose statistics are to be updated.
  * @this_cpu: Cpu for which load balance is currently performed.
  * @idle: Idle status of this_cpu
- * @sd_idle: Idle status of the sched_domain containing group.
+ * @sd_idle: Idle status of the sched_domain containing sg.
  * @cpus: Set of cpus considered for load balancing.
  * @balance: Should we balance.
  * @sds: variable to hold the statistics for this sched_domain.
@@ -2446,7 +2518,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                        struct sd_lb_stats *sds)
 {
        struct sched_domain *child = sd->child;
-       struct sched_group *group = sd->groups;
+       struct sched_group *sg = sd->groups;
        struct sg_lb_stats sgs;
        int load_idx, prefer_sibling = 0;
 
@@ -2459,21 +2531,20 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
        do {
                int local_group;
 
-               local_group = cpumask_test_cpu(this_cpu,
-                                              sched_group_cpus(group));
+               local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg));
                memset(&sgs, 0, sizeof(sgs));
-               update_sg_lb_stats(sd, group, this_cpu, idle, load_idx, sd_idle,
+               update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle,
                                local_group, cpus, balance, &sgs);
 
                if (local_group && !(*balance))
                        return;
 
                sds->total_load += sgs.group_load;
-               sds->total_pwr += group->cpu_power;
+               sds->total_pwr += sg->cpu_power;
 
                /*
                 * In case the child domain prefers tasks go to siblings
-                * first, lower the group capacity to one so that we'll try
+                * first, lower the sg capacity to one so that we'll try
                 * and move all the excess tasks away.
                 */
                if (prefer_sibling)
@@ -2481,23 +2552,72 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
 
                if (local_group) {
                        sds->this_load = sgs.avg_load;
-                       sds->this = group;
+                       sds->this = sg;
                        sds->this_nr_running = sgs.sum_nr_running;
                        sds->this_load_per_task = sgs.sum_weighted_load;
-               } else if (sgs.avg_load > sds->max_load &&
-                          (sgs.sum_nr_running > sgs.group_capacity ||
-                               sgs.group_imb)) {
+               } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) {
                        sds->max_load = sgs.avg_load;
-                       sds->busiest = group;
+                       sds->busiest = sg;
                        sds->busiest_nr_running = sgs.sum_nr_running;
                        sds->busiest_group_capacity = sgs.group_capacity;
                        sds->busiest_load_per_task = sgs.sum_weighted_load;
                        sds->group_imb = sgs.group_imb;
                }
 
-               update_sd_power_savings_stats(group, sds, local_group, &sgs);
-               group = group->next;
-       } while (group != sd->groups);
+               update_sd_power_savings_stats(sg, sds, local_group, &sgs);
+               sg = sg->next;
+       } while (sg != sd->groups);
+}
+
+int __weak arch_sd_sibling_asym_packing(void)
+{
+       return 0*SD_ASYM_PACKING;
+}
+
+/**
+ * check_asym_packing - Check to see if the group is packed into the
+ *                     sched domain.
+ *
+ * This is primarily intended to be used at the sibling level.  Some
+ * cores like POWER7 prefer to use lower numbered SMT threads.  In the
+ * case of POWER7, it can move to lower SMT modes only when higher
+ * threads are idle.  When in lower SMT modes, the threads will
+ * perform better since they share fewer core resources.  Hence when we
+ * have idle threads, we want them to be the higher ones.
+ *
+ * This packing function is run on idle threads.  It checks to see if
+ * the busiest CPU in this domain (core in the P7 case) has a higher
+ * CPU number than the packing function is being run on.  Here we are
+ * assuming a lower CPU number will be equivalent to a lower SMT thread
+ * number.
+ *
+ * Returns 1 when packing is required and a task should be moved to
+ * this CPU.  The amount of the imbalance is returned in *imbalance.
+ *
+ * @sd: The sched_domain whose packing is to be checked.
+ * @sds: Statistics of the sched_domain which is to be packed
+ * @this_cpu: The cpu at whose sched_domain we're performing load-balance.
+ * @imbalance: returns the amount of imbalance due to packing.
+ */
+static int check_asym_packing(struct sched_domain *sd,
+                             struct sd_lb_stats *sds,
+                             int this_cpu, unsigned long *imbalance)
+{
+       int busiest_cpu;
+
+       if (!(sd->flags & SD_ASYM_PACKING))
+               return 0;
+
+       if (!sds->busiest)
+               return 0;
+
+       busiest_cpu = group_first_cpu(sds->busiest);
+       if (this_cpu > busiest_cpu)
+               return 0;
+
+       *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+                                      SCHED_LOAD_SCALE);
+       return 1;
 }
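Illustrative arithmetic for the imbalance computed above (made-up
numbers): with sds->max_load = 1536 and sds->busiest->cpu_power = 2048,
*imbalance = DIV_ROUND_CLOSEST(1536 * 2048, 1024) = 3072; the group's
average load, which was normalized to SCHED_LOAD_SCALE, is converted
back into the raw weighted-load units that the move_tasks() path acts on.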
 
 /**
@@ -2692,6 +2812,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
        if (!(*balance))
                goto ret;
 
+       if ((idle == CPU_IDLE || idle == CPU_NEWLY_IDLE) &&
+           check_asym_packing(sd, &sds, this_cpu, imbalance))
+               return sds.busiest;
+
        if (!sds.busiest || sds.busiest_nr_running == 0)
                goto out_balanced;
 
@@ -2726,8 +2850,9 @@ ret:
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
 static struct rq *
-find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
-                  unsigned long imbalance, const struct cpumask *cpus)
+find_busiest_queue(struct sched_domain *sd, struct sched_group *group,
+                  enum cpu_idle_type idle, unsigned long imbalance,
+                  const struct cpumask *cpus)
 {
        struct rq *busiest = NULL, *rq;
        unsigned long max_load = 0;
@@ -2738,6 +2863,9 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
                unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE);
                unsigned long wl;
 
+               if (!capacity)
+                       capacity = fix_small_capacity(sd, group);
+
                if (!cpumask_test_cpu(i, cpus))
                        continue;
 
@@ -2777,9 +2905,19 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 /* Working cpumask for load_balance and load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
 
-static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
+static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle,
+                              int busiest_cpu, int this_cpu)
 {
        if (idle == CPU_NEWLY_IDLE) {
+
+               /*
+                * ASYM_PACKING needs to force migrate tasks from busy but
+                * higher numbered CPUs in order to pack all tasks in the
+                * lowest numbered CPUs.
+                */
+               if ((sd->flags & SD_ASYM_PACKING) && busiest_cpu > this_cpu)
+                       return 1;
+
                /*
                 * The only task running in a non-idle cpu can be moved to this
                 * cpu in an attempt to completely free up the other CPU
@@ -2854,7 +2992,7 @@ redo:
                goto out_balanced;
        }
 
-       busiest = find_busiest_queue(group, idle, imbalance, cpus);
+       busiest = find_busiest_queue(sd, group, idle, imbalance, cpus);
        if (!busiest) {
                schedstat_inc(sd, lb_nobusyq[idle]);
                goto out_balanced;
@@ -2898,7 +3036,8 @@ redo:
                schedstat_inc(sd, lb_failed[idle]);
                sd->nr_balance_failed++;
 
-               if (need_active_balance(sd, sd_idle, idle)) {
+               if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest),
+                                       this_cpu)) {
                        raw_spin_lock_irqsave(&busiest->lock, flags);
 
                        /* don't kick the active_load_balance_cpu_stop,
@@ -3093,13 +3232,40 @@ out_unlock:
 }
 
 #ifdef CONFIG_NO_HZ
+
+static DEFINE_PER_CPU(struct call_single_data, remote_sched_softirq_cb);
+
+static void trigger_sched_softirq(void *data)
+{
+       raise_softirq_irqoff(SCHED_SOFTIRQ);
+}
+
+static inline void init_sched_softirq_csd(struct call_single_data *csd)
+{
+       csd->func = trigger_sched_softirq;
+       csd->info = NULL;
+       csd->flags = 0;
+       csd->priv = 0;
+}
+
+/*
+ * idle load balancing details
+ * - One of the idle CPUs nominates itself as the idle load_balancer while
+ *   entering idle.
+ * - This idle load balancer CPU will also go into tickless mode when
+ *   it is idle, just like all other idle CPUs
+ * - When one of the busy CPUs notices that idle rebalancing may be
+ *   needed, it will kick the idle load balancer, which then does idle
+ *   load balancing for all the idle CPUs.
+ */
 static struct {
        atomic_t load_balancer;
-       cpumask_var_t cpu_mask;
-       cpumask_var_t ilb_grp_nohz_mask;
-} nohz ____cacheline_aligned = {
-       .load_balancer = ATOMIC_INIT(-1),
-};
+       atomic_t first_pick_cpu;
+       atomic_t second_pick_cpu;
+       cpumask_var_t idle_cpus_mask;
+       cpumask_var_t grp_idle_mask;
+       unsigned long next_balance;     /* in jiffy units */
+} nohz ____cacheline_aligned;
 
 int get_nohz_load_balancer(void)
 {
@@ -3153,17 +3319,17 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
  */
 static inline int is_semi_idle_group(struct sched_group *ilb_group)
 {
-       cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+       cpumask_and(nohz.grp_idle_mask, nohz.idle_cpus_mask,
                                        sched_group_cpus(ilb_group));
 
        /*
         * A sched_group is semi-idle when it has at least one busy cpu
         * and at least one idle cpu.
         */
-       if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+       if (cpumask_empty(nohz.grp_idle_mask))
                return 0;
 
-       if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+       if (cpumask_equal(nohz.grp_idle_mask, sched_group_cpus(ilb_group)))
                return 0;
 
        return 1;
@@ -3196,7 +3362,7 @@ static int find_new_ilb(int cpu)
         * Optimize for the case when we have no idle CPUs or only one
         * idle CPU. Don't walk the sched_domain hierarchy in such cases
         */
-       if (cpumask_weight(nohz.cpu_mask) < 2)
+       if (cpumask_weight(nohz.idle_cpus_mask) < 2)
                goto out_done;
 
        for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
@@ -3204,7 +3370,7 @@ static int find_new_ilb(int cpu)
 
                do {
                        if (is_semi_idle_group(ilb_group))
-                               return cpumask_first(nohz.ilb_grp_nohz_mask);
+                               return cpumask_first(nohz.grp_idle_mask);
 
                        ilb_group = ilb_group->next;
 
@@ -3212,98 +3378,116 @@ static int find_new_ilb(int cpu)
        }
 
 out_done:
-       return cpumask_first(nohz.cpu_mask);
+       return nr_cpu_ids;
 }
 #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
 {
-       return cpumask_first(nohz.cpu_mask);
+       return nr_cpu_ids;
 }
 #endif
 
+/*
+ * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
+ * nohz_load_balancer CPU (if there is one), otherwise fall back to any idle
+ * CPU (if there is one).
+ */
+static void nohz_balancer_kick(int cpu)
+{
+       int ilb_cpu;
+
+       nohz.next_balance++;
+
+       ilb_cpu = get_nohz_load_balancer();
+
+       if (ilb_cpu >= nr_cpu_ids) {
+               ilb_cpu = cpumask_first(nohz.idle_cpus_mask);
+               if (ilb_cpu >= nr_cpu_ids)
+                       return;
+       }
+
+       if (!cpu_rq(ilb_cpu)->nohz_balance_kick) {
+               struct call_single_data *cp;
+
+               cpu_rq(ilb_cpu)->nohz_balance_kick = 1;
+               cp = &per_cpu(remote_sched_softirq_cb, cpu);
+               __smp_call_function_single(ilb_cpu, cp, 0);
+       }
+       return;
+}
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. The ilb owner will do the idle
- * load balancing on behalf of all those cpus. If all the cpus in the system
- * go into this tickless mode, then there will be no ilb owner (as there is
- * no need for one) and all the cpus will sleep till the next wakeup event
- * arrives...
- *
- * For the ilb owner, tick is not stopped. And this tick will be used
- * for idle load balancing. ilb owner will still be part of
- * nohz.cpu_mask..
+ * load balancing on behalf of all those cpus.
  *
- * While stopping the tick, this cpu will become the ilb owner if there
- * is no other owner. And will be the owner till that cpu becomes busy
- * or if all cpus in the system stop their ticks at which point
- * there is no need for ilb owner.
+ * When the ilb owner becomes busy, we will not have a new ilb owner until some
+ * idle CPU wakes up and goes back to idle or some busy CPU tries to kick
+ * idle load balancing by kicking one of the idle CPUs.
  *
- * When the ilb owner becomes busy, it nominates another owner, during the
- * next busy scheduler_tick()
+ * Ticks are stopped for the ilb owner as well, with a busy CPU kicking this
+ * ilb owner CPU in the future (when there is a need for idle load balancing on
+ * behalf of all idle CPUs).
  */
-int select_nohz_load_balancer(int stop_tick)
+void select_nohz_load_balancer(int stop_tick)
 {
        int cpu = smp_processor_id();
 
        if (stop_tick) {
-               cpu_rq(cpu)->in_nohz_recently = 1;
-
                if (!cpu_active(cpu)) {
                        if (atomic_read(&nohz.load_balancer) != cpu)
-                               return 0;
+                               return;
 
                        /*
                         * If we are going offline and still the leader,
                         * give up!
                         */
-                       if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+                       if (atomic_cmpxchg(&nohz.load_balancer, cpu,
+                                          nr_cpu_ids) != cpu)
                                BUG();
 
-                       return 0;
+                       return;
                }
 
-               cpumask_set_cpu(cpu, nohz.cpu_mask);
+               cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
 
-               /* time for ilb owner also to sleep */
-               if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) {
-                       if (atomic_read(&nohz.load_balancer) == cpu)
-                               atomic_set(&nohz.load_balancer, -1);
-                       return 0;
-               }
+               if (atomic_read(&nohz.first_pick_cpu) == cpu)
+                       atomic_cmpxchg(&nohz.first_pick_cpu, cpu, nr_cpu_ids);
+               if (atomic_read(&nohz.second_pick_cpu) == cpu)
+                       atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
 
-               if (atomic_read(&nohz.load_balancer) == -1) {
-                       /* make me the ilb owner */
-                       if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
-                               return 1;
-               } else if (atomic_read(&nohz.load_balancer) == cpu) {
+               if (atomic_read(&nohz.load_balancer) >= nr_cpu_ids) {
                        int new_ilb;
 
-                       if (!(sched_smt_power_savings ||
-                                               sched_mc_power_savings))
-                               return 1;
+                       /* make me the ilb owner */
+                       if (atomic_cmpxchg(&nohz.load_balancer, nr_cpu_ids,
+                                          cpu) != nr_cpu_ids)
+                               return;
+
                        /*
                         * Check to see if there is a more power-efficient
                         * ilb.
                         */
                        new_ilb = find_new_ilb(cpu);
                        if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
-                               atomic_set(&nohz.load_balancer, -1);
+                               atomic_set(&nohz.load_balancer, nr_cpu_ids);
                                resched_cpu(new_ilb);
-                               return 0;
+                               return;
                        }
-                       return 1;
+                       return;
                }
        } else {
-               if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
-                       return 0;
+               if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
+                       return;
 
-               cpumask_clear_cpu(cpu, nohz.cpu_mask);
+               cpumask_clear_cpu(cpu, nohz.idle_cpus_mask);
 
                if (atomic_read(&nohz.load_balancer) == cpu)
-                       if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
+                       if (atomic_cmpxchg(&nohz.load_balancer, cpu,
+                                          nr_cpu_ids) != cpu)
                                BUG();
        }
-       return 0;
+       return;
 }
 #endif
 
@@ -3385,11 +3569,102 @@ out:
                rq->next_balance = next_balance;
 }
 
+#ifdef CONFIG_NO_HZ
 /*
- * run_rebalance_domains is triggered when needed from the scheduler tick.
- * In CONFIG_NO_HZ case, the idle load balance owner will do the
+ * In CONFIG_NO_HZ case, the idle balance kickee will do the
  * rebalancing for all the cpus for whom scheduler ticks are stopped.
  */
+static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle)
+{
+       struct rq *this_rq = cpu_rq(this_cpu);
+       struct rq *rq;
+       int balance_cpu;
+
+       if (idle != CPU_IDLE || !this_rq->nohz_balance_kick)
+               return;
+
+       for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
+               if (balance_cpu == this_cpu)
+                       continue;
+
+               /*
+                * If this cpu gets work to do, stop the load balancing
+                * work being done for other cpus. Next load
+                * balancing owner will pick it up.
+                */
+               if (need_resched()) {
+                       this_rq->nohz_balance_kick = 0;
+                       break;
+               }
+
+               raw_spin_lock_irq(&this_rq->lock);
+               update_rq_clock(this_rq);
+               update_cpu_load(this_rq);
+               raw_spin_unlock_irq(&this_rq->lock);
+
+               rebalance_domains(balance_cpu, CPU_IDLE);
+
+               rq = cpu_rq(balance_cpu);
+               if (time_after(this_rq->next_balance, rq->next_balance))
+                       this_rq->next_balance = rq->next_balance;
+       }
+       nohz.next_balance = this_rq->next_balance;
+       this_rq->nohz_balance_kick = 0;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer
+ * - first_pick_cpu is one of the busy CPUs. It will kick
+ *   idle load balancer when it has more than one process active. This
+ *   eliminates the need for idle load balancing altogether when we have
+ *   only one running process in the system (common case).
+ * - If there is more than one busy CPU, the idle load balancer may have
+ *   to run for active_load_balance to happen (i.e., two busy CPUs are
+ *   SMT or core siblings and can run better if they move to different
+ *   physical CPUs). So, second_pick_cpu is the second of the busy CPUs
+ *   which will kick idle load balancer as soon as it has any load.
+ */
+static inline int nohz_kick_needed(struct rq *rq, int cpu)
+{
+       unsigned long now = jiffies;
+       int ret;
+       int first_pick_cpu, second_pick_cpu;
+
+       if (time_before(now, nohz.next_balance))
+               return 0;
+
+       if (!rq->nr_running)
+               return 0;
+
+       first_pick_cpu = atomic_read(&nohz.first_pick_cpu);
+       second_pick_cpu = atomic_read(&nohz.second_pick_cpu);
+
+       if (first_pick_cpu < nr_cpu_ids && first_pick_cpu != cpu &&
+           second_pick_cpu < nr_cpu_ids && second_pick_cpu != cpu)
+               return 0;
+
+       ret = atomic_cmpxchg(&nohz.first_pick_cpu, nr_cpu_ids, cpu);
+       if (ret == nr_cpu_ids || ret == cpu) {
+               atomic_cmpxchg(&nohz.second_pick_cpu, cpu, nr_cpu_ids);
+               if (rq->nr_running > 1)
+                       return 1;
+       } else {
+               ret = atomic_cmpxchg(&nohz.second_pick_cpu, nr_cpu_ids, cpu);
+               if (ret == nr_cpu_ids || ret == cpu) {
+                       if (rq->nr_running)
+                               return 1;
+               }
+       }
+       return 0;
+}
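A hypothetical walk-through of the heuristic above: if CPU 3 is the only
busy CPU and runs a single task, it claims first_pick_cpu but sees
rq->nr_running == 1, so no kick is issued and idle balancing stays off
entirely. Once CPU 3 picks up a second task (nr_running > 1), or a
second busy CPU claims second_pick_cpu with any load at all,
nohz_balancer_kick() raises SCHED_SOFTIRQ on an idle CPU, which then
rebalances on behalf of all tickless CPUs via nohz_idle_balance().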
+#else
+static void nohz_idle_balance(int this_cpu, enum cpu_idle_type idle) { }
+#endif
+
+/*
+ * run_rebalance_domains is triggered when needed from the scheduler tick.
+ * Also triggered for nohz idle balancing (with nohz_balance_kick set).
+ */
 static void run_rebalance_domains(struct softirq_action *h)
 {
        int this_cpu = smp_processor_id();
@@ -3399,37 +3674,12 @@ static void run_rebalance_domains(struct softirq_action *h)
 
        rebalance_domains(this_cpu, idle);
 
-#ifdef CONFIG_NO_HZ
        /*
-        * If this cpu is the owner for idle load balancing, then do the
+        * If this cpu has a pending nohz_balance_kick, then do the
         * balancing on behalf of the other idle cpus whose ticks are
         * stopped.
         */
-       if (this_rq->idle_at_tick &&
-           atomic_read(&nohz.load_balancer) == this_cpu) {
-               struct rq *rq;
-               int balance_cpu;
-
-               for_each_cpu(balance_cpu, nohz.cpu_mask) {
-                       if (balance_cpu == this_cpu)
-                               continue;
-
-                       /*
-                        * If this cpu gets work to do, stop the load balancing
-                        * work being done for other cpus. Next load
-                        * balancing owner will pick it up.
-                        */
-                       if (need_resched())
-                               break;
-
-                       rebalance_domains(balance_cpu, CPU_IDLE);
-
-                       rq = cpu_rq(balance_cpu);
-                       if (time_after(this_rq->next_balance, rq->next_balance))
-                               this_rq->next_balance = rq->next_balance;
-               }
-       }
-#endif
+       nohz_idle_balance(this_cpu, idle);
 }
 
 static inline int on_null_domain(int cpu)
@@ -3439,57 +3689,17 @@ static inline int on_null_domain(int cpu)
 
 /*
  * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
- *
- * In case of CONFIG_NO_HZ, this is the place where we nominate a new
- * idle load balancing owner or decide to stop the periodic load balancing,
- * if the whole system is idle.
  */
 static inline void trigger_load_balance(struct rq *rq, int cpu)
 {
-#ifdef CONFIG_NO_HZ
-       /*
-        * If we were in the nohz mode recently and busy at the current
-        * scheduler tick, then check if we need to nominate new idle
-        * load balancer.
-        */
-       if (rq->in_nohz_recently && !rq->idle_at_tick) {
-               rq->in_nohz_recently = 0;
-
-               if (atomic_read(&nohz.load_balancer) == cpu) {
-                       cpumask_clear_cpu(cpu, nohz.cpu_mask);
-                       atomic_set(&nohz.load_balancer, -1);
-               }
-
-               if (atomic_read(&nohz.load_balancer) == -1) {
-                       int ilb = find_new_ilb(cpu);
-
-                       if (ilb < nr_cpu_ids)
-                               resched_cpu(ilb);
-               }
-       }
-
-       /*
-        * If this cpu is idle and doing idle load balancing for all the
-        * cpus with ticks stopped, is it time for that to stop?
-        */
-       if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
-           cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
-               resched_cpu(cpu);
-               return;
-       }
-
-       /*
-        * If this cpu is idle and the idle load balancing is done by
-        * someone else, then no need raise the SCHED_SOFTIRQ
-        */
-       if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
-           cpumask_test_cpu(cpu, nohz.cpu_mask))
-               return;
-#endif
        /* Don't need to rebalance while attached to NULL domain */
        if (time_after_eq(jiffies, rq->next_balance) &&
            likely(!on_null_domain(cpu)))
                raise_softirq(SCHED_SOFTIRQ);
+#ifdef CONFIG_NO_HZ
+       else if (nohz_kick_needed(rq, cpu) && likely(!on_null_domain(cpu)))
+               nohz_balancer_kick(cpu);
+#endif
 }
 
 static void rq_online_fair(struct rq *rq)
index 8afb953e31c6c1ba9a263a78929a04a0d82ff2c7..d10c80ebb67a2821038a9c59b912bf8e323d67e3 100644 (file)
@@ -1663,9 +1663,6 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 {
        unsigned long soft, hard;
 
-       if (!p->signal)
-               return;
-
        /* max may change after cur was read, this will be fixed next tick */
        soft = task_rlimit(p, RLIMIT_RTTIME);
        hard = task_rlimit_max(p, RLIMIT_RTTIME);
index 32d2bd4061b02b343f566478a5fe7a07c23a3021..25c2f962f6fcb3b545719e4a8871c991bdbeb37f 100644 (file)
@@ -295,13 +295,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 static inline void account_group_user_time(struct task_struct *tsk,
                                           cputime_t cputime)
 {
-       struct thread_group_cputimer *cputimer;
-
-       /* tsk == current, ensure it is safe to use ->signal */
-       if (unlikely(tsk->exit_state))
-               return;
-
-       cputimer = &tsk->signal->cputimer;
+       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 
        if (!cputimer->running)
                return;
@@ -325,13 +319,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
 static inline void account_group_system_time(struct task_struct *tsk,
                                             cputime_t cputime)
 {
-       struct thread_group_cputimer *cputimer;
-
-       /* tsk == current, ensure it is safe to use ->signal */
-       if (unlikely(tsk->exit_state))
-               return;
-
-       cputimer = &tsk->signal->cputimer;
+       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 
        if (!cputimer->running)
                return;
@@ -355,16 +343,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 static inline void account_group_exec_runtime(struct task_struct *tsk,
                                              unsigned long long ns)
 {
-       struct thread_group_cputimer *cputimer;
-       struct signal_struct *sig;
-
-       sig = tsk->signal;
-       /* see __exit_signal()->task_rq_unlock_wait() */
-       barrier();
-       if (unlikely(!sig))
-               return;
-
-       cputimer = &sig->cputimer;
+       struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 
        if (!cputimer->running)
                return;
index f898af60817156507f700abf095653352ab3f431..021d2f878f193bfe6eb2bacebd313dc3a4f5f072 100644 (file)
@@ -405,13 +405,7 @@ void tick_nohz_stop_sched_tick(int inidle)
                 * the scheduler tick in nohz_restart_sched_tick.
                 */
                if (!ts->tick_stopped) {
-                       if (select_nohz_load_balancer(1)) {
-                               /*
-                                * sched tick not stopped!
-                                */
-                               cpumask_clear_cpu(cpu, nohz_cpu_mask);
-                               goto out;
-                       }
+                       select_nohz_load_balancer(1);
 
                        ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
                        ts->tick_stopped = 1;
index ee305c8d4e18eb038a54f0f9aea8eb2f353b1f44..48d6aec0789cb238b358a9b727eca1f1a3679a13 100644 (file)
@@ -679,12 +679,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
        cpu = smp_processor_id();
 
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-       if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-               int preferred_cpu = get_nohz_load_balancer();
-
-               if (preferred_cpu >= 0)
-                       cpu = preferred_cpu;
-       }
+       if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+               cpu = get_nohz_timer_target();
 #endif
        new_base = per_cpu(tvec_bases, cpu);
 
index 9d589d8dcd1aa7e9470b6868980a20c4aeaf6160..1723e2b8c589ce20a1e93691026ea4d94247be31 100644 (file)
@@ -56,7 +56,7 @@ u64 notrace trace_clock_local(void)
  */
 u64 notrace trace_clock(void)
 {
-       return cpu_clock(raw_smp_processor_id());
+       return local_clock();
 }
 
 
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
new file mode 100644 (file)
index 0000000..af040ba
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * kernel/workqueue_sched.h
+ *
+ * Scheduler hooks for concurrency managed workqueue.  Only to be
+ * included from sched.c and workqueue.c.
+ */
+static inline void wq_worker_waking_up(struct task_struct *task,
+                                      unsigned int cpu)
+{
+}
+
+static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
+                                                    unsigned int cpu)
+{
+       return NULL;
+}
index 58c66cc5056a7073aabf880116548a9572c2894b..142c84a54993a75ca2fbd5346e3eb5b16552f390 100644 (file)
@@ -833,15 +833,24 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
                                   unsigned long align, unsigned long goal)
 {
+       void *ptr;
+
        if (WARN_ON_ONCE(slab_is_available()))
                return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-       return __alloc_memory_core_early(pgdat->node_id, size, align,
+       ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+                                        goal, -1ULL);
+       if (ptr)
+               return ptr;
+
+       ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
                                         goal, -1ULL);
 #else
-       return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
+       ptr = ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
 #endif
+
+       return ptr;
 }
 
 void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
@@ -977,14 +986,21 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
                                       unsigned long align, unsigned long goal)
 {
+       void *ptr;
+
        if (WARN_ON_ONCE(slab_is_available()))
                return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
 
 #ifdef CONFIG_NO_BOOTMEM
-       return __alloc_memory_core_early(pgdat->node_id, size, align,
+       ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
+                               goal, ARCH_LOW_ADDRESS_LIMIT);
+       if (ptr)
+               return ptr;
+       ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align,
                                goal, ARCH_LOW_ADDRESS_LIMIT);
 #else
-       return ___alloc_bootmem_node(pgdat->bdata, size, align,
+       ptr = ___alloc_bootmem_node(pgdat->bdata, size, align,
                                goal, ARCH_LOW_ADDRESS_LIMIT);
 #endif
+       return ptr;
 }
index 431214b941acec39f68e171e468a59f74b9b749e..9bd339eb04c6c84691232bc7e499947fe9d13942 100644 (file)
@@ -3634,6 +3634,9 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
        int i;
        void *ptr;
 
+       if (limit > get_max_mapped())
+               limit = get_max_mapped();
+
        /* need to go over early_node_map to find out good range for node */
        for_each_active_range_index_in_nid(i, nid) {
                u64 addr;
@@ -3659,6 +3662,11 @@ void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
                ptr = phys_to_virt(addr);
                memset(ptr, 0, size);
                reserve_early_without_check(addr, addr + size, "BOOTMEM");
+               /*
+                * The min_count is set to 0 so that bootmem allocated blocks
+                * are never reported as leaks.
+                */
+               kmemleak_alloc(ptr, size, 0, 0);
                return ptr;
        }
 
index 6c0081441a326a174f83ebd66485bd9cb961f88d..5bffada7cde17a383e5ba510c2cca4be23463042 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/vmalloc.h>
 #include <linux/cgroup.h>
 #include <linux/swapops.h>
+#include <linux/kmemleak.h>
 
 static void __meminit
 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
@@ -126,6 +127,12 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn)
                        if (!base)
                                base = vmalloc(table_size);
                }
+               /*
+                * The value stored in section->page_cgroup is (base - pfn)
+                * and it does not point to the memory block allocated above,
+                * causing kmemleak false positives.
+                */
+               kmemleak_not_leak(base);
        } else {
                /*
                 * We don't have to allocate page_cgroup again, but
index 9c7e57cc63a34f7231b77a7d8b395d3157a34ba6..b94fe1b3da435f34f567e8a23358ec18ccd452ad 100644 (file)
@@ -213,8 +213,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
        list_for_each_entry(shrinker, &shrinker_list, list) {
                unsigned long long delta;
                unsigned long total_scan;
-               unsigned long max_pass = (*shrinker->shrink)(0, gfp_mask);
+               unsigned long max_pass;
 
+               max_pass = (*shrinker->shrink)(shrinker, 0, gfp_mask);
                delta = (4 * scanned) / shrinker->seeks;
                delta *= max_pass;
                do_div(delta, lru_pages + 1);
@@ -242,8 +243,9 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
                        int shrink_ret;
                        int nr_before;
 
-                       nr_before = (*shrinker->shrink)(0, gfp_mask);
-                       shrink_ret = (*shrinker->shrink)(this_scan, gfp_mask);
+                       nr_before = (*shrinker->shrink)(shrinker, 0, gfp_mask);
+                       shrink_ret = (*shrinker->shrink)(shrinker, this_scan,
+                                                               gfp_mask);
                        if (shrink_ret == -1)
                                break;
                        if (shrink_ret < nr_before)
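For reference, a minimal sketch of a shrinker callback under the new
signature used above (illustrative only; my_shrink, my_cache_evict() and
my_cache_count() are made-up names; the pattern follows the
rpcauth_cache_shrinker update further down in this diff):

	static int my_shrink(struct shrinker *shrink, int nr_to_scan,
			     gfp_t gfp_mask)
	{
		if (nr_to_scan)
			my_cache_evict(nr_to_scan, gfp_mask);

		/* nr_to_scan == 0 is a query: report the current count */
		return my_cache_count();
	}

	static struct shrinker my_shrinker = {
		.shrink	= my_shrink,
		.seeks	= DEFAULT_SEEKS,
	};

Such a shrinker would be registered as usual with
register_shrinker(&my_shrinker).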
@@ -296,7 +298,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
 static void handle_write_error(struct address_space *mapping,
                                struct page *page, int error)
 {
-       lock_page(page);
+       lock_page_nosync(page);
        if (page_mapping(page) == mapping)
                mapping_set_error(mapping, error);
        unlock_page(page);
index b10e3cdb08f87358ca64d0db8cf83c27f5ad624a..800b6b9fbbaefe15c90406e7631acd4e25b172b3 100644 (file)
@@ -358,6 +358,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
                acl->sec_level = sec_level;
                acl->auth_type = auth_type;
                hci_acl_connect(acl);
+       } else {
+               if (acl->sec_level < sec_level)
+                       acl->sec_level = sec_level;
+               if (acl->auth_type < auth_type)
+                       acl->auth_type = auth_type;
        }
 
        if (type == ACL_LINK)
index 6c57fc71c7e2d0df7c75daf1054901cf7ed8eee6..786b5de0bac42819ae3e207c942840a69f2c13da 100644 (file)
@@ -1049,6 +1049,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
        if (conn) {
                if (!ev->status)
                        conn->link_mode |= HCI_LM_AUTH;
+               else
+                       conn->sec_level = BT_SECURITY_LOW;
 
                clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
 
index 1b682a5aa0616911c2861b5d836c015230922faf..cf3c4073a8a655d6e9d8936cb61d30254224962f 100644 (file)
@@ -401,6 +401,11 @@ static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
        l2cap_send_sframe(pi, control);
 }
 
+static inline int __l2cap_no_conn_pending(struct sock *sk)
+{
+       return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND);
+}
+
 static void l2cap_do_start(struct sock *sk)
 {
        struct l2cap_conn *conn = l2cap_pi(sk)->conn;
@@ -409,12 +414,13 @@ static void l2cap_do_start(struct sock *sk)
                if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
                        return;
 
-               if (l2cap_check_security(sk)) {
+               if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) {
                        struct l2cap_conn_req req;
                        req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
                        req.psm  = l2cap_pi(sk)->psm;
 
                        l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+                       l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
 
                        l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
                                        L2CAP_CONN_REQ, sizeof(req), &req);
@@ -464,12 +470,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
                }
 
                if (sk->sk_state == BT_CONNECT) {
-                       if (l2cap_check_security(sk)) {
+                       if (l2cap_check_security(sk) &&
+                                       __l2cap_no_conn_pending(sk)) {
                                struct l2cap_conn_req req;
                                req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
                                req.psm  = l2cap_pi(sk)->psm;
 
                                l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+                               l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
 
                                l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
                                        L2CAP_CONN_REQ, sizeof(req), &req);
@@ -2912,7 +2920,6 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
                l2cap_pi(sk)->ident = 0;
                l2cap_pi(sk)->dcid = dcid;
                l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
-
                l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
 
                l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
@@ -4404,6 +4411,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
                                req.psm  = l2cap_pi(sk)->psm;
 
                                l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+                               l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
 
                                l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
                                        L2CAP_CONN_REQ, sizeof(req), &req);
index eedf2c94820e26150d8823c2aa1652626c5f281d..753fc4221f3c3587e41dfd816fd6e2771578a243 100644 (file)
@@ -217,14 +217,6 @@ static bool br_devices_support_netpoll(struct net_bridge *br)
        return count != 0 && ret;
 }
 
-static void br_poll_controller(struct net_device *br_dev)
-{
-       struct netpoll *np = br_dev->npinfo->netpoll;
-
-       if (np->real_dev != br_dev)
-               netpoll_poll_dev(np->real_dev);
-}
-
 void br_netpoll_cleanup(struct net_device *dev)
 {
        struct net_bridge *br = netdev_priv(dev);
@@ -295,7 +287,6 @@ static const struct net_device_ops br_netdev_ops = {
        .ndo_do_ioctl            = br_dev_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_netpoll_cleanup     = br_netpoll_cleanup,
-       .ndo_poll_controller     = br_poll_controller,
 #endif
 };
 
index a4e72a89e4ffc4eae7a2e7e09963bd0fb58e26a4..595da45f908854d0c9eef2e54ba94280de5afcfb 100644 (file)
@@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
                        kfree_skb(skb);
                else {
                        skb_push(skb, ETH_HLEN);
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-                       if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
-                               netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
-                               skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
-                       } else
-#endif
-                               dev_queue_xmit(skb);
+                       dev_queue_xmit(skb);
                }
        }
 
@@ -73,23 +66,9 @@ int br_forward_finish(struct sk_buff *skb)
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_POLL_CONTROLLER
-       struct net_bridge *br = to->br;
-       if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
-               struct netpoll *np;
-               to->dev->npinfo = skb->dev->npinfo;
-               np = skb->dev->npinfo->netpoll;
-               np->real_dev = np->dev = to->dev;
-               to->dev->priv_flags |= IFF_IN_NETPOLL;
-       }
-#endif
        skb->dev = to->dev;
        NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
                br_forward_finish);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-       if (skb->dev->npinfo)
-               skb->dev->npinfo->netpoll->dev = br->dev;
-#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
index 723a34710ad401ff24afd5b1be1af86505f7da33..0ea10f849be862fff3219bc69ed4c5db0cbd9253 100644 (file)
@@ -1911,8 +1911,16 @@ static int dev_gso_segment(struct sk_buff *skb)
  */
 static inline void skb_orphan_try(struct sk_buff *skb)
 {
-       if (!skb_tx(skb)->flags)
+       struct sock *sk = skb->sk;
+
+       if (sk && !skb_tx(skb)->flags) {
+               /* skb_tx_hash() won't be able to get sk.
+                * We copy sk_hash into skb->rxhash
+                */
+               if (!skb->rxhash)
+                       skb->rxhash = sk->sk_hash;
                skb_orphan(skb);
+       }
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1998,8 +2006,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
        if (skb->sk && skb->sk->sk_hash)
                hash = skb->sk->sk_hash;
        else
-               hash = (__force u16) skb->protocol;
-
+               hash = (__force u16) skb->protocol ^ skb->rxhash;
        hash = jhash_1word(hash, hashrnd);
 
        return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
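Illustrative effect of the two hunks above (the hash value is made up):
if a socket with sk_hash = 0x1234 is orphaned early by skb_orphan_try(),
that value survives in skb->rxhash, so skb_tx_hash() still mixes 0x1234
into the hash instead of falling back to skb->protocol alone, and
packets of the flow keep selecting the same tx queue.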
@@ -2022,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                        struct sk_buff *skb)
 {
-       u16 queue_index;
+       int queue_index;
        struct sock *sk = skb->sk;
 
-       if (sk_tx_queue_recorded(sk)) {
-               queue_index = sk_tx_queue_get(sk);
-       } else {
+       queue_index = sk_tx_queue_get(sk);
+       if (queue_index < 0) {
                const struct net_device_ops *ops = dev->netdev_ops;
 
                if (ops->ndo_select_queue) {
index 6ba1c0eece039f99f19c0451bf5a52b3b9090a71..a4e0a7482c2bc878bdd6e0986ac45e0dc6659b3d 100644 (file)
@@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
 {
        struct hh_cache *hh;
        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
-               = neigh->dev->header_ops->cache_update;
+               = NULL;
+
+       if (neigh->dev->header_ops)
+               update = neigh->dev->header_ops->cache_update;
 
        if (update) {
                for (hh = neigh->hh; hh; hh = hh->hh_next) {
index c51b55400dc56818a2f4e0cca68cb74be38048a3..11201784d29a32d6fdddd972a8348d3202de0ec9 100644 (file)
@@ -1,7 +1,7 @@
 menuconfig NET_DSA
        bool "Distributed Switch Architecture support"
        default n
-       depends on EXPERIMENTAL && !S390
+       depends on EXPERIMENTAL && NET_ETHERNET && !S390
        select PHYLIB
        ---help---
          This allows you to use hardware switch chips that use
index 757f25eb9b4b2404ebebc6c4422b4ad1693ea227..7f6273506eea46522a17ff85b9a451cb18fdc7d3 100644 (file)
@@ -442,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
        int err;
 
        err = ipmr_fib_lookup(net, &fl, &mrt);
-       if (err < 0)
+       if (err < 0) {
+               kfree_skb(skb);
                return err;
+       }
 
        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
@@ -1728,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb)
                goto dont_forward;
 
        err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
-       if (err < 0)
+       if (err < 0) {
+               kfree_skb(skb);
                return err;
+       }
 
        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
index 6596b4feeddc7879fc020606f53dabdc8608f3e8..65afeaec15b7ebb85610650353e881aacb3e90aa 100644 (file)
@@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
        ssize_t spliced;
        int ret;
 
+       sock_rps_record_flow(sk);
        /*
         * We can't seek on a socket input
         */
index b4ed957f201a6f3f2ebf8d5564edec567b6435a6..7ed9dc1042d1930a7eb2107def30642cb367fa92 100644 (file)
@@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
        int mib_idx;
        int fwd_rexmitting = 0;
 
+       if (!tp->packets_out)
+               return;
+
        if (!tp->lost_out)
                tp->retransmit_high = tp->snd_una;
 
index 2794b6002836f446c4c6fb9835316d9ee61bba51..d6e9599d0705a5b79749a0566cbd97dfaa9993c0 100644 (file)
@@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type =
 
 static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
 {
+       struct ipv6hdr *iph = ipv6_hdr(skb);
        struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
        int err = rt2->rt_hdr.nexthdr;
 
        spin_lock(&x->lock);
-       if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
+       if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
            !ipv6_addr_any((struct in6_addr *)x->coaddr))
                err = -ENOENT;
        spin_unlock(&x->lock);
index 94d72e85a475ae094d14f8df5f4f54a936f0db15..b2a3ae6cad78e28324e23b857dc0a5773f569786 100644 (file)
@@ -698,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
                newsk = NULL;
                goto out;
        }
+       kfree_skb(oskb);
 
        sock_hold(sk);
        pep_sk(newsk)->listener = sk;
index 570949417f388735e93bf887294326a8f24aa534..724553e8ed7bc9d8ecd668c71ab9373936a3d2d3 100644 (file)
@@ -205,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
        {
                struct icmphdr *icmph;
 
-               if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+               if (!pskb_may_pull(skb, ihl + sizeof(*icmph)))
                        goto drop;
 
                icmph = (void *)(skb_network_header(skb) + ihl);
@@ -215,6 +215,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
                    (icmph->type != ICMP_PARAMETERPROB))
                        break;
 
+               if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+                       goto drop;
+
                iph = (void *)(icmph + 1);
                if (egress)
                        addr = iph->daddr;
index 73affb8624faa06ee95c6ce5bf5e7ea364908d02..8dc47f1d00019c4f4a91f252f70b5771d19a5956 100644 (file)
@@ -267,7 +267,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
  * Run memory cache shrinker.
  */
 static int
-rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        LIST_HEAD(free);
        int res;
index af1c173be4ad88906d55d7efa24fb47fcd0cf19d..a7ec5a8a2380e6277fa4a3755e01fb0aa58076fb 100644 (file)
@@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 
        /* Try to instantiate a bundle */
        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
-       if (err < 0) {
-               if (err != -EAGAIN)
+       if (err <= 0) {
+               if (err != 0 && err != -EAGAIN)
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
                return ERR_PTR(err);
        }
@@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
                        goto make_dummy_bundle;
                dst_hold(&xdst->u.dst);
                return oldflo;
+       } else if (new_xdst == NULL) {
+               num_xfrms = 0;
+               if (oldflo == NULL)
+                       goto make_dummy_bundle;
+               xdst->num_xfrms = 0;
+               dst_hold(&xdst->u.dst);
+               return oldflo;
        }
 
        /* Kill the previous bundle */
@@ -1760,6 +1767,10 @@ restart:
                                xfrm_pols_put(pols, num_pols);
                                err = PTR_ERR(xdst);
                                goto dropdst;
+                       } else if (xdst == NULL) {
+                               num_xfrms = 0;
+                               drop_pols = num_pols;
+                               goto no_transform;
                        }
 
                        spin_lock_bh(&xfrm_policy_sk_bundle_lock);
index 31ac5538fe7ef1261192d1bd345ac103023481b0..5da30eb6ad008021619ca57924462fadeaa1e800 100644 (file)
@@ -83,8 +83,8 @@ config SND_SOC_ALL_CODECS
 
 config SND_SOC_WM_HUBS
        tristate
-       default y if SND_SOC_WM8993=y
-       default m if SND_SOC_WM8993=m
+       default y if SND_SOC_WM8993=y || SND_SOC_WM8994=y
+       default m if SND_SOC_WM8993=m || SND_SOC_WM8994=m
 
 config SND_SOC_AC97_CODEC
        tristate
index 1072621e93fdee9de08476cfcd915fed2c792999..9d1df26281361ef9ed569e0b87a39d9a934f9254 100644 (file)
@@ -127,6 +127,8 @@ static __devinit int wm8727_platform_probe(struct platform_device *pdev)
                goto err_codec;
        }
 
+       return 0;
+
 err_codec:
        snd_soc_unregister_codec(codec);
 err:
index 7e4a627b4c7ee6bc5260e041f052b9e7b11edae2..4e212ed62ea609be9e1b3d45a5ac3fb6efea1a38 100644 (file)
@@ -94,7 +94,6 @@ SOC_DAPM_SINGLE("Bypass Switch", WM8776_OUTMUX, 2, 1, 0),
 
 static const struct snd_soc_dapm_widget wm8776_dapm_widgets[] = {
 SND_SOC_DAPM_INPUT("AUX"),
-SND_SOC_DAPM_INPUT("AUX"),
 
 SND_SOC_DAPM_INPUT("AIN1"),
 SND_SOC_DAPM_INPUT("AIN2"),
index 0417dae32e6f0e9de4d3b9f580519254a4e04404..19ad590ca0b390065850ce4ab8b0e4e2fd5d841a 100644 (file)
@@ -885,7 +885,6 @@ static int wm8988_register(struct wm8988_priv *wm8988,
        ret = snd_soc_register_dai(&wm8988_dai);
        if (ret != 0) {
                dev_err(codec->dev, "Failed to register DAI: %d\n", ret);
-               snd_soc_unregister_codec(codec);
                goto err_codec;
        }
 
index 3396a0db06ba08d795b942eed61ba42d81697e62..ec4acac49ebd527e0a1db89336e528566abf948c 100644 (file)
@@ -683,20 +683,15 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
 
        /* clock inversion (CKG2) */
        data = 0;
-       switch (SH_FSI_INVERSION_MASK & flags) {
-       case SH_FSI_LRM_INV:
-               data = 1 << 12;
-               break;
-       case SH_FSI_BRM_INV:
-               data = 1 << 8;
-               break;
-       case SH_FSI_LRS_INV:
-               data = 1 << 4;
-               break;
-       case SH_FSI_BRS_INV:
-               data = 1 << 0;
-               break;
-       }
+       if (SH_FSI_LRM_INV & flags)
+               data |= 1 << 12;
+       if (SH_FSI_BRM_INV & flags)
+               data |= 1 << 8;
+       if (SH_FSI_LRS_INV & flags)
+               data |= 1 << 4;
+       if (SH_FSI_BRS_INV & flags)
+               data |= 1 << 0;
+
        fsi_reg_write(fsi, CKG2, data);
 
        /* do fmt, di fmt */
@@ -726,15 +721,15 @@ static int fsi_dai_startup(struct snd_pcm_substream *substream,
                break;
        case SH_FSI_FMT_TDM:
                msg = "TDM";
-               data = CR_FMT(CR_TDM) | (fsi->chan - 1);
                fsi->chan = is_play ?
                        SH_FSI_GET_CH_O(flags) : SH_FSI_GET_CH_I(flags);
+               data = CR_FMT(CR_TDM) | (fsi->chan - 1);
                break;
        case SH_FSI_FMT_TDM_DELAY:
                msg = "TDM Delay";
-               data = CR_FMT(CR_TDM_D) | (fsi->chan - 1);
                fsi->chan = is_play ?
                        SH_FSI_GET_CH_O(flags) : SH_FSI_GET_CH_I(flags);
+               data = CR_FMT(CR_TDM_D) | (fsi->chan - 1);
                break;
        default:
                dev_err(dai->dev, "unknown format.\n");
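
The clock-inversion rewrite above is a behavioral change, not just a cleanup: the old switch honored at most one SH_FSI_*_INV flag, while the new if-chain ORs in a CKG2 bit for each flag that is set, so inversions can be combined. A runnable sketch of the resulting bit composition (the flag values below are stand-ins; the real definitions live in include/sound/sh_fsi.h and are not shown in this diff):

	#include <stdio.h>

	/* Stand-in flag bits; not the real sh_fsi.h values. */
	#define SH_FSI_LRM_INV	(1 << 0)
	#define SH_FSI_BRM_INV	(1 << 1)
	#define SH_FSI_LRS_INV	(1 << 2)
	#define SH_FSI_BRS_INV	(1 << 3)

	int main(void)
	{
		unsigned long flags = SH_FSI_LRM_INV | SH_FSI_BRS_INV;
		unsigned long data = 0;

		/* Same accumulation as the new fsi_dai_startup() code:
		 * each set flag contributes its own CKG2 bit. */
		if (SH_FSI_LRM_INV & flags)
			data |= 1 << 12;
		if (SH_FSI_BRM_INV & flags)
			data |= 1 << 8;
		if (SH_FSI_LRS_INV & flags)
			data |= 1 << 4;
		if (SH_FSI_BRS_INV & flags)
			data |= 1 << 0;

		printf("CKG2 = 0x%04lx\n", data);	/* 0x1001 for this combination */
		return 0;
	}
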
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
new file mode 100644 (file)
index 0000000..15130b5
--- /dev/null
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c
new file mode 100644 (file)
index 0000000..0ab8848
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <libio.h>
+#include <dwarf-regs.h>
+
+#define SPARC_MAX_REGS 96
+
+const char *sparc_regs_table[SPARC_MAX_REGS] = {
+       "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+       "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+       "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+       "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+       "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+       "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+       "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+       "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+       "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+       "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+       "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+       "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+
+/**
+ * get_arch_regstr() - lookup register name from its DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in sparc_regs_table
+ * indexed by its DWARF register number. If the number is out of range,
+ * this returns NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+       return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
+}
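
With the bound check corrected to n < SPARC_MAX_REGS, any out-of-range DWARF number yields NULL rather than an out-of-bounds read. A minimal illustrative driver, assuming it is compiled together with the dwarf-regs.c above (the program and header reference are examples, not part of the commit):

	#include <stdio.h>

	/* Declared in perf's dwarf-regs.h; implemented by the new
	 * sparc dwarf-regs.c above. */
	extern const char *get_arch_regstr(unsigned int n);

	int main(void)
	{
		/* Per the table: DWARF reg 14 is %sp, 30 is %fp; 96 is out of range. */
		unsigned int nums[] = { 14, 30, 96 };

		for (unsigned int i = 0; i < sizeof(nums) / sizeof(nums[0]); i++) {
			const char *name = get_arch_regstr(nums[i]);
			printf("DWARF reg %u -> %s\n", nums[i],
			       name ? name : "(out of range)");
		}
		return 0;
	}
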